/* Copyright (c) 2000, 2015, Oracle and/or its affiliates.
   Copyright (c) 2008, 2015, MariaDB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */

/*
  TODO:
  Fix so that MAYBE_KEY entries are stored in the tree, so that we can
  detect the use of full hash keys for queries like:

  select s.id, kws.keyword_id from sites as s,kws where s.id=kws.site_id and kws.keyword_id in (204,205);

*/

/*
  This file contains:

  RangeAnalysisModule
    A module that accepts a condition and an index (or partitioning)
    description, and builds lists of intervals (in index/partitioning
    space), such that all possible records that match the condition are
    contained within the intervals.
    The entry point for the range analysis module is the get_mm_tree()
    function.

    The lists are returned in the form of a complicated structure of
    interlinked SEL_TREE/SEL_IMERGE/SEL_ARG objects.
    See quick_range_seq_next, find_used_partitions for examples of how to
    walk this structure.
    All direct "users" of this module are located within this file, too.


  PartitionPruningModule
    A module that accepts a partitioned table and a condition, and finds
    which partitions need to be used in query execution. Search down for
    "PartitionPruningModule" for a description.
    The module has a single entry point, the prune_partitions() function.


  Range/index_merge/groupby-minmax optimizer module
    A module that accepts a table and a condition, and returns
     - a QUICK_*_SELECT object that can be used to retrieve rows that match
       the specified condition, or a "no records will match the condition"
       statement.

    The module entry points are
      test_quick_select()
      get_quick_select_for_ref()


  Record retrieval code for range/index_merge/groupby-min-max.
    Implementations of QUICK_*_SELECT classes.

  KeyTupleFormat
  ~~~~~~~~~~~~~~
  The code in this file (and elsewhere) performs operations on key value
  tuples. Those tuples are stored in the following format:

  The tuple is a sequence of key part values. The length of a key part
  value depends only on its type (and not on the actual value stored).

    KeyTuple: keypart1-data, keypart2-data, ...

  The value of each keypart is stored in the following format:

    keypart_data: [isnull_byte] keypart-value-bytes

  If a keypart may have a NULL value (key_part->field->real_maybe_null() can
  be used to check this), then the first byte is a NULL indicator with the
  following valid values:
    1  - keypart has NULL value.
    0  - keypart has non-NULL value.

  <questionable-statement> If isnull_byte==1 (NULL value), then the following
  keypart->length bytes must be 0.
  </questionable-statement>

  keypart-value-bytes holds the value. Its format depends on the field type.
  The length of keypart-value-bytes may or may not depend on the value being
  stored. The default is that the length is static and equal to
  KEY_PART_INFO::length.

  Key parts with (key_part_flag & HA_BLOB_PART) have a length that depends
  on the value:

     keypart-value-bytes: value_length value_bytes

  The value_length part itself occupies HA_KEY_BLOB_LENGTH=2 bytes.

  See key_copy() and key_restore() for code to move data between an index
  tuple and a table record.
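
  For example (an illustrative sketch only, not from a real table
  definition): for a two-part key over (a INT NOT NULL, b INT), the tuple
  for (a,b)=(5,NULL) would be laid out as

    05 00 00 00      keypart1: 4 value bytes for a=5 (the exact byte
                     order is field-type dependent; little-endian is
                     assumed here for illustration)
    01 00 00 00 00   keypart2: NULL indicator byte == 1, followed by
                     keypart->length bytes of 0

  and a blob keypart holding the value "ab" would be stored as
  02 00 61 62, i.e. the 2-byte value_length followed by the value bytes.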

  CAUTION: the above description is only sergefp's understanding of the
  subject and may omit some details.
*/

#ifdef USE_PRAGMA_IMPLEMENTATION
#pragma implementation        // gcc: Class implementation
#endif

#include "mariadb.h"
#include "sql_priv.h"
#include "key.h"            // is_key_used, key_copy, key_cmp, key_restore
#include "sql_parse.h"      // check_stack_overrun
#include "sql_partition.h"  // get_part_id_func, PARTITION_ITERATOR,
                            // struct partition_info, NOT_A_PARTITION_ID
#include "records.h"        // init_read_record, end_read_record
#include <m_ctype.h>
#include "sql_select.h"
#include "sql_statistics.h"
#include "uniques.h"

#ifndef EXTRA_DEBUG
#define test_rb_tree(A,B) {}
#define test_use_count(A) {}
#endif

/*
  Convert a double value to a number of rows. Currently this does floor(),
  and we might consider using round() instead.
*/
#define double2rows(x) ((ha_rows)(x))

/*
  This should be long enough so that any memcmp with a string that
  starts with '\0' won't cross is_null_string boundaries, even
  if the memcmp is optimized to compare 4, 8 or 16 bytes at once.
*/
static uchar is_null_string[20]= {1,0};
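
/*
  Per the KeyTupleFormat description above: the leading 1 in
  is_null_string is the NULL-indicator byte, and the remaining bytes stay
  zero, so a comparison that reads past the indicator sees only zeroes.
*/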

/**
  Helper function to compare two SEL_ARG's.
*/
static bool all_same(const SEL_ARG *sa1, const SEL_ARG *sa2)
{
  if (sa1 == NULL && sa2 == NULL)
    return true;
  if ((sa1 != NULL && sa2 == NULL) || (sa1 == NULL && sa2 != NULL))
    return false;
  return sa1->all_same(sa2);
}

class SEL_IMERGE;

#define CLONE_KEY1_MAYBE 1
#define CLONE_KEY2_MAYBE 2
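/*
  swap_clone_flag() swaps the two flag bits, i.e.
  swap_clone_flag(CLONE_KEY1_MAYBE) == CLONE_KEY2_MAYBE and vice versa.
*/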
#define swap_clone_flag(A) ((((A) & 1) << 1) | (((A) & 2) >> 1))


/*
  While objects of the class SEL_ARG represent ranges for indexes or
  index infixes (including ranges for index prefixes and index suffixes),
  objects of the class SEL_TREE represent AND/OR formulas of such ranges.
  Currently an AND/OR formula represented by a SEL_TREE object can have
  at most three levels:

    <SEL_TREE formula> ::=
      [ <SEL_RANGE_TREE formula> AND ]
      [ <SEL_IMERGE formula> [ AND <SEL_IMERGE formula> ...] ]

    <SEL_RANGE_TREE formula> ::=
      <SEL_ARG formula> [ AND <SEL_ARG_formula> ... ]

    <SEL_IMERGE formula> ::=
      <SEL_RANGE_TREE formula> [ OR <SEL_RANGE_TREE formula> ]

  As we can see from the above definitions:
   - a SEL_RANGE_TREE formula is a conjunction of SEL_ARG formulas
   - a SEL_IMERGE formula is a disjunction of SEL_RANGE_TREE formulas
   - a SEL_TREE formula is a conjunction of a SEL_RANGE_TREE formula
     and SEL_IMERGE formulas.
  It's required above that a SEL_TREE formula has at least one conjunct.

  Usually we will consider normalized SEL_RANGE_TREE formulas where we use
  TRUE as conjunct members for those indexes whose SEL_ARG trees are empty.

  We will call a SEL_TREE object simply a 'tree'.
  The part of a tree that represents a SEL_RANGE_TREE formula is called the
  'range part' of the tree, while the remaining part is called the 'imerge
  part'. If a tree contains only a range part then we call such a tree a
  'range tree'. Components of a range tree that represent SEL_ARG formulas
  are called ranges.
  If a tree does not contain any range part we call such a tree an
  'imerge tree'. Components of the imerge part of a tree that represent
  SEL_IMERGE formulas are called imerges.

  Usually we'll designate:
    SEL_TREE formulas        by T_1,...,T_k
    SEL_ARG formulas         by R_1,...,R_k
    SEL_RANGE_TREE formulas  by RT_1,...,RT_k
    SEL_IMERGE formulas      by M_1,...,M_k
  Accordingly we'll use:
    t_1,...,t_k   - to designate trees representing T_1,...,T_k
    r_1,...,r_k   - to designate ranges representing R_1,...,R_k
    rt_1,...,rt_k - to designate range trees representing RT_1,...,RT_k
    m_1,...,m_k   - to designate imerges representing M_1,...,M_k

  SEL_TREE objects are usually built from WHERE conditions or
  ON expressions.
  A SEL_TREE object always represents an inference of the condition it is
  built from. Therefore, any row that satisfies the condition the tree was
  built from also satisfies the SEL_TREE formula.

  The following transformations of a tree t representing a SEL_TREE formula
  T yield a new tree t1 that represents an inference of T: T=>T1.
    (1) remove any SEL_ARG tree from the range part of t
    (2) remove any imerge from the tree t
    (3) remove any SEL_ARG tree from any range tree contained
        in any imerge of t

  Since the basic building blocks of any SEL_TREE object are ranges,
  SEL_TREE objects in many cases can be used effectively to filter out a
  big part of the table rows that do not satisfy the WHERE/ON conditions,
  utilizing only single or multiple range index scans.

  A single range index scan is constructed for a range tree that contains
  only one SEL_ARG object for an index or an index prefix.
  An index intersection scan can be constructed for a range tree
  that contains several SEL_ARG objects. Currently index intersection
  scans are constructed only for single-point ranges.
  An index merge scan is constructed for an imerge tree that contains only
  one imerge. If the range trees of this imerge contain only single-point
  ranges then a union of index intersections can be built.

  Usually the tree built by the range optimizer for a query table contains
  more than one range in the range part, and additionally may contain some
  imerges in the imerge part. The range optimizer evaluates all of them one
  by one and chooses the range or the imerge that provides the cheapest
  single or multiple range index scan of the table. According to rules
  (1)-(3) this scan always filters out only those rows that do not satisfy
  the query conditions.

  For any condition the SEL_TREE object for it is built in a bottom-up
  manner starting from the range trees for the predicates. The tree_and
  function builds a tree for any conjunction of formulas from the trees
  for its conjuncts. The tree_or function builds a tree for any disjunction
  of formulas from the trees for its disjuncts.
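
  For example (an illustrative sketch): for a table with indexes
  idx1(key1), idx2(key2), idx3(key3), the condition
    key1 < 10 AND (key2 = 5 OR key3 > 100)
  could be represented by a tree whose range part contains the SEL_ARG
  tree for (key1 < 10) and whose imerge part contains a single imerge
  for (key2 = 5) OR (key3 > 100), matching the three-level formula shown
  at the top of this comment.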
*/

class SEL_TREE :public Sql_alloc
{
public:
  /*
    Starting an effort to document this field:
    (for some i, keys[i]->type == SEL_ARG::IMPOSSIBLE) =>
       (type == SEL_TREE::IMPOSSIBLE)
  */
  enum Type { IMPOSSIBLE, ALWAYS, MAYBE, KEY, KEY_SMALLER } type;

  SEL_TREE(enum Type type_arg, MEM_ROOT *root, size_t num_keys)
    : type(type_arg), keys(root, num_keys), n_ror_scans(0)
  {
    keys_map.clear_all();
  }

  SEL_TREE(MEM_ROOT *root, size_t num_keys) :
    type(KEY), keys(root, num_keys), n_ror_scans(0)
  {
    keys_map.clear_all();
  }

  SEL_TREE(SEL_TREE *arg, bool without_merges, RANGE_OPT_PARAM *param);
  /*
    Note: there may exist SEL_TREE objects with sel_tree->type=KEY and
    keys[i]=0 for all i. (SergeyP: it is not clear whether there is any
    merit in range analyzer functions (e.g. get_mm_parts) returning a
    pointer to such a SEL_TREE instead of NULL)
  */
  Mem_root_array<SEL_ARG *, true> keys;

  key_map keys_map; /* bitmask of non-NULL elements in keys */

  /*
    Possible ways to read rows using index_merge. The list is non-empty
    only if type==KEY. Currently it can be non-empty only if
    keys_map.is_clear_all().
  */
  List<SEL_IMERGE> merges;

  /* The members below are filled/used only after get_mm_tree is done */
  key_map ror_scans_map; /* bitmask of ROR scan-able elements in keys */
  uint    n_ror_scans;   /* number of set bits in ror_scans_map */

  struct st_index_scan_info **index_scans;     /* list of index scans */
  struct st_index_scan_info **index_scans_end; /* last index scan */

  struct st_ror_scan_info **ror_scans;     /* list of ROR key scans */
  struct st_ror_scan_info **ror_scans_end; /* last ROR scan */
  /* Note that #records for each key scan is stored in table->quick_rows */

  bool without_ranges() { return keys_map.is_clear_all(); }
  bool without_imerges() { return merges.is_empty(); }
};


class PARAM : public RANGE_OPT_PARAM
{
public:
  ha_rows quick_rows[MAX_KEY];

  /*
    This will collect 'possible keys' based on the range optimization.

    Queries with a JOIN object actually use the ref optimizer (see
    add_key_field) to collect possible_keys. This is used by single-table
    UPDATE/DELETE.
  */
  key_map possible_keys;
  longlong baseflag;
  uint max_key_part, range_count;

  bool quick;  // Don't calculate possible keys

  uint fields_bitmap_size;
  MY_BITMAP needed_fields;    /* bitmask of fields needed by the query */
  MY_BITMAP tmp_covered_fields;

  key_map *needed_reg;        /* ptr to SQL_SELECT::needed_reg */

  uint *imerge_cost_buff;     /* buffer for index_merge cost estimates */
  uint imerge_cost_buff_size; /* size of the buffer */

  /* TRUE if last checked tree->key can be used for ROR-scan */
  bool is_ror_scan;
  /* Number of ranges in the last checked tree->key */
  uint n_ranges;
  uint8 first_null_comp; /* first null component if any, 0 - otherwise */
};


class TABLE_READ_PLAN;
class TRP_RANGE;
class TRP_ROR_INTERSECT;
class TRP_ROR_UNION;
class TRP_INDEX_INTERSECT;
class TRP_INDEX_MERGE;
class TRP_GROUP_MIN_MAX;

struct st_index_scan_info;
struct st_ror_scan_info;

static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts);
static ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
                                  SEL_ARG *tree, bool update_tbl_stats,
                                  uint *mrr_flags, uint *bufsize,
                                  Cost_estimate *cost);

QUICK_RANGE_SELECT *get_quick_select(PARAM *param,uint index,
                                     SEL_ARG *key_tree, uint mrr_flags,
                                     uint mrr_buf_size, MEM_ROOT *alloc);
static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
                                       bool index_read_must_be_used,
                                       bool update_tbl_stats,
                                       double read_time);
static
TRP_INDEX_INTERSECT *get_best_index_intersect(PARAM *param, SEL_TREE *tree,
                                              double read_time);
static
TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
                                          double read_time,
                                          bool *are_all_covering);
static
TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param,
                                                   SEL_TREE *tree,
                                                   double read_time);
static
TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
                                         double read_time);
static
TABLE_READ_PLAN *merge_same_index_scans(PARAM *param, SEL_IMERGE *imerge,
                                        TRP_INDEX_MERGE *imerge_trp,
                                        double read_time);
static
TRP_GROUP_MIN_MAX *get_best_group_min_max(PARAM *param, SEL_TREE *tree,
                                          double read_time);

#ifndef DBUG_OFF
static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map,
                           const char *msg);
static void print_ror_scans_arr(TABLE *table, const char *msg,
                                struct st_ror_scan_info **start,
                                struct st_ror_scan_info **end);
static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg);
#endif

static SEL_TREE *tree_and(RANGE_OPT_PARAM *param,
                          SEL_TREE *tree1, SEL_TREE *tree2);
static SEL_TREE *tree_or(RANGE_OPT_PARAM *param,
                         SEL_TREE *tree1,SEL_TREE *tree2);
static SEL_ARG *sel_add(SEL_ARG *key1,SEL_ARG *key2);
static SEL_ARG *key_or(RANGE_OPT_PARAM *param,
                       SEL_ARG *key1, SEL_ARG *key2);
static SEL_ARG *key_and(RANGE_OPT_PARAM *param,
                        SEL_ARG *key1, SEL_ARG *key2,
                        uint clone_flag);
static bool get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1);
bool get_quick_keys(PARAM *param,QUICK_RANGE_SELECT *quick,KEY_PART *key,
                    SEL_ARG *key_tree, uchar *min_key,uint min_key_flag,
                    uchar *max_key,uint max_key_flag);
static bool eq_tree(SEL_ARG* a,SEL_ARG *b);

static SEL_ARG null_element(SEL_ARG::IMPOSSIBLE);
static bool null_part_in_key(KEY_PART *key_part, const uchar *key,
                             uint length);

#include "opt_range_mrr.cc"

static bool sel_trees_have_common_keys(SEL_TREE *tree1, SEL_TREE *tree2,
                                       key_map *common_keys);
static void eliminate_single_tree_imerges(RANGE_OPT_PARAM *param,
                                          SEL_TREE *tree);

static bool sel_trees_can_be_ored(RANGE_OPT_PARAM* param,
                                  SEL_TREE *tree1, SEL_TREE *tree2,
                                  key_map *common_keys);
static bool sel_trees_must_be_ored(RANGE_OPT_PARAM* param,
                                   SEL_TREE *tree1, SEL_TREE *tree2,
                                   key_map common_keys);
static int and_range_trees(RANGE_OPT_PARAM *param,
                           SEL_TREE *tree1, SEL_TREE *tree2,
                           SEL_TREE *result);
static bool remove_nonrange_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree);


/*
  SEL_IMERGE is a list of possible ways to do index merge, i.e. it is
  a condition of the following form:
    (t_1 || t_2 || ... || t_N) && (next)

  where all t_i are SEL_TREEs, next is another SEL_IMERGE and no pair
  (t_i,t_j) contains SEL_ARGs for the same index.

  A SEL_TREE contained in a SEL_IMERGE always has merges=NULL.

  This class relies on the memory manager to do the cleanup.
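
  For example (illustrative only), the condition
    (key1 < 5 OR key2 > 7) AND (key3 = 1 OR key4 < 2)
  could conceptually be represented by a SEL_IMERGE over the trees for
  (key1 < 5) and (key2 > 7), with 'next' being a second SEL_IMERGE over
  the trees for (key3 = 1) and (key4 < 2).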
*/

class SEL_IMERGE : public Sql_alloc
{
  enum { PREALLOCED_TREES= 10};
public:
  SEL_TREE *trees_prealloced[PREALLOCED_TREES];
  SEL_TREE **trees;      /* trees used to do index_merge */
  SEL_TREE **trees_next; /* last of these trees          */
  SEL_TREE **trees_end;  /* end of allocated space       */

  SEL_ARG ***best_keys;  /* best keys to read in SEL_TREEs */

  SEL_IMERGE() :
    trees(&trees_prealloced[0]),
    trees_next(trees),
    trees_end(trees + PREALLOCED_TREES)
  {}
  SEL_IMERGE (SEL_IMERGE *arg, uint cnt, RANGE_OPT_PARAM *param);
  int or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree);
  bool have_common_keys(RANGE_OPT_PARAM *param, SEL_TREE *tree);
  int and_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree,
                   SEL_IMERGE *new_imerge);
  int or_sel_tree_with_checks(RANGE_OPT_PARAM *param,
                              uint n_init_trees,
                              SEL_TREE *new_tree,
                              bool is_first_check_pass,
                              bool *is_last_check_pass);
  int or_sel_imerge_with_checks(RANGE_OPT_PARAM *param,
                                uint n_init_trees,
                                SEL_IMERGE* imerge,
                                bool is_first_check_pass,
                                bool *is_last_check_pass);
};


/*
  Add a range tree to the range trees of this imerge

  SYNOPSIS
    or_sel_tree()
      param     Context info for the operation
      tree      SEL_TREE to add to this imerge

  DESCRIPTION
    The function just adds the range tree 'tree' to the range trees
    of this imerge.

  RETURN
     0  if the operation is a success
    -1  if the function runs out of memory
*/

int SEL_IMERGE::or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree)
{
  if (trees_next == trees_end)
  {
    const int realloc_ratio= 2; /* Double size for next round */
    size_t old_elements= (trees_end - trees);
    size_t old_size= sizeof(SEL_TREE**) * old_elements;
    size_t new_size= old_size * realloc_ratio;
    SEL_TREE **new_trees;
    if (!(new_trees= (SEL_TREE**)alloc_root(param->mem_root, new_size)))
      return -1;
    memcpy(new_trees, trees, old_size);
    trees=      new_trees;
    trees_next= trees + old_elements;
    trees_end=  trees + old_elements * realloc_ratio;
  }
  *(trees_next++)= tree;
  return 0;
}


/*
  Check if any of the range trees of this imerge intersects with a given tree

  SYNOPSIS
    have_common_keys()
      param    Context info for the function
      tree     SEL_TREE whose intersection with the imerge range trees
               is checked

  DESCRIPTION
    The function checks whether there is any range tree rt_i in this imerge
    such that there are some indexes for which ranges are defined in both
    rt_i and the range part of the SEL_TREE tree.
    To check this the function calls the function sel_trees_have_common_keys.

  RETURN
    TRUE   if there are such range trees in this imerge
    FALSE  otherwise
*/

bool SEL_IMERGE::have_common_keys(RANGE_OPT_PARAM *param, SEL_TREE *tree)
{
  for (SEL_TREE** or_tree= trees, **bound= trees_next;
       or_tree != bound; or_tree++)
  {
    key_map common_keys;
    if (sel_trees_have_common_keys(*or_tree, tree, &common_keys))
      return TRUE;
  }
  return FALSE;
}


/*
  Perform AND operation for this imerge and the range part of a tree

  SYNOPSIS
    and_sel_tree()
      param           Context info for the operation
      tree            SEL_TREE for the second operand of the operation
      new_imerge  OUT imerge for the result of the operation

  DESCRIPTION
    This function performs the AND operation for this imerge m and the
    range part of the SEL_TREE tree rt. In other words the function
    pushes rt into this imerge. The resulting imerge is returned in
    the parameter new_imerge.
    If this imerge m represents the formula
      RT_1 OR ... OR RT_k
    then the resulting imerge of the function represents the formula
      (RT_1 AND RT) OR ... OR (RT_k AND RT)
    The function calls the function and_range_trees to construct the
    range tree representing (RT_i AND RT).

  NOTE
    The function may return an empty imerge without any range trees.
    This happens when each call of and_range_trees returns an
    impossible range tree (SEL_TREE::IMPOSSIBLE).
    Example: (key1 < 2 AND key2 > 10) AND (key1 > 4 OR key2 < 6).
    Here both conjunctions (key1 < 2 AND key2 > 10 AND key1 > 4) and
    (key1 < 2 AND key2 > 10 AND key2 < 6) are contradictory, so both
    resulting range trees are impossible and the imerge ends up empty.

  RETURN
     0  if the operation is a success
    -1  otherwise: there is not enough memory to perform the operation
*/

int SEL_IMERGE::and_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree,
                             SEL_IMERGE *new_imerge)
{
  for (SEL_TREE** or_tree= trees; or_tree != trees_next; or_tree++)
  {
    SEL_TREE *res_or_tree= 0;
    SEL_TREE *and_tree= 0;
    if (!(res_or_tree= new SEL_TREE(param->mem_root, param->keys)) ||
        !(and_tree= new SEL_TREE(tree, TRUE, param)))
      return (-1);
    if (!and_range_trees(param, *or_tree, and_tree, res_or_tree))
    {
      if (new_imerge->or_sel_tree(param, res_or_tree))
        return (-1);
    }
  }
  return 0;
}


/*
  Perform OR operation on this imerge and the range part of a tree

  SYNOPSIS
    or_sel_tree_with_checks()
      param                  Context info for the operation
      n_trees                Number of trees in this imerge to check for oring
      tree                   SEL_TREE whose range part is to be ored
      is_first_check_pass    <=> the first call of the function for this imerge
      is_last_check_pass OUT <=> no more calls of the function for this imerge

  DESCRIPTION
    The function performs the OR operation on this imerge m and the range
    part of the SEL_TREE tree rt. It always replaces this imerge with the
    result of the operation.

    The operation can be performed in two different modes: with
    is_first_check_pass==TRUE and is_first_check_pass==FALSE, transforming
    this imerge differently.

    Given this imerge represents the formula
      RT_1 OR ... OR RT_k:

    1. In the first mode, when is_first_check_pass==TRUE :
      1.1. If rt must be ored (see the function sel_trees_must_be_ored)
           with some rt_j (there may be only one such range tree in the
           imerge) then the function produces an imerge representing the
           formula
             RT_1 OR ... OR (RT_j OR RT) OR ... OR RT_k,
           where the tree for (RT_j OR RT) is built by oring the pairs
           of SEL_ARG trees for the corresponding indexes
      1.2. Otherwise the function produces the imerge representing the
           formula:
             RT_1 OR ... OR RT_k OR RT.

    2. In the second mode, when is_first_check_pass==FALSE :
      2.1. For each rt_j in the imerge that can be ored (see the function
           sel_trees_can_be_ored) with rt the function replaces rt_j with a
           range tree such that for each index for which ranges are defined
           in both rt_j and rt, the tree contains the result of oring
           these ranges.
      2.2. In other cases the function does not produce any imerge.

    When is_first_check_pass==TRUE the function returns FALSE in the
    parameter is_last_check_pass if there is some rt_j such that rt_j can
    be ored with rt, but, at the same time, it's not true that rt_j must
    be ored with rt.
    When is_first_check_pass==FALSE the function always returns FALSE in
    the parameter is_last_check_pass.

  RETURN
    1  if the result of oring rt with some rt_j that must be ored with it
       is a range tree with type==SEL_TREE::ALWAYS
       (in this case the imerge m should be discarded)
   -1  if the function runs out of memory
    0  in all other cases
*/

int SEL_IMERGE::or_sel_tree_with_checks(RANGE_OPT_PARAM *param,
                                        uint n_trees,
                                        SEL_TREE *tree,
                                        bool is_first_check_pass,
                                        bool *is_last_check_pass)
{
  bool was_ored= FALSE;
  *is_last_check_pass= is_first_check_pass;
  SEL_TREE** or_tree= trees;
  for (uint i= 0; i < n_trees; i++, or_tree++)
  {
    SEL_TREE *result= 0;
    key_map result_keys;
    key_map ored_keys;
    if (sel_trees_can_be_ored(param, *or_tree, tree, &ored_keys))
    {
      bool must_be_ored= sel_trees_must_be_ored(param, *or_tree, tree,
                                                ored_keys);
      if (must_be_ored || !is_first_check_pass)
      {
        result_keys.clear_all();
        result= *or_tree;
        for (uint key_no= 0; key_no < param->keys; key_no++)
        {
          if (!ored_keys.is_set(key_no))
          {
            result->keys[key_no]= 0;
            continue;
          }
          SEL_ARG *key1= (*or_tree)->keys[key_no];
          SEL_ARG *key2= tree->keys[key_no];
          key2->incr_refs();
          if ((result->keys[key_no]= key_or(param, key1, key2)))
          {
            result_keys.set_bit(key_no);
#ifdef EXTRA_DEBUG
            if (param->alloced_sel_args < SEL_ARG::MAX_SEL_ARGS)
            {
              key1= result->keys[key_no];
              key1->test_use_count(key1);
            }
#endif
          }
        }
      }
      else if (is_first_check_pass)
        *is_last_check_pass= FALSE;
    }

    if (result)
    {
      result->keys_map= result_keys;
      if (result_keys.is_clear_all())
        result->type= SEL_TREE::ALWAYS;
      if ((result->type == SEL_TREE::MAYBE) ||
          (result->type == SEL_TREE::ALWAYS))
        return 1;
      /* SEL_TREE::IMPOSSIBLE is impossible here */
      *or_tree= result;
      was_ored= TRUE;
    }
  }
  if (was_ored)
    return 0;

  if (is_first_check_pass && !*is_last_check_pass &&
      !(tree= new SEL_TREE(tree, FALSE, param)))
    return (-1);
  return or_sel_tree(param, tree);
}


/*
  Perform OR operation on this imerge and another imerge

  SYNOPSIS
    or_sel_imerge_with_checks()
      param                  Context info for the operation
      n_trees                Number of trees in this imerge to check for oring
      imerge                 The second operand of the operation
      is_first_check_pass    <=> the first call of the function for this imerge
      is_last_check_pass OUT <=> no more calls of the function for this imerge

  DESCRIPTION
    For each range tree rt from 'imerge' the function calls the method
    SEL_IMERGE::or_sel_tree_with_checks that performs the OR operation on
    this SEL_IMERGE object m and the tree rt. The mode of the operation is
    specified by the parameter is_first_check_pass. Each call of
    SEL_IMERGE::or_sel_tree_with_checks transforms this SEL_IMERGE object m.
    The function returns FALSE in the parameter is_last_check_pass if
    at least one of the calls of SEL_IMERGE::or_sel_tree_with_checks
    returns FALSE as the value of its last parameter.

  RETURN
    1  One of the calls of SEL_IMERGE::or_sel_tree_with_checks returns 1.
       (in this case the imerge m should be discarded)
   -1  The function runs out of memory
    0  in all other cases
*/

int SEL_IMERGE::or_sel_imerge_with_checks(RANGE_OPT_PARAM *param,
                                          uint n_trees,
                                          SEL_IMERGE* imerge,
                                          bool is_first_check_pass,
                                          bool *is_last_check_pass)
{
  *is_last_check_pass= TRUE;
  SEL_TREE** tree= imerge->trees;
  SEL_TREE** tree_end= imerge->trees_next;
  for ( ; tree < tree_end; tree++)
  {
    int rc;
    bool is_last= TRUE;
    rc= or_sel_tree_with_checks(param, n_trees, *tree,
                                is_first_check_pass, &is_last);
    if (!is_last)
      *is_last_check_pass= FALSE;
    if (rc)
      return rc;
  }
  return 0;
}


/*
  Copy constructor for SEL_TREE objects

  SYNOPSIS
    SEL_TREE
      arg            The source tree for the constructor
      without_merges <=> only the range part of the tree arg is copied
      param          Context info for the operation

  DESCRIPTION
    The constructor creates a full copy of the SEL_TREE arg if
    the parameter without_merges==FALSE. Otherwise a tree is created
    that contains a copy of only the range part of the tree arg.
*/

SEL_TREE::SEL_TREE(SEL_TREE *arg, bool without_merges,
                   RANGE_OPT_PARAM *param)
  : Sql_alloc(),
    keys(param->mem_root, param->keys),
    n_ror_scans(0)
{
  keys_map= arg->keys_map;
  type= arg->type;
  MEM_ROOT *mem_root;

  for (uint idx= 0; idx < param->keys; idx++)
  {
    if ((keys[idx]= arg->keys[idx]))
      keys[idx]->incr_refs_all();
  }

  if (without_merges)
    return;

  mem_root= current_thd->mem_root;
  List_iterator<SEL_IMERGE> it(arg->merges);
  for (SEL_IMERGE *el= it++; el; el= it++)
  {
    SEL_IMERGE *merge= new (mem_root) SEL_IMERGE(el, 0, param);
    if (!merge || merge->trees == merge->trees_next)
    {
      merges.empty();
      return;
    }
    merges.push_back(merge, mem_root);
  }
}


/*
  Copy constructor for SEL_IMERGE objects

  SYNOPSIS
    SEL_IMERGE
      arg         The source imerge for the constructor
      cnt         How many trees from arg are to be copied
      param       Context info for the operation

  DESCRIPTION
    If cnt==0 then the constructor creates a full copy of the
    imerge arg. Otherwise only the first cnt trees of the imerge
    are copied.
*/

SEL_IMERGE::SEL_IMERGE(SEL_IMERGE *arg, uint cnt,
                       RANGE_OPT_PARAM *param) : Sql_alloc()
{
  size_t elements= (arg->trees_end - arg->trees);
  if (elements > PREALLOCED_TREES)
  {
    size_t size= elements * sizeof (SEL_TREE **);
    if (!(trees= (SEL_TREE **)alloc_root(param->mem_root, size)))
      goto mem_err;
  }
  else
    trees= &trees_prealloced[0];

  trees_next= trees + (cnt ? cnt : arg->trees_next-arg->trees);
  trees_end= trees + elements;

  for (SEL_TREE **tree= trees, **arg_tree= arg->trees; tree < trees_next;
       tree++, arg_tree++)
  {
    if (!(*tree= new SEL_TREE(*arg_tree, TRUE, param)))
      goto mem_err;
  }

  return;

mem_err:
  trees= &trees_prealloced[0];
  trees_next= trees;
  trees_end= trees;
}


/*
  Perform AND operation on two imerge lists

  SYNOPSIS
    imerge_list_and_list()
      param             Context info for the operation
      im1               The first imerge list for the operation
      im2               The second imerge list for the operation

  DESCRIPTION
    The function just appends the imerge list im2 to the imerge list im1

  RETURN VALUE
    none
*/

inline void imerge_list_and_list(List<SEL_IMERGE> *im1, List<SEL_IMERGE> *im2)
{
  im1->append(im2);
}


/*
  Perform OR operation on two imerge lists

  SYNOPSIS
    imerge_list_or_list()
      param             Context info for the operation
      im1               The first imerge list for the operation
      im2               The second imerge list for the operation

  DESCRIPTION
    Assuming that the first imerge list represents the formula
      F1= M1_1 AND ... AND M1_k1
    while the second imerge list represents the formula
      F2= M2_1 AND ... AND M2_k2,
    where M1_i= RT1_i_1 OR ... OR RT1_i_l1i (i in [1..k1])
    and M2_i = RT2_i_1 OR ... OR RT2_i_l2i (i in [1..k2]),
    the function builds a list of imerges for some formula that can be
    inferred from the formula (F1 OR F2).

    More exactly the function builds imerges for the formula (M1_1 OR M2_1).
    Note that
      (F1 OR F2) = (M1_1 AND ... AND M1_k1) OR (M2_1 AND ... AND M2_k2) =
      AND (M1_i OR M2_j) (i in [1..k1], j in [1..k2]) =>
      M1_1 OR M2_1.
    So (M1_1 OR M2_1) is indeed an inference formula for (F1 OR F2).

    To build imerges for the formula (M1_1 OR M2_1) the function invokes,
    possibly twice, the method SEL_IMERGE::or_sel_imerge_with_checks
    for the imerge m1_1.
    At its first invocation the method SEL_IMERGE::or_sel_imerge_with_checks
    performs the OR operation on the imerge m1_1 and the range tree rt2_1_1
    by calling SEL_IMERGE::or_sel_tree_with_checks with
    is_first_check_pass==TRUE. The resulting imerge of the operation is
    ored with the next range tree of the imerge m2_1. This oring continues
    until the last range tree from m2_1 has been ored.
    At its second invocation the method SEL_IMERGE::or_sel_imerge_with_checks
    performs the same sequence of OR operations, but now calling
    SEL_IMERGE::or_sel_tree_with_checks with is_first_check_pass==FALSE.

    The imerges that the operation produces replace those in the list im1.

  RETURN
     0  if the operation is a success
    -1  if the function has run out of memory
*/

int imerge_list_or_list(RANGE_OPT_PARAM *param,
                        List<SEL_IMERGE> *im1,
                        List<SEL_IMERGE> *im2)
{
  int rc;
  bool is_last_check_pass= FALSE;
  SEL_IMERGE *imerge= im1->head();
  uint elems= (uint)(imerge->trees_next-imerge->trees);
  MEM_ROOT *mem_root= current_thd->mem_root;

  im1->empty();
  im1->push_back(imerge, mem_root);

  rc= imerge->or_sel_imerge_with_checks(param, elems, im2->head(),
                                        TRUE, &is_last_check_pass);
  if (rc)
  {
    if (rc == 1)
    {
      im1->empty();
      rc= 0;
    }
    return rc;
  }

  if (!is_last_check_pass)
  {
    SEL_IMERGE* new_imerge= new (mem_root) SEL_IMERGE(imerge, elems, param);
    if (new_imerge)
    {
      is_last_check_pass= TRUE;
      rc= new_imerge->or_sel_imerge_with_checks(param, elems, im2->head(),
                                                FALSE, &is_last_check_pass);
      if (!rc)
        im1->push_back(new_imerge, mem_root);
    }
  }
  return rc;
}


/*
  Perform OR operation for each imerge from a list and the range part of a tree

  SYNOPSIS
    imerge_list_or_tree()
      param       Context info for the operation
      merges      The list of imerges to be ored with the range part of tree
      tree        SEL_TREE whose range part is to be ored with the imerges

  DESCRIPTION
    For each imerge mi from the list 'merges' the function performs the OR
    operation with mi and the range part of 'tree' rt, producing one or
    two imerges.

    Given the imerge mi represents the formula RTi_1 OR ... OR RTi_k,
    the function forms the merges by the following rules:

    1. If rt cannot be ored with any of the trees rti_j the function just
       produces an imerge that represents the formula
         RTi_1 OR ... OR RTi_k OR RT.
    2. If there exists a tree rti_j that must be ored with rt the function
       produces an imerge that represents the formula
         RTi_1 OR ... OR (RTi_j OR RT) OR ... OR RTi_k,
       where the range tree for (RTi_j OR RT) is constructed by oring the
       SEL_ARG trees that must be ored.
    3. For each rti_j that can be ored with rt the function produces
       a new tree rti_j' and substitutes this new range tree for rti_j.

    In any case the function removes mi from the list and then adds all
    produced imerges.

    To build imerges by rules 1-3 the function calls the method
    SEL_IMERGE::or_sel_tree_with_checks, possibly twice. With the first
    call it passes TRUE for the is_first_check_pass parameter.
    At this first call imerges by rules 1-2 are built. If the call
    returns FALSE in its last (output) parameter then the
    function is called a second time. At this call the imerge
    of rule 3 is produced.

    If a call of SEL_IMERGE::or_sel_tree_with_checks returns 1 then
    it means that the produced tree contains an always true
    range tree and the whole imerge can be discarded.

  RETURN
    1  if no imerges are produced
    0  otherwise
*/

static
int imerge_list_or_tree(RANGE_OPT_PARAM *param,
                        List<SEL_IMERGE> *merges,
                        SEL_TREE *tree)
{
  SEL_IMERGE *imerge;
  List<SEL_IMERGE> additional_merges;
  List_iterator<SEL_IMERGE> it(*merges);
  MEM_ROOT *mem_root= current_thd->mem_root;

  while ((imerge= it++))
  {
    bool is_last_check_pass;
    int rc= 0;
    int rc1= 0;
    SEL_TREE *or_tree= new (mem_root) SEL_TREE (tree, FALSE, param);
    if (or_tree)
    {
      uint elems= (uint)(imerge->trees_next-imerge->trees);
      rc= imerge->or_sel_tree_with_checks(param, elems, or_tree,
                                          TRUE, &is_last_check_pass);
      if (!is_last_check_pass)
      {
        SEL_IMERGE *new_imerge= new (mem_root) SEL_IMERGE(imerge, elems,
                                                          param);
        if (new_imerge)
        {
          rc1= new_imerge->or_sel_tree_with_checks(param, elems, or_tree,
                                                   FALSE, &is_last_check_pass);
          if (!rc1)
            additional_merges.push_back(new_imerge, mem_root);
        }
      }
    }
    if (rc || rc1 || !or_tree)
      it.remove();
  }

  merges->append(&additional_merges);
  return merges->is_empty();
}


/*
  Perform pushdown operation of the range part of a tree into given imerges

  SYNOPSIS
    imerge_list_and_tree()
      param           Context info for the operation
      merges   IN/OUT List of imerges to push the range part of 'tree' into
      tree            SEL_TREE whose range part is to be pushed into imerges
      replace         if the pushdown operation for an imerge is a success
                      then the original imerge is replaced by the result
                      of the pushdown

  DESCRIPTION
    For each imerge from the list merges the function pushes the range part
    rt of 'tree' into the imerge.
    More exactly, if the imerge mi from the list represents the formula
      RTi_1 OR ... OR RTi_k
    the function builds a new imerge that represents the formula
      (RTi_1 AND RT) OR ... OR (RTi_k AND RT)
    and adds this imerge to the list merges.
    To perform this pushdown operation the function calls the method
    SEL_IMERGE::and_sel_tree.
    For any imerge mi the new imerge is not created if for each pair of
    trees rti_j and rt the intersection of the indexes with defined ranges
    is empty.
    If the result of the pushdown operation for the imerge mi is an
    imerge with no trees then not only is nothing added to the list
    merges, but mi itself is removed from the list.

  TODO
    Optimize the code in order not to create new SEL_IMERGE and new SEL_TREE
    objects when 'replace' is TRUE. (Currently this function is always called
    with this parameter equal to TRUE.)

  RETURN
    1  if no imerges are left in the list merges
    0  otherwise
*/

static
int imerge_list_and_tree(RANGE_OPT_PARAM *param,
                         List<SEL_IMERGE> *merges,
                         SEL_TREE *tree,
                         bool replace)
{
  SEL_IMERGE *imerge;
  SEL_IMERGE *new_imerge= NULL;
  List<SEL_IMERGE> new_merges;
  List_iterator<SEL_IMERGE> it(*merges);
  MEM_ROOT *mem_root= current_thd->mem_root;

  while ((imerge= it++))
  {
    if (!new_imerge)
      new_imerge= new (mem_root) SEL_IMERGE();
    if (imerge->have_common_keys(param, tree) &&
        new_imerge && !imerge->and_sel_tree(param, tree, new_imerge))
    {
      if (new_imerge->trees == new_imerge->trees_next)
        it.remove();
      else
      {
        if (replace)
          it.replace(new_imerge);
        else
          new_merges.push_back(new_imerge, mem_root);
        new_imerge= NULL;
      }
    }
  }
  imerge_list_and_list(&new_merges, merges);
  *merges= new_merges;
  return merges->is_empty();
}


/***************************************************************************
** Basic functions for SQL_SELECT and QUICK_RANGE_SELECT
***************************************************************************/

/*
  Make a select from mysql info.
  Error is set as follows:
    0 = ok
    1 = Got some error (out of memory?)
*/

SQL_SELECT *make_select(TABLE *head, table_map const_tables,
                        table_map read_tables, COND *conds,
                        SORT_INFO *filesort,
                        bool allow_null_cond,
                        int *error)
{
  SQL_SELECT *select;
  DBUG_ENTER("make_select");

  *error= 0;

  if (!conds && !allow_null_cond)
    DBUG_RETURN(0);
  if (!(select= new (head->in_use->mem_root) SQL_SELECT))
  {
    *error= 1;              // out of memory
    DBUG_RETURN(0);         /* purecov: inspected */
  }
  select->read_tables= read_tables;
  select->const_tables= const_tables;
  select->head= head;
  select->cond= conds;

  if (filesort && my_b_inited(&filesort->io_cache))
  {
    /*
      Hijack the filesort io_cache for make_select.
      SQL_SELECT will be responsible for ensuring that it's properly freed.
    */
    select->file= filesort->io_cache;
    select->records= (ha_rows) (select->file.end_of_file/
                                head->file->ref_length);
    my_b_clear(&filesort->io_cache);
  }
  DBUG_RETURN(select);
}


SQL_SELECT::SQL_SELECT()
  :quick(0), cond(0), pre_idx_push_select_cond(NULL), free_cond(0)
{
  quick_keys.clear_all(); needed_reg.clear_all();
  my_b_clear(&file);
}


void SQL_SELECT::cleanup()
{
  delete quick;
  quick= 0;
  if (free_cond)
  {
    free_cond= 0;
    delete cond;
    cond= 0;
  }
  close_cached_file(&file);
}


SQL_SELECT::~SQL_SELECT()
{
  cleanup();
}

#undef index  // Fix for Unixware 7

QUICK_SELECT_I::QUICK_SELECT_I()
  :max_used_key_length(0),
   used_key_parts(0)
{}

QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(THD *thd, TABLE *table, uint key_nr,
                                       bool no_alloc, MEM_ROOT *parent_alloc,
                                       bool *create_error)
  :thd(thd), no_alloc(no_alloc), parent_alloc(parent_alloc),
   free_file(0), cur_range(NULL), last_range(0), dont_free(0)
{
  my_bitmap_map *bitmap;
  DBUG_ENTER("QUICK_RANGE_SELECT::QUICK_RANGE_SELECT");

  in_ror_merged_scan= 0;
  index= key_nr;
  head= table;
  key_part_info= head->key_info[index].key_part;

  /* 'thd' is not accessible in QUICK_RANGE_SELECT::reset(). */
  mrr_buf_size= thd->variables.mrr_buff_size;
  mrr_buf_desc= NULL;

  if (!no_alloc && !parent_alloc)
  {
    // Allocates everything through the internal memroot
    init_sql_alloc(&alloc, "QUICK_RANGE_SELECT",
                   thd->variables.range_alloc_block_size, 0,
                   MYF(MY_THREAD_SPECIFIC));
    thd->mem_root= &alloc;
  }
  else
    bzero((char*) &alloc, sizeof(alloc));
  file= head->file;
  record= head->record[0];

  my_init_dynamic_array2(&ranges, sizeof(QUICK_RANGE*),
                         thd->alloc(sizeof(QUICK_RANGE*) * 16), 16, 16,
                         MYF(MY_THREAD_SPECIFIC));

  /* Allocate a bitmap for used columns */
  if (!(bitmap= (my_bitmap_map*) thd->alloc(head->s->column_bitmap_size)))
  {
    column_bitmap.bitmap= 0;
    *create_error= 1;
  }
  else
    my_bitmap_init(&column_bitmap, bitmap, head->s->fields, FALSE);
  DBUG_VOID_RETURN;
}


void QUICK_RANGE_SELECT::need_sorted_output()
{
  if (!(mrr_flags & HA_MRR_SORTED))
  {
    /*
      The native implementation can't produce sorted output. We'll have to
      switch to the default implementation.
    */
    mrr_flags|= HA_MRR_USE_DEFAULT_IMPL;
  }
  mrr_flags|= HA_MRR_SORTED;
}


int QUICK_RANGE_SELECT::init()
{
  DBUG_ENTER("QUICK_RANGE_SELECT::init");

  if (file->inited != handler::NONE)
    file->ha_index_or_rnd_end();
  DBUG_RETURN(FALSE);
}


void QUICK_RANGE_SELECT::range_end()
{
  if (file->inited != handler::NONE)
    file->ha_index_or_rnd_end();
}


QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT()
{
  DBUG_ENTER("QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT");
  if (!dont_free)
  {
    /* file is NULL for CPK scan on covering ROR-intersection */
    if (file)
    {
      range_end();
      file->ha_end_keyread();
      if (free_file)
      {
        DBUG_PRINT("info", ("Freeing separate handler %p (free: %d)", file,
                            free_file));
        file->ha_external_lock(current_thd, F_UNLCK);
        file->ha_close();
        delete file;
      }
    }
    delete_dynamic(&ranges); /* ranges are allocated in alloc */
    free_root(&alloc, MYF(0));
  }
  my_free(mrr_buf_desc);
  DBUG_VOID_RETURN;
}

/*
  QUICK_INDEX_SORT_SELECT works as follows:
  - Do index scans, accumulate rowids in the Unique object
    (Unique will also sort and de-duplicate rowids)
  - Use rowids from unique to run a disk-ordered sweep
*/

QUICK_INDEX_SORT_SELECT::QUICK_INDEX_SORT_SELECT(THD *thd_param,
                                                 TABLE *table)
  :unique(NULL), pk_quick_select(NULL), thd(thd_param)
{
  DBUG_ENTER("QUICK_INDEX_SORT_SELECT::QUICK_INDEX_SORT_SELECT");
  index= MAX_KEY;
  head= table;
  bzero(&read_record, sizeof(read_record));
  init_sql_alloc(&alloc, "QUICK_INDEX_SORT_SELECT",
                 thd->variables.range_alloc_block_size, 0,
                 MYF(MY_THREAD_SPECIFIC));
  DBUG_VOID_RETURN;
}

int QUICK_INDEX_SORT_SELECT::init()
{
  DBUG_ENTER("QUICK_INDEX_SORT_SELECT::init");
  DBUG_RETURN(0);
}

int QUICK_INDEX_SORT_SELECT::reset()
{
  DBUG_ENTER("QUICK_INDEX_SORT_SELECT::reset");
  const int retval= read_keys_and_merge();
  DBUG_RETURN(retval);
}

bool
QUICK_INDEX_SORT_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick_sel_range)
{
  DBUG_ENTER("QUICK_INDEX_SORT_SELECT::push_quick_back");
  if (head->file->primary_key_is_clustered() &&
      quick_sel_range->index == head->s->primary_key)
  {
    /*
      A quick_select over a clustered primary key is handled specifically.
      Here we assume:
      1. PK columns are included in any other merged index
      2. Scan on the PK is disk-ordered.
      (not meeting #2 will only cause performance degradation)

      We could treat the clustered PK as any other index, but that would
      be inefficient. There is no point in doing a scan on the
      CPK, remembering the rowid, then making an rnd_pos() call with
      that rowid.
    */
    pk_quick_select= quick_sel_range;
    DBUG_RETURN(0);
  }
  DBUG_RETURN(quick_selects.push_back(quick_sel_range, thd->mem_root));
}

QUICK_INDEX_SORT_SELECT::~QUICK_INDEX_SORT_SELECT()
{
  List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects);
  QUICK_RANGE_SELECT* quick;
  DBUG_ENTER("QUICK_INDEX_SORT_SELECT::~QUICK_INDEX_SORT_SELECT");
  delete unique;
  quick_it.rewind();
  while ((quick= quick_it++))
    quick->file= NULL;
  quick_selects.delete_elements();
  delete pk_quick_select;
  /* It's ok to call the next two even if they are already deinitialized */
  end_read_record(&read_record);
  free_root(&alloc, MYF(0));
  DBUG_VOID_RETURN;
}
1415 | |
1416 | QUICK_ROR_INTERSECT_SELECT::QUICK_ROR_INTERSECT_SELECT(THD *thd_param, |
1417 | TABLE *table, |
1418 | bool retrieve_full_rows, |
1419 | MEM_ROOT *parent_alloc) |
1420 | : cpk_quick(NULL), thd(thd_param), need_to_fetch_row(retrieve_full_rows), |
1421 | scans_inited(FALSE) |
1422 | { |
1423 | index= MAX_KEY; |
1424 | head= table; |
1425 | record= head->record[0]; |
1426 | if (!parent_alloc) |
1427 | init_sql_alloc(&alloc, "QUICK_ROR_INTERSECT_SELECT" , |
1428 | thd->variables.range_alloc_block_size, 0, |
1429 | MYF(MY_THREAD_SPECIFIC)); |
1430 | else |
1431 | bzero(&alloc, sizeof(MEM_ROOT)); |
1432 | last_rowid= (uchar*) alloc_root(parent_alloc? parent_alloc : &alloc, |
1433 | head->file->ref_length); |
1434 | } |
1435 | |
1436 | |
1437 | /* |
1438 | Do post-constructor initialization. |
1439 | SYNOPSIS |
1440 | QUICK_ROR_INTERSECT_SELECT::init() |
1441 | |
1442 | RETURN |
1443 | 0 OK |
1444 | other Error code |
1445 | */ |
1446 | |
1447 | int QUICK_ROR_INTERSECT_SELECT::init() |
1448 | { |
1449 | DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::init" ); |
1450 | /* Check if last_rowid was successfully allocated in ctor */ |
1451 | DBUG_RETURN(!last_rowid); |
1452 | } |
1453 | |
1454 | |
1455 | /* |
1456 | Initialize this quick select to be a ROR-merged scan. |
1457 | |
1458 | SYNOPSIS |
1459 | QUICK_RANGE_SELECT::init_ror_merged_scan() |
1460 | reuse_handler If TRUE, use head->file, otherwise create a separate |
1461 | handler object |
1462 | |
1463 | NOTES |
1464 | This function creates and prepares for subsequent use a separate handler |
1465 | object if it can't reuse head->file. The reason for this is that during |
1466 | ROR-merge several key scans are performed simultaneously, and a single |
1467 | handler is only capable of preserving context of a single key scan. |
1468 | |
1469 | In ROR-merge the quick select doing merge does full records retrieval, |
1470 | merged quick selects read only keys. |
1471 | |
1472 | RETURN |
1473 | 0 ROR child scan initialized, ok to use. |
1474 | 1 error |
1475 | */ |
1476 | |
1477 | int QUICK_RANGE_SELECT::init_ror_merged_scan(bool reuse_handler, |
1478 | MEM_ROOT *local_alloc) |
1479 | { |
1480 | handler *save_file= file, *org_file; |
1481 | THD *thd= head->in_use; |
1482 | MY_BITMAP * const save_vcol_set= head->vcol_set; |
1483 | MY_BITMAP * const save_read_set= head->read_set; |
1484 | MY_BITMAP * const save_write_set= head->write_set; |
1485 | DBUG_ENTER("QUICK_RANGE_SELECT::init_ror_merged_scan" ); |
1486 | |
1487 | in_ror_merged_scan= 1; |
1488 | if (reuse_handler) |
1489 | { |
1490 | DBUG_PRINT("info" , ("Reusing handler %p" , file)); |
1491 | if (init()) |
1492 | { |
1493 | DBUG_RETURN(1); |
1494 | } |
1495 | goto end; |
1496 | } |
1497 | |
1498 | /* Create a separate handler object for this quick select */ |
1499 | if (free_file) |
1500 | { |
1501 | /* already have own 'handler' object. */ |
1502 | DBUG_RETURN(0); |
1503 | } |
1504 | |
1505 | if (!(file= head->file->clone(head->s->normalized_path.str, local_alloc))) |
1506 | { |
1507 | /* |
1508 | Manually set the error flag. Note: there seems to be quite a few |
1509 | places where a failure could cause the server to "hang" the client by |
1510 | sending no response to a query. ATM those are not real errors because |
1511 | the storage engine calls in question happen to never fail with the |
1512 | existing storage engines. |
1513 | */ |
1514 | my_error(ER_OUT_OF_RESOURCES, MYF(0)); /* purecov: inspected */ |
1515 | /* Caller will free the memory */ |
1516 | goto failure; /* purecov: inspected */ |
1517 | } |
1518 | |
1519 | if (file->ha_external_lock(thd, F_RDLCK)) |
1520 | goto failure; |
1521 | |
1522 | if (init()) |
1523 | { |
1524 | file->ha_external_lock(thd, F_UNLCK); |
1525 | file->ha_close(); |
1526 | goto failure; |
1527 | } |
1528 | free_file= TRUE; |
1529 | last_rowid= file->ref; |
1530 | |
1531 | end: |
1532 | /* |
1533 | We are only going to read key fields and call position() on 'file' |
1534 | The following sets head->read_set (== column_bitmap) to only use this |
1535 | key. The 'column_bitmap' is used in ::get_next() |
1536 | */ |
1537 | org_file= head->file; |
1538 | head->file= file; |
1539 | |
1540 | head->column_bitmaps_set_no_signal(&column_bitmap, &column_bitmap, &column_bitmap); |
1541 | head->prepare_for_keyread(index, &column_bitmap); |
1542 | head->prepare_for_position(); |
1543 | |
1544 | head->file= org_file; |
1545 | |
  /* Restore head->read_set (and write_set) to what they were before the call */
1547 | head->column_bitmaps_set(save_read_set, save_write_set, save_vcol_set); |
1548 | |
1549 | if (reset()) |
1550 | { |
1551 | if (!reuse_handler) |
1552 | { |
1553 | file->ha_external_lock(thd, F_UNLCK); |
1554 | file->ha_close(); |
1555 | goto failure; |
1556 | } |
1557 | DBUG_RETURN(1); |
1558 | } |
1559 | DBUG_RETURN(0); |
1560 | |
1561 | failure: |
1562 | head->column_bitmaps_set(save_read_set, save_write_set, save_vcol_set); |
1563 | delete file; |
1564 | file= save_file; |
1565 | DBUG_RETURN(1); |
1566 | } |
1567 | |
1568 | |
1569 | /* |
1570 | Initialize this quick select to be a part of a ROR-merged scan. |
1571 | SYNOPSIS |
1572 | QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan() |
1573 | reuse_handler If TRUE, use head->file, otherwise create separate |
1574 | handler object. |
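  NOTES
    The first of the merged quick selects may reuse head->file (when no
    full-row fetch is needed); all subsequent ones always create handler
    clones of their own.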
1575 | RETURN |
1576 | 0 OK |
1577 | other error code |
1578 | */ |
1579 | int QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan(bool reuse_handler, |
1580 | MEM_ROOT *local_alloc) |
1581 | { |
1582 | List_iterator_fast<QUICK_SELECT_WITH_RECORD> quick_it(quick_selects); |
1583 | QUICK_SELECT_WITH_RECORD *cur; |
1584 | QUICK_RANGE_SELECT *quick; |
1585 | DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan" ); |
1586 | |
1587 | /* Initialize all merged "children" quick selects */ |
1588 | DBUG_ASSERT(!need_to_fetch_row || reuse_handler); |
1589 | if (!need_to_fetch_row && reuse_handler) |
1590 | { |
1591 | cur= quick_it++; |
1592 | quick= cur->quick; |
1593 | /* |
1594 | There is no use of this->file. Use it for the first of merged range |
1595 | selects. |
1596 | */ |
1597 | int error= quick->init_ror_merged_scan(TRUE, local_alloc); |
1598 | if (unlikely(error)) |
1599 | DBUG_RETURN(error); |
1600 | quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS); |
1601 | } |
1602 | while ((cur= quick_it++)) |
1603 | { |
1604 | quick= cur->quick; |
1605 | #ifndef DBUG_OFF |
1606 | const MY_BITMAP * const save_read_set= quick->head->read_set; |
1607 | const MY_BITMAP * const save_write_set= quick->head->write_set; |
1608 | #endif |
1609 | if (quick->init_ror_merged_scan(FALSE, local_alloc)) |
1610 | DBUG_RETURN(1); |
1611 | quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS); |
1612 | |
    // Sets are shared by all members of "quick_selects" and must not change
1614 | #ifndef DBUG_OFF |
1615 | DBUG_ASSERT(quick->head->read_set == save_read_set); |
1616 | DBUG_ASSERT(quick->head->write_set == save_write_set); |
1617 | #endif |
1618 | /* All merged scans share the same record buffer in intersection. */ |
1619 | quick->record= head->record[0]; |
1620 | } |
1621 | |
1622 | if (need_to_fetch_row && |
1623 | unlikely(head->file->ha_rnd_init_with_error(false))) |
1624 | { |
1625 | DBUG_PRINT("error" , ("ROR index_merge rnd_init call failed" )); |
1626 | DBUG_RETURN(1); |
1627 | } |
1628 | DBUG_RETURN(0); |
1629 | } |
1630 | |
1631 | |
1632 | /* |
1633 | Initialize quick select for row retrieval. |
1634 | SYNOPSIS |
1635 | reset() |
1636 | RETURN |
1637 | 0 OK |
1638 | other Error code |
1639 | */ |
1640 | |
1641 | int QUICK_ROR_INTERSECT_SELECT::reset() |
1642 | { |
1643 | DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::reset" ); |
1644 | if (!scans_inited && init_ror_merged_scan(TRUE, &alloc)) |
1645 | DBUG_RETURN(1); |
1646 | scans_inited= TRUE; |
1647 | List_iterator_fast<QUICK_SELECT_WITH_RECORD> it(quick_selects); |
1648 | QUICK_SELECT_WITH_RECORD *qr; |
1649 | while ((qr= it++)) |
1650 | qr->quick->reset(); |
1651 | DBUG_RETURN(0); |
1652 | } |
1653 | |
1654 | |
1655 | /* |
1656 | Add a merged quick select to this ROR-intersection quick select. |
1657 | |
1658 | SYNOPSIS |
1659 | QUICK_ROR_INTERSECT_SELECT::push_quick_back() |
1660 | alloc Mem root to create auxiliary structures on |
1661 | quick Quick select to be added. The quick select must return |
1662 | rows in rowid order. |
1663 | NOTES |
1664 | This call can only be made before init() is called. |
1665 | |
1666 | RETURN |
1667 | FALSE OK |
1668 | TRUE Out of memory. |
1669 | */ |
1670 | |
1671 | bool |
1672 | QUICK_ROR_INTERSECT_SELECT::push_quick_back(MEM_ROOT *local_alloc, |
1673 | QUICK_RANGE_SELECT *quick) |
1674 | { |
1675 | QUICK_SELECT_WITH_RECORD *qr; |
1676 | if (!(qr= new QUICK_SELECT_WITH_RECORD) || |
1677 | !(qr->key_tuple= (uchar*)alloc_root(local_alloc, |
1678 | quick->max_used_key_length))) |
1679 | return TRUE; |
1680 | qr->quick= quick; |
1681 | return quick_selects.push_back(qr); |
1682 | } |
1683 | |
1684 | |
1685 | QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT() |
1686 | { |
1687 | DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT" ); |
1688 | quick_selects.delete_elements(); |
1689 | delete cpk_quick; |
1690 | free_root(&alloc,MYF(0)); |
1691 | if (need_to_fetch_row && head->file->inited != handler::NONE) |
1692 | head->file->ha_rnd_end(); |
1693 | DBUG_VOID_RETURN; |
1694 | } |
1695 | |
1696 | |
1697 | QUICK_ROR_UNION_SELECT::QUICK_ROR_UNION_SELECT(THD *thd_param, |
1698 | TABLE *table) |
1699 | : thd(thd_param), scans_inited(FALSE) |
1700 | { |
1701 | index= MAX_KEY; |
1702 | head= table; |
1703 | rowid_length= table->file->ref_length; |
1704 | record= head->record[0]; |
1705 | init_sql_alloc(&alloc, "QUICK_ROR_UNION_SELECT" , |
1706 | thd->variables.range_alloc_block_size, 0, |
1707 | MYF(MY_THREAD_SPECIFIC)); |
1708 | thd_param->mem_root= &alloc; |
1709 | } |
1710 | |
1711 | |
1712 | /* |
  Comparison function to be used by the QUICK_ROR_UNION_SELECT::queue
  priority queue.

  SYNOPSIS
    QUICK_ROR_UNION_SELECT_queue_cmp()
1718 | arg Pointer to QUICK_ROR_UNION_SELECT |
1719 | val1 First merged select |
1720 | val2 Second merged select |
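
  RETURN
    <0  val1's current rowid sorts before val2's
     0  the rowids are equal
    >0  val1's current rowid sorts after val2's

  NOTES
    Since the queue is created with max_at_top=FALSE, the merged select
    with the smallest current rowid is kept on top, which lets the union
    emit rowids in order and skip duplicates.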
1721 | */ |
1722 | |
1723 | C_MODE_START |
1724 | |
1725 | static int QUICK_ROR_UNION_SELECT_queue_cmp(void *arg, uchar *val1, uchar *val2) |
1726 | { |
1727 | QUICK_ROR_UNION_SELECT *self= (QUICK_ROR_UNION_SELECT*)arg; |
1728 | return self->head->file->cmp_ref(((QUICK_SELECT_I*)val1)->last_rowid, |
1729 | ((QUICK_SELECT_I*)val2)->last_rowid); |
1730 | } |
1731 | |
1732 | C_MODE_END |
1733 | |
1734 | |
1735 | /* |
1736 | Do post-constructor initialization. |
1737 | SYNOPSIS |
1738 | QUICK_ROR_UNION_SELECT::init() |
1739 | |
1740 | RETURN |
1741 | 0 OK |
1742 | other Error code |
1743 | */ |
1744 | |
1745 | int QUICK_ROR_UNION_SELECT::init() |
1746 | { |
1747 | DBUG_ENTER("QUICK_ROR_UNION_SELECT::init" ); |
1748 | if (init_queue(&queue, quick_selects.elements, 0, |
                 FALSE, QUICK_ROR_UNION_SELECT_queue_cmp,
1750 | (void*) this, 0, 0)) |
1751 | { |
1752 | bzero(&queue, sizeof(QUEUE)); |
1753 | DBUG_RETURN(1); |
1754 | } |
1755 | |
1756 | if (!(cur_rowid= (uchar*) alloc_root(&alloc, 2*head->file->ref_length))) |
1757 | DBUG_RETURN(1); |
1758 | prev_rowid= cur_rowid + head->file->ref_length; |
1759 | DBUG_RETURN(0); |
1760 | } |
1761 | |
1762 | |
1763 | /* |
1764 | Initialize quick select for row retrieval. |
1765 | SYNOPSIS |
1766 | reset() |
1767 | |
1768 | RETURN |
1769 | 0 OK |
1770 | other Error code |
1771 | */ |
1772 | |
1773 | int QUICK_ROR_UNION_SELECT::reset() |
1774 | { |
1775 | QUICK_SELECT_I *quick; |
1776 | int error; |
1777 | DBUG_ENTER("QUICK_ROR_UNION_SELECT::reset" ); |
1778 | have_prev_rowid= FALSE; |
1779 | if (!scans_inited) |
1780 | { |
1781 | List_iterator_fast<QUICK_SELECT_I> it(quick_selects); |
1782 | while ((quick= it++)) |
1783 | { |
1784 | if (quick->init_ror_merged_scan(FALSE, &alloc)) |
1785 | DBUG_RETURN(1); |
1786 | } |
1787 | scans_inited= TRUE; |
1788 | } |
1789 | queue_remove_all(&queue); |
1790 | /* |
1791 | Initialize scans for merged quick selects and put all merged quick |
1792 | selects into the queue. |
1793 | */ |
1794 | List_iterator_fast<QUICK_SELECT_I> it(quick_selects); |
1795 | while ((quick= it++)) |
1796 | { |
1797 | if (unlikely((error= quick->reset()))) |
1798 | DBUG_RETURN(error); |
1799 | if (unlikely((error= quick->get_next()))) |
1800 | { |
1801 | if (error == HA_ERR_END_OF_FILE) |
1802 | continue; |
1803 | DBUG_RETURN(error); |
1804 | } |
1805 | quick->save_last_pos(); |
1806 | queue_insert(&queue, (uchar*)quick); |
1807 | } |
1808 | /* Prepare for ha_rnd_pos calls. */ |
1809 | if (head->file->inited && unlikely((error= head->file->ha_rnd_end()))) |
1810 | { |
1811 | DBUG_PRINT("error" , ("ROR index_merge rnd_end call failed" )); |
1812 | DBUG_RETURN(error); |
1813 | } |
1814 | if (unlikely((error= head->file->ha_rnd_init(false)))) |
1815 | { |
1816 | DBUG_PRINT("error" , ("ROR index_merge rnd_init call failed" )); |
1817 | DBUG_RETURN(error); |
1818 | } |
1819 | |
1820 | DBUG_RETURN(0); |
1821 | } |
1822 | |
1823 | |
1824 | bool |
1825 | QUICK_ROR_UNION_SELECT::push_quick_back(QUICK_SELECT_I *quick_sel_range) |
1826 | { |
1827 | return quick_selects.push_back(quick_sel_range); |
1828 | } |
1829 | |
1830 | QUICK_ROR_UNION_SELECT::~QUICK_ROR_UNION_SELECT() |
1831 | { |
1832 | DBUG_ENTER("QUICK_ROR_UNION_SELECT::~QUICK_ROR_UNION_SELECT" ); |
1833 | delete_queue(&queue); |
1834 | quick_selects.delete_elements(); |
1835 | if (head->file->inited != handler::NONE) |
1836 | head->file->ha_rnd_end(); |
1837 | free_root(&alloc,MYF(0)); |
1838 | DBUG_VOID_RETURN; |
1839 | } |
1840 | |
1841 | |
1842 | QUICK_RANGE::QUICK_RANGE() |
1843 | :min_key(0),max_key(0),min_length(0),max_length(0), |
1844 | flag(NO_MIN_RANGE | NO_MAX_RANGE), |
1845 | min_keypart_map(0), max_keypart_map(0) |
1846 | {} |
1847 | |
1848 | SEL_ARG::SEL_ARG(SEL_ARG &arg) :Sql_alloc() |
1849 | { |
1850 | type=arg.type; |
1851 | min_flag=arg.min_flag; |
1852 | max_flag=arg.max_flag; |
1853 | maybe_flag=arg.maybe_flag; |
1854 | maybe_null=arg.maybe_null; |
1855 | part=arg.part; |
1856 | field=arg.field; |
1857 | min_value=arg.min_value; |
1858 | max_value=arg.max_value; |
1859 | next_key_part=arg.next_key_part; |
1860 | max_part_no= arg.max_part_no; |
1861 | use_count=1; elements=1; |
1862 | } |
1863 | |
1864 | |
1865 | inline void SEL_ARG::make_root() |
1866 | { |
1867 | left=right= &null_element; |
1868 | color=BLACK; |
1869 | next=prev=0; |
1870 | use_count=0; elements=1; |
1871 | } |
1872 | |
1873 | SEL_ARG::SEL_ARG(Field *f,const uchar *min_value_arg, |
1874 | const uchar *max_value_arg) |
1875 | :min_flag(0), max_flag(0), maybe_flag(0), maybe_null(f->real_maybe_null()), |
1876 | elements(1), use_count(1), field(f), min_value((uchar*) min_value_arg), |
1877 | max_value((uchar*) max_value_arg), next(0),prev(0), |
1878 | next_key_part(0), color(BLACK), type(KEY_RANGE) |
1879 | { |
1880 | left=right= &null_element; |
1881 | max_part_no= 1; |
1882 | } |
1883 | |
1884 | SEL_ARG::SEL_ARG(Field *field_,uint8 part_, |
1885 | uchar *min_value_, uchar *max_value_, |
1886 | uint8 min_flag_,uint8 max_flag_,uint8 maybe_flag_) |
1887 | :min_flag(min_flag_),max_flag(max_flag_),maybe_flag(maybe_flag_), |
1888 | part(part_),maybe_null(field_->real_maybe_null()), elements(1),use_count(1), |
1889 | field(field_), min_value(min_value_), max_value(max_value_), |
1890 | next(0),prev(0),next_key_part(0),color(BLACK),type(KEY_RANGE) |
1891 | { |
1892 | max_part_no= part+1; |
1893 | left=right= &null_element; |
1894 | } |
1895 | |
1896 | SEL_ARG *SEL_ARG::clone(RANGE_OPT_PARAM *param, SEL_ARG *new_parent, |
1897 | SEL_ARG **next_arg) |
1898 | { |
1899 | SEL_ARG *tmp; |
1900 | |
1901 | /* Bail out if we have already generated too many SEL_ARGs */ |
1902 | if (++param->alloced_sel_args > MAX_SEL_ARGS) |
1903 | return 0; |
1904 | |
1905 | if (type != KEY_RANGE) |
1906 | { |
1907 | if (!(tmp= new (param->mem_root) SEL_ARG(type))) |
1908 | return 0; // out of memory |
1909 | tmp->prev= *next_arg; // Link into next/prev chain |
1910 | (*next_arg)->next=tmp; |
1911 | (*next_arg)= tmp; |
1912 | tmp->part= this->part; |
1913 | } |
1914 | else |
1915 | { |
1916 | if (!(tmp= new (param->mem_root) SEL_ARG(field,part, min_value,max_value, |
1917 | min_flag, max_flag, maybe_flag))) |
1918 | return 0; // OOM |
1919 | tmp->parent=new_parent; |
1920 | tmp->next_key_part=next_key_part; |
1921 | if (left != &null_element) |
1922 | if (!(tmp->left=left->clone(param, tmp, next_arg))) |
1923 | return 0; // OOM |
1924 | |
1925 | tmp->prev= *next_arg; // Link into next/prev chain |
1926 | (*next_arg)->next=tmp; |
1927 | (*next_arg)= tmp; |
1928 | |
1929 | if (right != &null_element) |
1930 | if (!(tmp->right= right->clone(param, tmp, next_arg))) |
1931 | return 0; // OOM |
1932 | } |
1933 | increment_use_count(1); |
1934 | tmp->color= color; |
1935 | tmp->elements= this->elements; |
1936 | tmp->max_part_no= max_part_no; |
1937 | return tmp; |
1938 | } |
1939 | |
1940 | /** |
1941 | This gives the first SEL_ARG in the interval list, and the minimal element |
1942 | in the red-black tree |
1943 | |
1944 | @return |
1945 | SEL_ARG first SEL_ARG in the interval list |
1946 | */ |
1947 | SEL_ARG *SEL_ARG::first() |
1948 | { |
1949 | SEL_ARG *next_arg=this; |
1950 | if (!next_arg->left) |
1951 | return 0; // MAYBE_KEY |
1952 | while (next_arg->left != &null_element) |
1953 | next_arg=next_arg->left; |
1954 | return next_arg; |
1955 | } |
1956 | |
1957 | const SEL_ARG *SEL_ARG::first() const |
1958 | { |
1959 | return const_cast<SEL_ARG*>(this)->first(); |
1960 | } |
1961 | |
1962 | SEL_ARG *SEL_ARG::last() |
1963 | { |
1964 | SEL_ARG *next_arg=this; |
1965 | if (!next_arg->right) |
1966 | return 0; // MAYBE_KEY |
1967 | while (next_arg->right != &null_element) |
1968 | next_arg=next_arg->right; |
1969 | return next_arg; |
1970 | } |
1971 | |
1972 | |
1973 | /* |
  Check if a compare is ok, when one takes ranges into account.
  Returns -2 or 2 if the ranges were 'joined', like a < 2 and a >= 2
  (the key values are equal but exactly one of the endpoints is open).
1976 | */ |
1977 | |
1978 | int SEL_ARG::sel_cmp(Field *field, uchar *a, uchar *b, uint8 a_flag, |
1979 | uint8 b_flag) |
1980 | { |
1981 | int cmp; |
1982 | /* First check if there was a compare to a min or max element */ |
1983 | if (a_flag & (NO_MIN_RANGE | NO_MAX_RANGE)) |
1984 | { |
1985 | if ((a_flag & (NO_MIN_RANGE | NO_MAX_RANGE)) == |
1986 | (b_flag & (NO_MIN_RANGE | NO_MAX_RANGE))) |
1987 | return 0; |
1988 | return (a_flag & NO_MIN_RANGE) ? -1 : 1; |
1989 | } |
1990 | if (b_flag & (NO_MIN_RANGE | NO_MAX_RANGE)) |
1991 | return (b_flag & NO_MIN_RANGE) ? 1 : -1; |
1992 | |
1993 | if (field->real_maybe_null()) // If null is part of key |
1994 | { |
1995 | if (*a != *b) |
1996 | { |
1997 | return *a ? -1 : 1; |
1998 | } |
1999 | if (*a) |
      goto end;                                 // NULLs were equal
2001 | a++; b++; // Skip NULL marker |
2002 | } |
  cmp=field->key_cmp(a, b);
2004 | if (cmp) return cmp < 0 ? -1 : 1; // The values differed |
2005 | |
  // Check if the compared equal arguments were defined with open/closed range
2007 | end: |
2008 | if (a_flag & (NEAR_MIN | NEAR_MAX)) |
2009 | { |
2010 | if ((a_flag & (NEAR_MIN | NEAR_MAX)) == (b_flag & (NEAR_MIN | NEAR_MAX))) |
2011 | return 0; |
2012 | if (!(b_flag & (NEAR_MIN | NEAR_MAX))) |
2013 | return (a_flag & NEAR_MIN) ? 2 : -2; |
2014 | return (a_flag & NEAR_MIN) ? 1 : -1; |
2015 | } |
2016 | if (b_flag & (NEAR_MIN | NEAR_MAX)) |
2017 | return (b_flag & NEAR_MIN) ? -2 : 2; |
  return 0;                                     // The elements were equal
2019 | } |
2020 | |
2021 | |
2022 | SEL_ARG *SEL_ARG::clone_tree(RANGE_OPT_PARAM *param) |
2023 | { |
2024 | SEL_ARG tmp_link,*next_arg,*root; |
2025 | next_arg= &tmp_link; |
2026 | if (!(root= clone(param, (SEL_ARG *) 0, &next_arg))) |
2027 | return 0; |
2028 | next_arg->next=0; // Fix last link |
2029 | tmp_link.next->prev=0; // Fix first link |
2030 | if (root) // If not OOM |
2031 | root->use_count= 0; |
2032 | return root; |
2033 | } |
2034 | |
2035 | |
2036 | /* |
2037 | Table rows retrieval plan. Range optimizer creates QUICK_SELECT_I-derived |
2038 | objects from table read plans. |
2039 | */ |
2040 | class TABLE_READ_PLAN |
2041 | { |
2042 | public: |
2043 | /* |
2044 | Plan read cost, with or without cost of full row retrieval, depending |
2045 | on plan creation parameters. |
2046 | */ |
2047 | double read_cost; |
2048 | ha_rows records; /* estimate of #rows to be examined */ |
2049 | |
2050 | /* |
2051 | If TRUE, the scan returns rows in rowid order. This is used only for |
2052 | scans that can be both ROR and non-ROR. |
2053 | */ |
2054 | bool is_ror; |
2055 | |
2056 | /* |
2057 | Create quick select for this plan. |
2058 | SYNOPSIS |
2059 | make_quick() |
2060 | param Parameter from test_quick_select |
2061 | retrieve_full_rows If TRUE, created quick select will do full record |
2062 | retrieval. |
2063 | parent_alloc Memory pool to use, if any. |
2064 | |
2065 | NOTES |
2066 | retrieve_full_rows is ignored by some implementations. |
2067 | |
2068 | RETURN |
2069 | created quick select |
2070 | NULL on any error. |
2071 | */ |
2072 | virtual QUICK_SELECT_I *make_quick(PARAM *param, |
2073 | bool retrieve_full_rows, |
2074 | MEM_ROOT *parent_alloc=NULL) = 0; |
2075 | |
2076 | /* Table read plans are allocated on MEM_ROOT and are never deleted */ |
2077 | static void *operator new(size_t size, MEM_ROOT *mem_root) |
2078 | { return (void*) alloc_root(mem_root, (uint) size); } |
2079 | static void operator delete(void *ptr,size_t size) { TRASH_FREE(ptr, size); } |
2080 | static void operator delete(void *ptr, MEM_ROOT *mem_root) { /* Never called */ } |
2081 | virtual ~TABLE_READ_PLAN() {} /* Remove gcc warning */ |
2082 | |
2083 | }; |
2084 | |
2085 | class TRP_ROR_INTERSECT; |
2086 | class TRP_ROR_UNION; |
2087 | class TRP_INDEX_MERGE; |
2088 | |
2089 | |
2090 | /* |
2091 | Plan for a QUICK_RANGE_SELECT scan. |
2092 | TRP_RANGE::make_quick ignores retrieve_full_rows parameter because |
2093 | QUICK_RANGE_SELECT doesn't distinguish between 'index only' scans and full |
2094 | record retrieval scans. |
2095 | */ |
2096 | |
2097 | class TRP_RANGE : public TABLE_READ_PLAN |
2098 | { |
2099 | public: |
2100 | SEL_ARG *key; /* set of intervals to be used in "range" method retrieval */ |
2101 | uint key_idx; /* key number in PARAM::key */ |
2102 | uint mrr_flags; |
2103 | uint mrr_buf_size; |
2104 | |
2105 | TRP_RANGE(SEL_ARG *key_arg, uint idx_arg, uint mrr_flags_arg) |
2106 | : key(key_arg), key_idx(idx_arg), mrr_flags(mrr_flags_arg) |
2107 | {} |
2108 | virtual ~TRP_RANGE() {} /* Remove gcc warning */ |
2109 | |
2110 | QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, |
2111 | MEM_ROOT *parent_alloc) |
2112 | { |
2113 | DBUG_ENTER("TRP_RANGE::make_quick" ); |
2114 | QUICK_RANGE_SELECT *quick; |
2115 | if ((quick= get_quick_select(param, key_idx, key, mrr_flags, |
2116 | mrr_buf_size, parent_alloc))) |
2117 | { |
2118 | quick->records= records; |
2119 | quick->read_time= read_cost; |
2120 | } |
2121 | DBUG_RETURN(quick); |
2122 | } |
2123 | }; |
2124 | |
2125 | |
2126 | /* Plan for QUICK_ROR_INTERSECT_SELECT scan. */ |
2127 | |
2128 | class TRP_ROR_INTERSECT : public TABLE_READ_PLAN |
2129 | { |
2130 | public: |
2131 | TRP_ROR_INTERSECT() {} /* Remove gcc warning */ |
2132 | virtual ~TRP_ROR_INTERSECT() {} /* Remove gcc warning */ |
2133 | QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, |
2134 | MEM_ROOT *parent_alloc); |
2135 | |
2136 | /* Array of pointers to ROR range scans used in this intersection */ |
2137 | struct st_ror_scan_info **first_scan; |
2138 | struct st_ror_scan_info **last_scan; /* End of the above array */ |
2139 | struct st_ror_scan_info *cpk_scan; /* Clustered PK scan, if there is one */ |
2140 | bool is_covering; /* TRUE if no row retrieval phase is necessary */ |
2141 | double index_scan_costs; /* SUM(cost(index_scan)) */ |
2142 | }; |
2143 | |
2144 | |
2145 | /* |
2146 | Plan for QUICK_ROR_UNION_SELECT scan. |
2147 | QUICK_ROR_UNION_SELECT always retrieves full rows, so retrieve_full_rows |
2148 | is ignored by make_quick. |
2149 | */ |
2150 | |
2151 | class TRP_ROR_UNION : public TABLE_READ_PLAN |
2152 | { |
2153 | public: |
2154 | TRP_ROR_UNION() {} /* Remove gcc warning */ |
2155 | virtual ~TRP_ROR_UNION() {} /* Remove gcc warning */ |
2156 | QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, |
2157 | MEM_ROOT *parent_alloc); |
2158 | TABLE_READ_PLAN **first_ror; /* array of ptrs to plans for merged scans */ |
2159 | TABLE_READ_PLAN **last_ror; /* end of the above array */ |
2160 | }; |
2161 | |
2162 | |
2163 | /* |
2164 | Plan for QUICK_INDEX_INTERSECT_SELECT scan. |
2165 | QUICK_INDEX_INTERSECT_SELECT always retrieves full rows, so retrieve_full_rows |
2166 | is ignored by make_quick. |
2167 | */ |
2168 | |
2169 | class TRP_INDEX_INTERSECT : public TABLE_READ_PLAN |
2170 | { |
2171 | public: |
2172 | TRP_INDEX_INTERSECT() {} /* Remove gcc warning */ |
2173 | virtual ~TRP_INDEX_INTERSECT() {} /* Remove gcc warning */ |
2174 | QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, |
2175 | MEM_ROOT *parent_alloc); |
2176 | TRP_RANGE **range_scans; /* array of ptrs to plans of intersected scans */ |
2177 | TRP_RANGE **range_scans_end; /* end of the array */ |
2178 | /* keys whose scans are to be filtered by cpk conditions */ |
2179 | key_map filtered_scans; |
2180 | }; |
2181 | |
2182 | |
2183 | /* |
  Plan for QUICK_INDEX_MERGE_SELECT scan.
  QUICK_INDEX_MERGE_SELECT always retrieves full rows, so retrieve_full_rows
  is ignored by make_quick.
2187 | */ |
2188 | |
2189 | class TRP_INDEX_MERGE : public TABLE_READ_PLAN |
2190 | { |
2191 | public: |
2192 | TRP_INDEX_MERGE() {} /* Remove gcc warning */ |
2193 | virtual ~TRP_INDEX_MERGE() {} /* Remove gcc warning */ |
2194 | QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, |
2195 | MEM_ROOT *parent_alloc); |
2196 | TRP_RANGE **range_scans; /* array of ptrs to plans of merged scans */ |
2197 | TRP_RANGE **range_scans_end; /* end of the array */ |
2198 | }; |
2199 | |
2200 | |
2201 | /* |
2202 | Plan for a QUICK_GROUP_MIN_MAX_SELECT scan. |
2203 | */ |
2204 | |
2205 | class TRP_GROUP_MIN_MAX : public TABLE_READ_PLAN |
2206 | { |
2207 | private: |
2208 | bool have_min, have_max, have_agg_distinct; |
2209 | KEY_PART_INFO *min_max_arg_part; |
2210 | uint group_prefix_len; |
2211 | uint used_key_parts; |
2212 | uint group_key_parts; |
2213 | KEY *index_info; |
2214 | uint index; |
2215 | uint key_infix_len; |
2216 | uchar key_infix[MAX_KEY_LENGTH]; |
2217 | SEL_TREE *range_tree; /* Represents all range predicates in the query. */ |
2218 | SEL_ARG *index_tree; /* The SEL_ARG sub-tree corresponding to index_info. */ |
2219 | uint param_idx; /* Index of used key in param->key. */ |
2220 | bool is_index_scan; /* Use index_next() instead of random read */ |
2221 | public: |
2222 | /* Number of records selected by the ranges in index_tree. */ |
2223 | ha_rows quick_prefix_records; |
2224 | public: |
2225 | TRP_GROUP_MIN_MAX(bool have_min_arg, bool have_max_arg, |
2226 | bool have_agg_distinct_arg, |
2227 | KEY_PART_INFO *min_max_arg_part_arg, |
2228 | uint group_prefix_len_arg, uint used_key_parts_arg, |
2229 | uint group_key_parts_arg, KEY *index_info_arg, |
2230 | uint index_arg, uint key_infix_len_arg, |
2231 | uchar *key_infix_arg, |
2232 | SEL_TREE *tree_arg, SEL_ARG *index_tree_arg, |
2233 | uint param_idx_arg, ha_rows quick_prefix_records_arg) |
2234 | : have_min(have_min_arg), have_max(have_max_arg), |
2235 | have_agg_distinct(have_agg_distinct_arg), |
2236 | min_max_arg_part(min_max_arg_part_arg), |
2237 | group_prefix_len(group_prefix_len_arg), used_key_parts(used_key_parts_arg), |
2238 | group_key_parts(group_key_parts_arg), index_info(index_info_arg), |
2239 | index(index_arg), key_infix_len(key_infix_len_arg), range_tree(tree_arg), |
2240 | index_tree(index_tree_arg), param_idx(param_idx_arg), is_index_scan(FALSE), |
2241 | quick_prefix_records(quick_prefix_records_arg) |
2242 | { |
2243 | if (key_infix_len) |
2244 | memcpy(this->key_infix, key_infix_arg, key_infix_len); |
2245 | } |
2246 | virtual ~TRP_GROUP_MIN_MAX() {} /* Remove gcc warning */ |
2247 | |
2248 | QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows, |
2249 | MEM_ROOT *parent_alloc); |
2250 | void use_index_scan() { is_index_scan= TRUE; } |
2251 | }; |
2252 | |
2253 | |
2254 | typedef struct st_index_scan_info |
2255 | { |
2256 | uint idx; /* # of used key in param->keys */ |
2257 | uint keynr; /* # of used key in table */ |
2258 | uint range_count; |
2259 | ha_rows records; /* estimate of # records this scan will return */ |
2260 | |
2261 | /* Set of intervals over key fields that will be used for row retrieval. */ |
2262 | SEL_ARG *sel_arg; |
2263 | |
2264 | KEY *key_info; |
2265 | uint used_key_parts; |
2266 | |
2267 | /* Estimate of # records filtered out by intersection with cpk */ |
2268 | ha_rows filtered_out; |
2269 | /* Bitmap of fields used in index intersection */ |
2270 | MY_BITMAP used_fields; |
2271 | |
2272 | /* Fields used in the query and covered by ROR scan. */ |
2273 | MY_BITMAP covered_fields; |
2274 | uint used_fields_covered; /* # of set bits in covered_fields */ |
2275 | int key_rec_length; /* length of key record (including rowid) */ |
2276 | |
2277 | /* |
2278 | Cost of reading all index records with values in sel_arg intervals set |
2279 | (assuming there is no need to access full table records) |
2280 | */ |
2281 | double index_read_cost; |
2282 | uint first_uncovered_field; /* first unused bit in covered_fields */ |
2283 | uint key_components; /* # of parts in the key */ |
2284 | } INDEX_SCAN_INFO; |
2285 | |
2286 | /* |
2287 | Fill param->needed_fields with bitmap of fields used in the query. |
2288 | SYNOPSIS |
2289 | fill_used_fields_bitmap() |
2290 | param Parameter from test_quick_select function. |
2291 | |
2292 | NOTES |
2293 | Clustered PK members are not put into the bitmap as they are implicitly |
2294 | present in all keys (and it is impossible to avoid reading them). |
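    For example, with InnoDB's clustered primary key, every secondary index
    entry physically contains the PK columns, so any key scan can return
    them at no extra cost.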
2295 | RETURN |
2296 | 0 Ok |
2297 | 1 Out of memory. |
2298 | */ |
2299 | |
2300 | static int fill_used_fields_bitmap(PARAM *param) |
2301 | { |
2302 | TABLE *table= param->table; |
2303 | my_bitmap_map *tmp; |
2304 | uint pk; |
2305 | param->tmp_covered_fields.bitmap= 0; |
2306 | param->fields_bitmap_size= table->s->column_bitmap_size; |
2307 | if (!(tmp= (my_bitmap_map*) alloc_root(param->mem_root, |
2308 | param->fields_bitmap_size)) || |
2309 | my_bitmap_init(¶m->needed_fields, tmp, table->s->fields, FALSE)) |
2310 | return 1; |
2311 | |
2312 | bitmap_copy(¶m->needed_fields, table->read_set); |
2313 | bitmap_union(¶m->needed_fields, table->write_set); |
2314 | |
2315 | pk= param->table->s->primary_key; |
2316 | if (pk != MAX_KEY && param->table->file->primary_key_is_clustered()) |
2317 | { |
2318 | /* The table uses clustered PK and it is not internally generated */ |
2319 | KEY_PART_INFO *key_part= param->table->key_info[pk].key_part; |
2320 | KEY_PART_INFO *key_part_end= key_part + |
2321 | param->table->key_info[pk].user_defined_key_parts; |
2322 | for (;key_part != key_part_end; ++key_part) |
2323 | bitmap_clear_bit(¶m->needed_fields, key_part->fieldnr-1); |
2324 | } |
2325 | return 0; |
2326 | } |
2327 | |
2328 | |
2329 | /* |
2330 | Test if a key can be used in different ranges |
2331 | |
2332 | SYNOPSIS |
2333 | SQL_SELECT::test_quick_select() |
2334 | thd Current thread |
2335 | keys_to_use Keys to use for range retrieval |
2336 | prev_tables Tables assumed to be already read when the scan is |
2337 | performed (but not read at the moment of this call) |
2338 | limit Query limit |
2339 | force_quick_range Prefer to use range (instead of full table scan) even |
2340 | if it is more expensive. |
2341 | |
2342 | NOTES |
2343 | Updates the following in the select parameter: |
      needed_reg - Bits for keys that may be used if all prev regs are read
2345 | quick - Parameter to use when reading records. |
2346 | |
2347 | In the table struct the following information is updated: |
2348 | quick_keys - Which keys can be used |
2349 | quick_rows - How many rows the key matches |
2350 | quick_condition_rows - E(# rows that will satisfy the table condition) |
2351 | |
2352 | IMPLEMENTATION |
2353 | quick_condition_rows value is obtained as follows: |
2354 | |
2355 | It is a minimum of E(#output rows) for all considered table access |
2356 | methods (range and index_merge accesses over various indexes). |
2357 | |
2358 | The obtained value is not a true E(#rows that satisfy table condition) |
2359 | but rather a pessimistic estimate. To obtain a true E(#...) one would |
2360 | need to combine estimates of various access methods, taking into account |
2361 | correlations between sets of rows they will return. |
2362 | |
    For example, if values of tbl.key1 and tbl.key2 are independent (a
    reasonable assumption if we have no information about their correlation)
    then the correct estimate will be:

      E(#rows("tbl.key1 < c1 AND tbl.key2 < c2")) =
       = E(#rows(tbl.key1 < c1)) / total_rows(tbl) * E(#rows(tbl.key2 < c2))

    which is smaller than

      MIN(E(#rows(tbl.key1 < c1)), E(#rows(tbl.key2 < c2)))
2373 | |
2374 | which is currently produced. |
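
    With total_rows(tbl)=1000, E(#rows(tbl.key1 < c1))=100 and
    E(#rows(tbl.key2 < c2))=200, for instance, the independence-based
    estimate would be 100 / 1000 * 200 = 20 rows, while the MIN-based one
    gives 100 rows.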
2375 | |
2376 | TODO |
    * Change the value returned in quick_condition_rows from a pessimistic
      estimate to true E(#rows that satisfy table condition).
      (we can re-use some of the E(#rows) calculation code from
      index_merge/intersection for this)
2381 | |
2382 | * Check if this function really needs to modify keys_to_use, and change the |
2383 | code to pass it by reference if it doesn't. |
2384 | |
    * In addition to force_quick_range, other means can be (and usually are)
      used to make this function prefer range over full table scan. Figure
      out if force_quick_range is really needed.
2388 | |
2389 | RETURN |
2390 | -1 if impossible select (i.e. certainly no rows will be selected) |
2391 | 0 if can't use quick_select |
2392 | 1 if found usable ranges and quick select has been successfully created. |
2393 | */ |
2394 | |
2395 | int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, |
2396 | table_map prev_tables, |
2397 | ha_rows limit, bool force_quick_range, |
2398 | bool ordered_output, |
2399 | bool remove_false_parts_of_where) |
2400 | { |
2401 | uint idx; |
2402 | double scan_time; |
2403 | DBUG_ENTER("SQL_SELECT::test_quick_select" ); |
2404 | DBUG_PRINT("enter" ,("keys_to_use: %lu prev_tables: %lu const_tables: %lu" , |
2405 | (ulong) keys_to_use.to_ulonglong(), (ulong) prev_tables, |
2406 | (ulong) const_tables)); |
2407 | DBUG_PRINT("info" , ("records: %lu" , (ulong) head->stat_records())); |
2408 | delete quick; |
2409 | quick=0; |
2410 | needed_reg.clear_all(); |
2411 | quick_keys.clear_all(); |
2412 | DBUG_ASSERT(!head->is_filled_at_execution()); |
2413 | if (keys_to_use.is_clear_all() || head->is_filled_at_execution()) |
2414 | DBUG_RETURN(0); |
2415 | records= head->stat_records(); |
2416 | if (!records) |
2417 | records++; /* purecov: inspected */ |
2418 | scan_time= (double) records / TIME_FOR_COMPARE + 1; |
2419 | read_time= (double) head->file->scan_time() + scan_time + 1.1; |
2420 | if (head->force_index) |
2421 | scan_time= read_time= DBL_MAX; |
2422 | if (limit < records) |
2423 | read_time= (double) records + scan_time + 1; // Force to use index |
2424 | |
2425 | possible_keys.clear_all(); |
2426 | |
2427 | DBUG_PRINT("info" ,("Time to scan table: %g" , read_time)); |
2428 | |
2429 | keys_to_use.intersect(head->keys_in_use_for_query); |
2430 | if (!keys_to_use.is_clear_all()) |
2431 | { |
2432 | uchar buff[STACK_BUFF_ALLOC]; |
2433 | MEM_ROOT alloc; |
2434 | SEL_TREE *tree= NULL; |
2435 | KEY_PART *key_parts; |
2436 | KEY *key_info; |
2437 | PARAM param; |
2438 | |
2439 | if (check_stack_overrun(thd, 2*STACK_MIN_SIZE + sizeof(PARAM), buff)) |
2440 | DBUG_RETURN(0); // Fatal error flag is set |
2441 | |
2442 | /* set up parameter that is passed to all functions */ |
2443 | param.thd= thd; |
2444 | param.baseflag= head->file->ha_table_flags(); |
2445 | param.prev_tables=prev_tables | const_tables; |
2446 | param.read_tables=read_tables; |
2447 | param.current_table= head->map; |
2448 | param.table=head; |
2449 | param.keys=0; |
2450 | param.mem_root= &alloc; |
2451 | param.old_root= thd->mem_root; |
2452 | param.needed_reg= &needed_reg; |
2453 | param.imerge_cost_buff_size= 0; |
2454 | param.using_real_indexes= TRUE; |
2455 | param.remove_jump_scans= TRUE; |
2456 | param.remove_false_where_parts= remove_false_parts_of_where; |
2457 | param.force_default_mrr= ordered_output; |
2458 | param.possible_keys.clear_all(); |
2459 | |
2460 | thd->no_errors=1; // Don't warn about NULL |
2461 | init_sql_alloc(&alloc, "test_quick_select" , |
2462 | thd->variables.range_alloc_block_size, 0, |
2463 | MYF(MY_THREAD_SPECIFIC)); |
2464 | if (!(param.key_parts= |
2465 | (KEY_PART*) alloc_root(&alloc, |
2466 | sizeof(KEY_PART) * |
2467 | head->s->actual_n_key_parts(thd))) || |
2468 | fill_used_fields_bitmap(¶m)) |
2469 | { |
2470 | thd->no_errors=0; |
2471 | free_root(&alloc,MYF(0)); // Return memory & allocator |
2472 | DBUG_RETURN(0); // Can't use range |
2473 | } |
2474 | key_parts= param.key_parts; |
2475 | |
2476 | /* |
2477 | Make an array with description of all key parts of all table keys. |
2478 | This is used in get_mm_parts function. |
2479 | */ |
2480 | key_info= head->key_info; |
2481 | uint max_key_len= 0; |
2482 | for (idx=0 ; idx < head->s->keys ; idx++, key_info++) |
2483 | { |
2484 | KEY_PART_INFO *key_part_info; |
2485 | uint n_key_parts= head->actual_n_key_parts(key_info); |
2486 | |
2487 | if (!keys_to_use.is_set(idx)) |
2488 | continue; |
2489 | if (key_info->flags & HA_FULLTEXT) |
2490 | continue; // ToDo: ft-keys in non-ft ranges, if possible SerG |
2491 | |
2492 | param.key[param.keys]=key_parts; |
2493 | key_part_info= key_info->key_part; |
2494 | uint cur_key_len= 0; |
2495 | for (uint part= 0 ; part < n_key_parts ; |
2496 | part++, key_parts++, key_part_info++) |
2497 | { |
2498 | key_parts->key= param.keys; |
2499 | key_parts->part= part; |
2500 | key_parts->length= key_part_info->length; |
2501 | key_parts->store_length= key_part_info->store_length; |
2502 | cur_key_len += key_part_info->store_length; |
2503 | key_parts->field= key_part_info->field; |
2504 | key_parts->null_bit= key_part_info->null_bit; |
2505 | key_parts->image_type = |
2506 | (key_info->flags & HA_SPATIAL) ? Field::itMBR : Field::itRAW; |
2507 | /* Only HA_PART_KEY_SEG is used */ |
2508 | key_parts->flag= (uint8) key_part_info->key_part_flag; |
2509 | } |
2510 | param.real_keynr[param.keys++]=idx; |
2511 | if (cur_key_len > max_key_len) |
2512 | max_key_len= cur_key_len; |
2513 | } |
2514 | param.key_parts_end=key_parts; |
2515 | param.alloced_sel_args= 0; |
2516 | |
2517 | max_key_len++; /* Take into account the "+1" in QUICK_RANGE::QUICK_RANGE */ |
2518 | if (!(param.min_key= (uchar*)alloc_root(&alloc,max_key_len)) || |
2519 | !(param.max_key= (uchar*)alloc_root(&alloc,max_key_len))) |
2520 | { |
2521 | thd->no_errors=0; |
2522 | free_root(&alloc,MYF(0)); // Return memory & allocator |
2523 | DBUG_RETURN(0); // Can't use range |
2524 | } |
2525 | |
2526 | thd->mem_root= &alloc; |
2527 | /* Calculate cost of full index read for the shortest covering index */ |
2528 | if (!force_quick_range && !head->covering_keys.is_clear_all()) |
2529 | { |
2530 | int key_for_use= find_shortest_key(head, &head->covering_keys); |
2531 | double key_read_time= head->file->keyread_time(key_for_use, 1, records) + |
2532 | (double) records / TIME_FOR_COMPARE; |
2533 | DBUG_PRINT("info" , ("'all'+'using index' scan will be using key %d, " |
2534 | "read time %g" , key_for_use, key_read_time)); |
2535 | if (key_read_time < read_time) |
2536 | read_time= key_read_time; |
2537 | } |
2538 | |
2539 | TABLE_READ_PLAN *best_trp= NULL; |
2540 | TRP_GROUP_MIN_MAX *group_trp; |
2541 | double best_read_time= read_time; |
2542 | |
2543 | if (cond) |
2544 | { |
2545 | if ((tree= cond->get_mm_tree(¶m, &cond))) |
2546 | { |
2547 | if (tree->type == SEL_TREE::IMPOSSIBLE) |
2548 | { |
2549 | records=0L; /* Return -1 from this function. */ |
2550 | read_time= (double) HA_POS_ERROR; |
2551 | goto free_mem; |
2552 | } |
2553 | /* |
2554 | If the tree can't be used for range scans, proceed anyway, as we |
2555 | can construct a group-min-max quick select |
2556 | */ |
2557 | if (tree->type != SEL_TREE::KEY && tree->type != SEL_TREE::KEY_SMALLER) |
2558 | tree= NULL; |
2559 | } |
2560 | } |
2561 | |
2562 | /* |
2563 | Try to construct a QUICK_GROUP_MIN_MAX_SELECT. |
2564 | Notice that it can be constructed no matter if there is a range tree. |
2565 | */ |
2566 | group_trp= get_best_group_min_max(¶m, tree, best_read_time); |
2567 | if (group_trp) |
2568 | { |
2569 | param.table->quick_condition_rows= MY_MIN(group_trp->records, |
2570 | head->stat_records()); |
2571 | if (group_trp->read_cost < best_read_time) |
2572 | { |
2573 | best_trp= group_trp; |
2574 | best_read_time= best_trp->read_cost; |
2575 | } |
2576 | } |
2577 | |
2578 | if (tree) |
2579 | { |
2580 | /* |
2581 | It is possible to use a range-based quick select (but it might be |
2582 | slower than 'all' table scan). |
2583 | */ |
2584 | TRP_RANGE *range_trp; |
2585 | TRP_ROR_INTERSECT *rori_trp; |
2586 | TRP_INDEX_INTERSECT *intersect_trp; |
2587 | bool can_build_covering= FALSE; |
2588 | |
2589 | remove_nonrange_trees(¶m, tree); |
2590 | |
2591 | /* Get best 'range' plan and prepare data for making other plans */ |
2592 | if ((range_trp= get_key_scans_params(¶m, tree, FALSE, TRUE, |
2593 | best_read_time))) |
2594 | { |
2595 | best_trp= range_trp; |
2596 | best_read_time= best_trp->read_cost; |
2597 | } |
2598 | |
2599 | /* |
2600 | Simultaneous key scans and row deletes on several handler |
2601 | objects are not allowed so don't use ROR-intersection for |
2602 | table deletes. |
2603 | */ |
2604 | if ((thd->lex->sql_command != SQLCOM_DELETE) && |
2605 | optimizer_flag(thd, OPTIMIZER_SWITCH_INDEX_MERGE)) |
2606 | { |
2607 | /* |
2608 | Get best non-covering ROR-intersection plan and prepare data for |
2609 | building covering ROR-intersection. |
2610 | */ |
2611 | if ((rori_trp= get_best_ror_intersect(¶m, tree, best_read_time, |
2612 | &can_build_covering))) |
2613 | { |
2614 | best_trp= rori_trp; |
2615 | best_read_time= best_trp->read_cost; |
2616 | /* |
2617 | Try constructing covering ROR-intersect only if it looks possible |
2618 | and worth doing. |
2619 | */ |
2620 | if (!rori_trp->is_covering && can_build_covering && |
2621 | (rori_trp= get_best_covering_ror_intersect(¶m, tree, |
2622 | best_read_time))) |
2623 | best_trp= rori_trp; |
2624 | } |
2625 | } |
2626 | /* |
2627 | Do not look for an index intersection plan if there is a covering |
2628 | index. The scan by this covering index will be always cheaper than |
2629 | any index intersection. |
2630 | */ |
2631 | if (param.table->covering_keys.is_clear_all() && |
2632 | optimizer_flag(thd, OPTIMIZER_SWITCH_INDEX_MERGE) && |
2633 | optimizer_flag(thd, OPTIMIZER_SWITCH_INDEX_MERGE_SORT_INTERSECT)) |
2634 | { |
2635 | if ((intersect_trp= get_best_index_intersect(¶m, tree, |
2636 | best_read_time))) |
2637 | { |
2638 | best_trp= intersect_trp; |
2639 | best_read_time= best_trp->read_cost; |
2640 | set_if_smaller(param.table->quick_condition_rows, |
2641 | intersect_trp->records); |
2642 | } |
2643 | } |
2644 | |
2645 | if (optimizer_flag(thd, OPTIMIZER_SWITCH_INDEX_MERGE) && |
2646 | head->stat_records() != 0) |
2647 | { |
2648 | /* Try creating index_merge/ROR-union scan. */ |
2649 | SEL_IMERGE *imerge; |
2650 | TABLE_READ_PLAN *best_conj_trp= NULL, |
2651 | *UNINIT_VAR(new_conj_trp); /* no empty index_merge lists possible */ |
2652 | DBUG_PRINT("info" ,("No range reads possible," |
2653 | " trying to construct index_merge" )); |
2654 | List_iterator_fast<SEL_IMERGE> it(tree->merges); |
2655 | while ((imerge= it++)) |
2656 | { |
2657 | new_conj_trp= get_best_disjunct_quick(¶m, imerge, best_read_time); |
2658 | if (new_conj_trp) |
2659 | set_if_smaller(param.table->quick_condition_rows, |
2660 | new_conj_trp->records); |
2661 | if (new_conj_trp && |
2662 | (!best_conj_trp || |
2663 | new_conj_trp->read_cost < best_conj_trp->read_cost)) |
2664 | { |
2665 | best_conj_trp= new_conj_trp; |
2666 | best_read_time= best_conj_trp->read_cost; |
2667 | } |
2668 | } |
2669 | if (best_conj_trp) |
2670 | best_trp= best_conj_trp; |
2671 | } |
2672 | } |
2673 | |
2674 | thd->mem_root= param.old_root; |
2675 | |
2676 | /* If we got a read plan, create a quick select from it. */ |
2677 | if (best_trp) |
2678 | { |
2679 | records= best_trp->records; |
2680 | if (!(quick= best_trp->make_quick(¶m, TRUE)) || quick->init()) |
2681 | { |
2682 | delete quick; |
2683 | quick= NULL; |
2684 | } |
2685 | } |
2686 | possible_keys= param.possible_keys; |
2687 | |
2688 | free_mem: |
2689 | free_root(&alloc,MYF(0)); // Return memory & allocator |
2690 | thd->mem_root= param.old_root; |
2691 | thd->no_errors=0; |
2692 | } |
2693 | |
2694 | DBUG_EXECUTE("info" , print_quick(quick, &needed_reg);); |
2695 | |
2696 | /* |
2697 | Assume that if the user is using 'limit' we will only need to scan |
2698 | limit rows if we are using a key |
2699 | */ |
2700 | DBUG_RETURN(records ? MY_TEST(quick) : -1); |
2701 | } |
2702 | |
2703 | /**************************************************************************** |
2704 | * Condition selectivity module |
2705 | ****************************************************************************/ |
2706 | |
2707 | |
2708 | /* |
2709 | Build descriptors of pseudo-indexes over columns to perform range analysis |
2710 | |
2711 | SYNOPSIS |
2712 | create_key_parts_for_pseudo_indexes() |
2713 | param IN/OUT data structure for the descriptors to be built |
2714 | used_fields bitmap of columns for which the descriptors are to be built |
2715 | |
2716 | DESCRIPTION |
2717 | For each column marked in the bitmap used_fields the function builds |
2718 | a descriptor of a single-component pseudo-index over this column that |
    can be used for the range analysis of the predicates over this column.
2720 | The descriptors are created in the memory of param->mem_root. |
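
    For example, for a condition like

      WHERE col1 > 10 AND col3 LIKE 'ab%'

    where neither col1 nor col3 is a usable prefix of any real index, two
    single-component pseudo-index descriptors, (col1) and (col3), are built
    (a hypothetical illustration), letting the regular range analysis
    machinery build SEL_ARG trees for these columns.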
2721 | |
2722 | RETURN |
2723 | FALSE in the case of success |
2724 | TRUE otherwise |
2725 | */ |
2726 | |
2727 | static |
2728 | bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param, |
2729 | MY_BITMAP *used_fields) |
2730 | { |
2731 | Field **field_ptr; |
2732 | TABLE *table= param->table; |
2733 | uint parts= 0; |
2734 | |
2735 | for (field_ptr= table->field; *field_ptr; field_ptr++) |
2736 | { |
2737 | if (bitmap_is_set(used_fields, (*field_ptr)->field_index)) |
2738 | parts++; |
2739 | } |
2740 | |
2741 | KEY_PART *key_part; |
2742 | uint keys= 0; |
2743 | |
2744 | if (!(key_part= (KEY_PART *) alloc_root(param->mem_root, |
2745 | sizeof(KEY_PART) * parts))) |
2746 | return TRUE; |
2747 | |
2748 | param->key_parts= key_part; |
2749 | uint max_key_len= 0; |
2750 | for (field_ptr= table->field; *field_ptr; field_ptr++) |
2751 | { |
2752 | if (bitmap_is_set(used_fields, (*field_ptr)->field_index)) |
2753 | { |
2754 | Field *field= *field_ptr; |
2755 | uint16 store_length; |
2756 | uint16 max_key_part_length= (uint16) table->file->max_key_part_length(); |
2757 | key_part->key= keys; |
2758 | key_part->part= 0; |
2759 | if (field->flags & BLOB_FLAG) |
2760 | key_part->length= max_key_part_length; |
2761 | else |
2762 | { |
2763 | key_part->length= (uint16) field->key_length(); |
2764 | set_if_smaller(key_part->length, max_key_part_length); |
2765 | } |
2766 | store_length= key_part->length; |
2767 | if (field->real_maybe_null()) |
2768 | store_length+= HA_KEY_NULL_LENGTH; |
2769 | if (field->real_type() == MYSQL_TYPE_VARCHAR) |
2770 | store_length+= HA_KEY_BLOB_LENGTH; |
2771 | if (max_key_len < store_length) |
2772 | max_key_len= store_length; |
2773 | key_part->store_length= store_length; |
2774 | key_part->field= field; |
2775 | key_part->image_type= Field::itRAW; |
2776 | key_part->flag= 0; |
2777 | param->key[keys]= key_part; |
2778 | keys++; |
2779 | key_part++; |
2780 | } |
2781 | } |
2782 | |
2783 | max_key_len++; /* Take into account the "+1" in QUICK_RANGE::QUICK_RANGE */ |
2784 | if (!(param->min_key= (uchar*)alloc_root(param->mem_root, max_key_len)) || |
2785 | !(param->max_key= (uchar*)alloc_root(param->mem_root, max_key_len))) |
2786 | { |
2787 | return true; |
2788 | } |
2789 | param->keys= keys; |
2790 | param->key_parts_end= key_part; |
2791 | |
2792 | return FALSE; |
2793 | } |
2794 | |
2795 | |
2796 | /* |
2797 | Estimate the number of rows in all ranges built for a column |
2798 | by the range optimizer |
2799 | |
2800 | SYNOPSIS |
2801 | records_in_column_ranges() |
2802 | param the data structure to access descriptors of pseudo indexes |
2803 | built over columns used in the condition of the processed query |
2804 | idx the index of the descriptor of interest in param |
2805 | tree the tree representing ranges built for the interesting column |
2806 | |
2807 | DESCRIPTION |
2808 | This function retrieves the ranges represented by the SEL_ARG 'tree' and |
    for each of these ranges r it calls get_column_range_cardinality()
2810 | that estimates the number of expected rows in r. It is assumed that param |
2811 | is the data structure containing the descriptors of pseudo-indexes that |
2812 | has been built to perform range analysis of the range conditions imposed |
2813 | on the columns used in the processed query, while idx is the index of the |
2814 | descriptor created in 'param' exactly for the column for which 'tree' |
2815 | has been built by the range optimizer. |
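
    For example, for a condition like col1 BETWEEN 1 AND 3 OR col1 = 10 the
    SEL_ARG tree describes two ranges, [1, 3] and [10, 10]; the estimates
    that get_column_range_cardinality() returns for them are summed up.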
2816 | |
2817 | RETURN |
2818 | the number of rows in the retrieved ranges |
2819 | */ |
2820 | |
2821 | static |
2822 | double records_in_column_ranges(PARAM *param, uint idx, |
2823 | SEL_ARG *tree) |
2824 | { |
2825 | SEL_ARG_RANGE_SEQ seq; |
2826 | KEY_MULTI_RANGE range; |
2827 | range_seq_t seq_it; |
2828 | double rows; |
2829 | Field *field; |
2830 | uint flags= 0; |
2831 | double total_rows= 0; |
2832 | RANGE_SEQ_IF seq_if = {NULL, sel_arg_range_seq_init, |
2833 | sel_arg_range_seq_next, 0, 0}; |
2834 | |
  /* Handle cases when we don't have a valid non-empty list of ranges */
2836 | if (!tree) |
2837 | return DBL_MAX; |
2838 | if (tree->type == SEL_ARG::IMPOSSIBLE) |
2839 | return (0L); |
2840 | |
2841 | field= tree->field; |
2842 | |
2843 | seq.keyno= idx; |
2844 | seq.real_keyno= MAX_KEY; |
2845 | seq.param= param; |
2846 | seq.start= tree; |
2847 | |
2848 | seq_it= seq_if.init((void *) &seq, 0, flags); |
2849 | |
2850 | while (!seq_if.next(seq_it, &range)) |
2851 | { |
2852 | key_range *min_endp, *max_endp; |
2853 | min_endp= range.start_key.length? &range.start_key : NULL; |
2854 | max_endp= range.end_key.length? &range.end_key : NULL; |
2855 | rows= get_column_range_cardinality(field, min_endp, max_endp, |
2856 | range.range_flag); |
2857 | if (DBL_MAX == rows) |
2858 | { |
2859 | total_rows= DBL_MAX; |
2860 | break; |
2861 | } |
2862 | total_rows += rows; |
2863 | } |
2864 | return total_rows; |
2865 | } |
2866 | |
2867 | |
2868 | /* |
2869 | Calculate the selectivity of the condition imposed on the rows of a table |
2870 | |
2871 | SYNOPSIS |
2872 | calculate_cond_selectivity_for_table() |
2873 | thd the context handle |
2874 | table the table of interest |
2875 | cond conditions imposed on the rows of the table |
2876 | |
2877 | DESCRIPTION |
2878 | This function calculates the selectivity of range conditions cond imposed |
2879 | on the rows of 'table' in the processed query. |
2880 | The calculated selectivity is assigned to the field table->cond_selectivity. |
2881 | |
2882 | Selectivity is calculated as a product of selectivities imposed by: |
2883 | |
2884 | 1. possible range accesses. (if multiple range accesses use the same |
2885 | restrictions on the same field, we make adjustments for that) |
2886 | 2. Sargable conditions on fields for which we have column statistics (if |
2887 | a field is used in a possible range access, we assume that selectivity |
2888 | is already provided by the range access' estimates) |
2889 | 3. Reading a few records from the table pages and checking the condition |
2890 | selectivity (this is used for conditions like "column LIKE '%val%'" |
2891 | where approaches #1 and #2 do not provide selectivity data). |
2892 | |
2893 | NOTE |
2894 | Currently the selectivities of range conditions over different columns are |
2895 | considered independent. |
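
    For example, if a range condition on col1 is estimated to select 10% of
    the rows and a range condition on col2 to select 20%, the resulting
    table->cond_selectivity is 0.1 * 0.2 = 0.02 under this independence
    assumption.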
2896 | |
2897 | RETURN |
2898 | FALSE on success |
2899 | TRUE otherwise |
2900 | */ |
2901 | |
2902 | bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond) |
2903 | { |
2904 | uint keynr; |
2905 | uint max_quick_key_parts= 0; |
2906 | MY_BITMAP *used_fields= &table->cond_set; |
2907 | double table_records= (double)table->stat_records(); |
2908 | DBUG_ENTER("calculate_cond_selectivity_for_table" ); |
2909 | |
2910 | table->cond_selectivity= 1.0; |
2911 | |
2912 | if (!*cond || table_records == 0) |
2913 | DBUG_RETURN(FALSE); |
2914 | |
2915 | if (table->pos_in_table_list->schema_table) |
2916 | DBUG_RETURN(FALSE); |
2917 | |
2918 | MY_BITMAP handled_columns; |
2919 | my_bitmap_map* buf; |
2920 | if (!(buf= (my_bitmap_map*)thd->alloc(table->s->column_bitmap_size))) |
2921 | DBUG_RETURN(TRUE); |
2922 | my_bitmap_init(&handled_columns, buf, table->s->fields, FALSE); |
2923 | |
2924 | /* |
2925 | Calculate the selectivity of the range conditions supported by indexes. |
2926 | |
    First, take into account possible range accesses.
    Range access estimates are the most precise, so we prefer them to any
    other estimate source.
2930 | */ |
2931 | |
2932 | for (keynr= 0; keynr < table->s->keys; keynr++) |
2933 | { |
2934 | if (table->quick_keys.is_set(keynr)) |
2935 | set_if_bigger(max_quick_key_parts, table->quick_key_parts[keynr]); |
2936 | } |
2937 | |
2938 | /* |
2939 | Walk through all indexes, indexes where range access uses more keyparts |
2940 | go first. |
2941 | */ |
2942 | for (uint quick_key_parts= max_quick_key_parts; |
2943 | quick_key_parts; quick_key_parts--) |
2944 | { |
2945 | for (keynr= 0; keynr < table->s->keys; keynr++) |
2946 | { |
2947 | if (table->quick_keys.is_set(keynr) && |
2948 | table->quick_key_parts[keynr] == quick_key_parts) |
2949 | { |
2950 | uint i; |
2951 | uint used_key_parts= table->quick_key_parts[keynr]; |
2952 | double quick_cond_selectivity= table->quick_rows[keynr] / |
2953 | table_records; |
2954 | KEY *key_info= table->key_info + keynr; |
2955 | KEY_PART_INFO* key_part= key_info->key_part; |
2956 | /* |
2957 | Suppose, there are range conditions on two keys |
2958 | KEY1 (col1, col2) |
2959 | KEY2 (col3, col2) |
2960 | |
2961 | we don't want to count selectivity of condition on col2 twice. |
2962 | |
2963 | First, find the longest key prefix that's made of columns whose |
2964 | selectivity wasn't already accounted for. |
2965 | */ |
2966 | for (i= 0; i < used_key_parts; i++, key_part++) |
2967 | { |
2968 | if (bitmap_is_set(&handled_columns, key_part->fieldnr-1)) |
2969 | break; |
2970 | bitmap_set_bit(&handled_columns, key_part->fieldnr-1); |
2971 | } |
2972 | if (i) |
2973 | { |
2974 | double UNINIT_VAR(selectivity_mult); |
2975 | |
2976 | /* |
2977 | There is at least 1-column prefix of columns whose selectivity has |
2978 | not yet been accounted for. |
2979 | */ |
2980 | table->cond_selectivity*= quick_cond_selectivity; |
2981 | if (i != used_key_parts) |
2982 | { |
2983 | /* |
2984 | Range access got us estimate for #used_key_parts. |
2985 | We need estimate for #(i-1) key parts. |
2986 | */ |
2987 | double f1= key_info->actual_rec_per_key(i-1); |
2988 | double f2= key_info->actual_rec_per_key(i); |
2989 | if (f1 > 0 && f2 > 0) |
2990 | selectivity_mult= f1 / f2; |
2991 | else |
2992 | { |
2993 | /* |
2994 | No statistics available, assume the selectivity is proportional |
2995 | to the number of key parts. |
2996 | (i=0 means 1 keypart, i=1 means 2 keyparts, so use i+1) |
2997 | */ |
2998 | selectivity_mult= ((double)(i+1)) / i; |
2999 | } |
3000 | table->cond_selectivity*= selectivity_mult; |
3001 | } |
3002 | /* |
3003 | We need to set selectivity for fields supported by indexes. |
3004 | For single-component indexes and for some first components |
3005 | of other indexes we do it here. For the remaining fields |
3006 | we do it later in this function, in the same way as for the |
3007 | fields not used in any indexes. |
3008 | */ |
3009 | if (i == 1) |
3010 | { |
3011 | uint fieldnr= key_info->key_part[0].fieldnr; |
3012 | table->field[fieldnr-1]->cond_selectivity= quick_cond_selectivity; |
3013 | if (i != used_key_parts) |
3014 | table->field[fieldnr-1]->cond_selectivity*= selectivity_mult; |
3015 | bitmap_clear_bit(used_fields, fieldnr-1); |
3016 | } |
3017 | } |
3018 | } |
3019 | } |
3020 | } |
3021 | |
3022 | /* |
3023 | Second step: calculate the selectivity of the range conditions not |
3024 | supported by any index and selectivity of the range condition |
3025 | over the fields whose selectivity has not been set yet. |
3026 | */ |
3027 | |
3028 | if (thd->variables.optimizer_use_condition_selectivity > 2 && |
3029 | !bitmap_is_clear_all(used_fields)) |
3030 | { |
3031 | PARAM param; |
3032 | MEM_ROOT alloc; |
3033 | SEL_TREE *tree; |
3034 | double rows; |
3035 | |
3036 | init_sql_alloc(&alloc, "calculate_cond_selectivity_for_table" , |
3037 | thd->variables.range_alloc_block_size, 0, |
3038 | MYF(MY_THREAD_SPECIFIC)); |
3039 | param.thd= thd; |
3040 | param.mem_root= &alloc; |
3041 | param.old_root= thd->mem_root; |
3042 | param.table= table; |
3043 | param.is_ror_scan= FALSE; |
3044 | param.remove_false_where_parts= true; |
3045 | |
3046 | if (create_key_parts_for_pseudo_indexes(¶m, used_fields)) |
3047 | goto free_alloc; |
3048 | |
3049 | param.prev_tables= param.read_tables= 0; |
3050 | param.current_table= table->map; |
3051 | param.using_real_indexes= FALSE; |
3052 | param.real_keynr[0]= 0; |
3053 | param.alloced_sel_args= 0; |
3054 | |
3055 | thd->no_errors=1; |
3056 | |
3057 | tree= cond[0]->get_mm_tree(¶m, cond); |
3058 | |
3059 | if (!tree) |
3060 | goto free_alloc; |
3061 | |
3062 | table->reginfo.impossible_range= 0; |
3063 | if (tree->type == SEL_TREE::IMPOSSIBLE) |
3064 | { |
3065 | rows= 0; |
3066 | table->reginfo.impossible_range= 1; |
3067 | goto free_alloc; |
3068 | } |
3069 | else if (tree->type == SEL_TREE::ALWAYS) |
3070 | { |
3071 | rows= table_records; |
3072 | goto free_alloc; |
3073 | } |
3074 | else if (tree->type == SEL_TREE::MAYBE) |
3075 | { |
3076 | rows= table_records; |
3077 | goto free_alloc; |
3078 | } |
3079 | |
3080 | for (uint idx= 0; idx < param.keys; idx++) |
3081 | { |
3082 | SEL_ARG *key= tree->keys[idx]; |
3083 | if (key) |
3084 | { |
3085 | if (key->type == SEL_ARG::IMPOSSIBLE) |
3086 | { |
3087 | rows= 0; |
3088 | table->reginfo.impossible_range= 1; |
3089 | goto free_alloc; |
3090 | } |
3091 | else |
3092 | { |
3093 | rows= records_in_column_ranges(¶m, idx, key); |
3094 | if (rows != DBL_MAX) |
3095 | key->field->cond_selectivity= rows/table_records; |
3096 | } |
3097 | } |
3098 | } |
3099 | |
3100 | for (Field **field_ptr= table->field; *field_ptr; field_ptr++) |
3101 | { |
3102 | Field *table_field= *field_ptr; |
3103 | if (bitmap_is_set(used_fields, table_field->field_index) && |
3104 | table_field->cond_selectivity < 1.0) |
3105 | { |
3106 | if (!bitmap_is_set(&handled_columns, table_field->field_index)) |
3107 | table->cond_selectivity*= table_field->cond_selectivity; |
3108 | } |
3109 | } |
3110 | |
3111 | free_alloc: |
3112 | thd->no_errors= 0; |
3113 | thd->mem_root= param.old_root; |
3114 | free_root(&alloc, MYF(0)); |
3115 | |
3116 | } |
3117 | |
3118 | bitmap_union(used_fields, &handled_columns); |
3119 | |
3120 | /* Check if we can improve selectivity estimates by using sampling */ |
3121 | ulong check_rows= |
3122 | MY_MIN(thd->variables.optimizer_selectivity_sampling_limit, |
3123 | (ulong) (table_records * SELECTIVITY_SAMPLING_SHARE)); |
3124 | if (*cond && check_rows > SELECTIVITY_SAMPLING_THRESHOLD && |
3125 | thd->variables.optimizer_use_condition_selectivity > 4) |
3126 | { |
3127 | find_selective_predicates_list_processor_data *dt= |
3128 | (find_selective_predicates_list_processor_data *) |
3129 | alloc_root(thd->mem_root, |
3130 | sizeof(find_selective_predicates_list_processor_data)); |
3131 | if (!dt) |
3132 | DBUG_RETURN(TRUE); |
3133 | dt->list.empty(); |
3134 | dt->table= table; |
3135 | if ((*cond)->walk(&Item::find_selective_predicates_list_processor, 0, dt)) |
3136 | DBUG_RETURN(TRUE); |
3137 | if (dt->list.elements > 0) |
3138 | { |
3139 | check_rows= check_selectivity(thd, check_rows, table, &dt->list); |
3140 | if (check_rows > SELECTIVITY_SAMPLING_THRESHOLD) |
3141 | { |
3142 | COND_STATISTIC *stat; |
3143 | List_iterator_fast<COND_STATISTIC> it(dt->list); |
3144 | double examined_rows= check_rows; |
3145 | while ((stat= it++)) |
3146 | { |
3147 | if (!stat->positive) |
3148 | { |
3149 | DBUG_PRINT("info" , ("To avoid 0 assigned 1 to the counter" )); |
3150 | stat->positive= 1; // avoid 0 |
3151 | } |
3152 | DBUG_PRINT("info" , ("The predicate selectivity : %g" , |
3153 | (double)stat->positive / examined_rows)); |
3154 | double selectivity= ((double)stat->positive) / examined_rows; |
3155 | table->cond_selectivity*= selectivity; |
3156 | /* |
3157 | If a field is involved then we register its selectivity in case |
3158 | there in an equality with the field. |
3159 | For example in case |
3160 | t1.a LIKE "%bla%" and t1.a = t2.b |
3161 | the selectivity we have found could be used also for t2. |
3162 | */ |
3163 | if (stat->field_arg) |
3164 | { |
3165 | stat->field_arg->cond_selectivity*= selectivity; |
3166 | |
3167 | if (stat->field_arg->next_equal_field) |
3168 | { |
3169 | for (Field *next_field= stat->field_arg->next_equal_field; |
3170 | next_field != stat->field_arg; |
3171 | next_field= next_field->next_equal_field) |
3172 | { |
3173 | next_field->cond_selectivity*= selectivity; |
3174 | next_field->table->cond_selectivity*= selectivity; |
3175 | } |
3176 | } |
3177 | } |
3178 | } |
3179 | |
3180 | } |
      /* This list and its elements are on mem_root, so they should not be freed */
3182 | table->cond_selectivity_sampling_explain= &dt->list; |
3183 | } |
3184 | } |
3185 | |
3186 | DBUG_RETURN(FALSE); |
3187 | } |
3188 | |
3189 | /**************************************************************************** |
3190 | * Condition selectivity code ends |
3191 | ****************************************************************************/ |
3192 | |
3193 | /**************************************************************************** |
3194 | * Partition pruning module |
3195 | ****************************************************************************/ |
3196 | |
3197 | /* |
3198 | Store field key image to table record |
3199 | |
3200 | SYNOPSIS |
3201 | store_key_image_to_rec() |
3202 | field Field which key image should be stored |
3203 | ptr Field value in key format |
3204 | len Length of the value, in bytes |
3205 | |
3206 | ATTENTION |
3207 | len is the length of the value not counting the NULL-byte (at the same |
3208 | time, ptr points to the key image, which starts with NULL-byte for |
3209 | nullable columns) |
3210 | |
3211 | DESCRIPTION |
3212 | Copy the field value from its key image to the table record. The source |
3213 | is the value in key image format, occupying len bytes in buffer pointed |
3214 | by ptr. The destination is table record, in "field value in table record" |
3215 | format. |
3216 | */ |
3217 | |
3218 | void store_key_image_to_rec(Field *field, uchar *ptr, uint len) |
3219 | { |
3220 | /* Do the same as print_key() does */ |
3221 | my_bitmap_map *old_map; |
3222 | |
3223 | if (field->real_maybe_null()) |
3224 | { |
3225 | if (*ptr) |
3226 | { |
3227 | field->set_null(); |
3228 | return; |
3229 | } |
3230 | field->set_notnull(); |
3231 | ptr++; |
3232 | } |
3233 | old_map= dbug_tmp_use_all_columns(field->table, |
3234 | field->table->write_set); |
3235 | field->set_key_image(ptr, len); |
3236 | dbug_tmp_restore_column_map(field->table->write_set, old_map); |
3237 | } |
3238 | |
3239 | #ifdef WITH_PARTITION_STORAGE_ENGINE |
3240 | |
3241 | /* |
3242 | PartitionPruningModule |
3243 | |
3244 | This part of the code does partition pruning. Partition pruning solves the |
3245 | following problem: given a query over partitioned tables, find partitions |
3246 | that we will not need to access (i.e. partitions that we can assume to be |
3247 | empty) when executing the query. |
3248 | The set of partitions to prune doesn't depend on which query execution |
3249 | plan will be used to execute the query. |
3250 | |
3251 | HOW IT WORKS |
3252 | |
3253 | Partition pruning module makes use of RangeAnalysisModule. The following |
3254 | examples show how the problem of partition pruning can be reduced to the |
3255 | range analysis problem: |
3256 | |
3257 | EXAMPLE 1 |
3258 | Consider a query: |
3259 | |
3260 | SELECT * FROM t1 WHERE (t1.a < 5 OR t1.a = 10) AND t1.a > 3 AND t1.b='z' |
3261 | |
3262 | where table t1 is partitioned using PARTITION BY RANGE(t1.a). An apparent |
3263 | way to find the used (i.e. not pruned away) partitions is as follows: |
3264 | |
3265 | 1. analyze the WHERE clause and extract the list of intervals over t1.a |
3266 | for the above query we will get this list: {(3 < t1.a < 5), (t1.a=10)} |
3267 | |
3268 | 2. for each interval I |
3269 | { |
3270 | find partitions that have non-empty intersection with I; |
3271 | mark them as used; |
3272 | } |
3273 | |
3274 | EXAMPLE 2 |
3275 | Suppose the table is partitioned by HASH(part_func(t1.a, t1.b)). Then |
3276 | we need to: |
3277 | |
3278 | 1. Analyze the WHERE clause and get a list of intervals over (t1.a, t1.b). |
3279 | The list of intervals we'll obtain will look like this: |
3280 | ((t1.a, t1.b) = (1,'foo')), |
3281 | ((t1.a, t1.b) = (2,'bar')), |
          ((t1.a, t1.b) > (10,'zz'))
3283 | |
3284 | 2. for each interval I |
3285 | { |
         if (the interval has form "(t1.a, t1.b) = (const1, const2)")
3287 | { |
3288 | calculate HASH(part_func(t1.a, t1.b)); |
3289 | find which partition has records with this hash value and mark |
3290 | it as used; |
3291 | } |
3292 | else |
3293 | { |
3294 | mark all partitions as used; |
3295 | break; |
3296 | } |
3297 | } |
3298 | |
  For both examples, step #1 is exactly what RangeAnalysisModule could
  be used to do, if it were provided with an appropriate index description
  (an array of KEY_PART structures).
  In example #1 we need to provide it with a description of index(t1.a);
  in example #2, with a description of index(t1.a, t1.b).
3304 | |
3305 | These index descriptions are further called "partitioning index |
3306 | descriptions". Note that it doesn't matter if such indexes really exist, |
3307 | as range analysis module only uses the description. |
3308 | |
3309 | Putting it all together, partitioning module works as follows: |
3310 | |
3311 | prune_partitions() { |
3312 | call create_partition_index_description(); |
3313 | |
3314 | call get_mm_tree(); // invoke the RangeAnalysisModule |
3315 | |
3316 | // analyze the obtained interval list and get used partitions |
3317 | call find_used_partitions(); |
3318 | } |
3319 | |
3320 | */ |
3321 | |
3322 | struct st_part_prune_param; |
3323 | struct st_part_opt_info; |
3324 | |
3325 | typedef void (*mark_full_part_func)(partition_info*, uint32); |
3326 | |
3327 | /* |
3328 | Partition pruning operation context |
3329 | */ |
3330 | typedef struct st_part_prune_param |
3331 | { |
3332 | RANGE_OPT_PARAM range_param; /* Range analyzer parameters */ |
3333 | |
3334 | /*************************************************************** |
3335 | Following fields are filled in based solely on partitioning |
3336 | definition and not modified after that: |
3337 | **************************************************************/ |
3338 | partition_info *part_info; /* Copy of table->part_info */ |
3339 | /* Function to get partition id from partitioning fields only */ |
3340 | get_part_id_func get_top_partition_id_func; |
3341 | /* Function to mark a partition as used (w/all subpartitions if they exist)*/ |
3342 | mark_full_part_func mark_full_partition_used; |
3343 | |
3344 | /* Partitioning 'index' description, array of key parts */ |
3345 | KEY_PART *key; |
3346 | |
3347 | /* |
3348 | Number of fields in partitioning 'index' definition created for |
3349 | partitioning (0 if partitioning 'index' doesn't include partitioning |
3350 | fields) |
3351 | */ |
3352 | uint part_fields; |
3353 | uint subpart_fields; /* Same as above for subpartitioning */ |
3354 | |
3355 | /* |
3356 | Number of the last partitioning field keypart in the index, or -1 if |
3357 | partitioning index definition doesn't include partitioning fields. |
3358 | */ |
3359 | int last_part_partno; |
  int last_subpart_partno; /* Same as above for subpartitioning */
3361 | |
3362 | /* |
3363 | is_part_keypart[i] == MY_TEST(keypart #i in partitioning index is a member |
3364 | used in partitioning) |
3365 | Used to maintain current values of cur_part_fields and cur_subpart_fields |
3366 | */ |
3367 | my_bool *is_part_keypart; |
3368 | /* Same as above for subpartitioning */ |
3369 | my_bool *is_subpart_keypart; |
3370 | |
  my_bool ignore_part_fields; /* Ignore rest of partitioning fields */
3372 | |
3373 | /*************************************************************** |
3374 | Following fields form find_used_partitions() recursion context: |
3375 | **************************************************************/ |
3376 | SEL_ARG **arg_stack; /* "Stack" of SEL_ARGs */ |
3377 | SEL_ARG **arg_stack_end; /* Top of the stack */ |
3378 | /* Number of partitioning fields for which we have a SEL_ARG* in arg_stack */ |
3379 | uint cur_part_fields; |
3380 | /* Same as cur_part_fields, but for subpartitioning */ |
3381 | uint cur_subpart_fields; |
3382 | |
3383 | /* Iterator to be used to obtain the "current" set of used partitions */ |
3384 | PARTITION_ITERATOR part_iter; |
3385 | |
3386 | /* Initialized bitmap of num_subparts size */ |
3387 | MY_BITMAP subparts_bitmap; |
3388 | |
3389 | uchar *cur_min_key; |
3390 | uchar *cur_max_key; |
3391 | |
3392 | uint cur_min_flag, cur_max_flag; |
3393 | } PART_PRUNE_PARAM; |
3394 | |
3395 | static bool create_partition_index_description(PART_PRUNE_PARAM *prune_par); |
3396 | static int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree); |
3397 | static int find_used_partitions_imerge(PART_PRUNE_PARAM *ppar, |
3398 | SEL_IMERGE *imerge); |
3399 | static int find_used_partitions_imerge_list(PART_PRUNE_PARAM *ppar, |
3400 | List<SEL_IMERGE> &merges); |
3401 | static void mark_all_partitions_as_used(partition_info *part_info); |
3402 | |
3403 | #ifndef DBUG_OFF |
3404 | static void print_partitioning_index(KEY_PART *parts, KEY_PART *parts_end); |
3405 | static void dbug_print_field(Field *field); |
3406 | static void dbug_print_segment_range(SEL_ARG *arg, KEY_PART *part); |
3407 | static void dbug_print_singlepoint_range(SEL_ARG **start, uint num); |
3408 | #endif |
3409 | |
3410 | |
3411 | /** |
3412 | Perform partition pruning for a given table and condition. |
3413 | |
3414 | @param thd Thread handle |
3415 | @param table Table to perform partition pruning for |
3416 | @param pprune_cond Condition to use for partition pruning |
3417 | |
  @note This function assumes that lock_partitions are set up when it
  is invoked. The function analyzes the condition, finds partitions that
  need to be used to retrieve the records that match the condition, and
  marks them as used by setting appropriate bits in
  part_info->read_partitions. In the worst case all partitions are marked
  as used. If the table is not yet locked, it will also unset bits in
  part_info->lock_partitions that are not set in read_partitions.

  This function returns promptly if called for a non-partitioned table.
3427 | |
3428 | @return Operation status |
3429 | @retval true Failure |
3430 | @retval false Success |
3431 | */ |
3432 | |
3433 | bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond) |
3434 | { |
3435 | bool retval= FALSE; |
3436 | partition_info *part_info = table->part_info; |
3437 | DBUG_ENTER("prune_partitions" ); |
3438 | |
3439 | if (!part_info) |
3440 | DBUG_RETURN(FALSE); /* not a partitioned table */ |
3441 | |
3442 | if (!pprune_cond) |
3443 | { |
3444 | mark_all_partitions_as_used(part_info); |
3445 | DBUG_RETURN(FALSE); |
3446 | } |
3447 | |
3448 | PART_PRUNE_PARAM prune_param; |
3449 | MEM_ROOT alloc; |
3450 | RANGE_OPT_PARAM *range_par= &prune_param.range_param; |
3451 | my_bitmap_map *old_sets[2]; |
3452 | |
3453 | prune_param.part_info= part_info; |
3454 | init_sql_alloc(&alloc, "prune_partitions" , |
3455 | thd->variables.range_alloc_block_size, 0, |
3456 | MYF(MY_THREAD_SPECIFIC)); |
3457 | range_par->mem_root= &alloc; |
3458 | range_par->old_root= thd->mem_root; |
3459 | |
3460 | if (create_partition_index_description(&prune_param)) |
3461 | { |
3462 | mark_all_partitions_as_used(part_info); |
3463 | free_root(&alloc,MYF(0)); // Return memory & allocator |
3464 | DBUG_RETURN(FALSE); |
3465 | } |
3466 | |
3467 | dbug_tmp_use_all_columns(table, old_sets, |
3468 | table->read_set, table->write_set); |
3469 | range_par->thd= thd; |
3470 | range_par->table= table; |
3471 | /* range_par->cond doesn't need initialization */ |
3472 | range_par->prev_tables= range_par->read_tables= 0; |
3473 | range_par->current_table= table->map; |
3474 | /* It should be possible to switch the following ON: */ |
3475 | range_par->remove_false_where_parts= false; |
3476 | |
3477 | range_par->keys= 1; // one index |
3478 | range_par->using_real_indexes= FALSE; |
3479 | range_par->remove_jump_scans= FALSE; |
3480 | range_par->real_keynr[0]= 0; |
3481 | range_par->alloced_sel_args= 0; |
3482 | |
3483 | thd->no_errors=1; // Don't warn about NULL |
3484 | thd->mem_root=&alloc; |
3485 | |
3486 | bitmap_clear_all(&part_info->read_partitions); |
3487 | |
3488 | prune_param.key= prune_param.range_param.key_parts; |
3489 | SEL_TREE *tree; |
3490 | int res; |
3491 | |
3492 | tree= pprune_cond->get_mm_tree(range_par, &pprune_cond); |
3493 | if (!tree) |
3494 | goto all_used; |
3495 | |
3496 | if (tree->type == SEL_TREE::IMPOSSIBLE) |
3497 | { |
3498 | retval= TRUE; |
3499 | goto end; |
3500 | } |
3501 | |
3502 | if (tree->type != SEL_TREE::KEY && tree->type != SEL_TREE::KEY_SMALLER) |
3503 | goto all_used; |
3504 | |
3505 | if (tree->merges.is_empty()) |
3506 | { |
3507 | /* Range analysis has produced a single list of intervals. */ |
3508 | prune_param.arg_stack_end= prune_param.arg_stack; |
3509 | prune_param.cur_part_fields= 0; |
3510 | prune_param.cur_subpart_fields= 0; |
3511 | |
3512 | prune_param.cur_min_key= prune_param.range_param.min_key; |
3513 | prune_param.cur_max_key= prune_param.range_param.max_key; |
3514 | prune_param.cur_min_flag= prune_param.cur_max_flag= 0; |
3515 | |
3516 | init_all_partitions_iterator(part_info, &prune_param.part_iter); |
3517 | if (!tree->keys[0] || (-1 == (res= find_used_partitions(&prune_param, |
3518 | tree->keys[0])))) |
3519 | goto all_used; |
3520 | } |
3521 | else |
3522 | { |
3523 | if (tree->merges.elements == 1) |
3524 | { |
3525 | /* |
3526 | Range analysis has produced a "merge" of several intervals lists, a |
3527 | SEL_TREE that represents an expression in form |
3528 | sel_imerge = (tree1 OR tree2 OR ... OR treeN) |
3529 | that cannot be reduced to one tree. This can only happen when |
3530 | partitioning index has several keyparts and the condition is OR of |
3531 | conditions that refer to different key parts. For example, we'll get |
3532 | here for "partitioning_field=const1 OR subpartitioning_field=const2" |
3533 | */ |
3534 | if (-1 == (res= find_used_partitions_imerge(&prune_param, |
3535 | tree->merges.head()))) |
3536 | goto all_used; |
3537 | } |
3538 | else |
3539 | { |
3540 | /* |
3541 | Range analysis has produced a list of several imerges, i.e. a |
3542 | structure that represents a condition in form |
3543 | imerge_list= (sel_imerge1 AND sel_imerge2 AND ... AND sel_imergeN) |
        This is produced for complicated WHERE clauses that the range
        analyzer can't really analyze properly.
3546 | */ |
3547 | if (-1 == (res= find_used_partitions_imerge_list(&prune_param, |
3548 | tree->merges))) |
3549 | goto all_used; |
3550 | } |
3551 | } |
3552 | |
3553 | /* |
3554 | res == 0 => no used partitions => retval=TRUE |
3555 | res == 1 => some used partitions => retval=FALSE |
3556 | res == -1 - we jump over this line to all_used: |
3557 | */ |
3558 | retval= MY_TEST(!res); |
3559 | goto end; |
3560 | |
3561 | all_used: |
3562 | retval= FALSE; // some partitions are used |
3563 | mark_all_partitions_as_used(prune_param.part_info); |
3564 | end: |
3565 | dbug_tmp_restore_column_maps(table->read_set, table->write_set, old_sets); |
3566 | thd->no_errors=0; |
3567 | thd->mem_root= range_par->old_root; |
3568 | free_root(&alloc,MYF(0)); // Return memory & allocator |
3569 | /* |
3570 | Must be a subset of the locked partitions. |
3571 | lock_partitions contains the partitions marked by explicit partition |
3572 | selection (... t PARTITION (pX) ...) and we must only use partitions |
3573 | within that set. |
3574 | */ |
3575 | bitmap_intersect(&prune_param.part_info->read_partitions, |
3576 | &prune_param.part_info->lock_partitions); |
3577 | /* |
3578 | If not yet locked, also prune partitions to lock if not UPDATEing |
3579 | partition key fields. This will also prune lock_partitions if we are under |
3580 | LOCK TABLES, so prune away calls to start_stmt(). |
3581 | TODO: enhance this prune locking to also allow pruning of |
3582 | 'UPDATE t SET part_key = const WHERE cond_is_prunable' so it adds |
3583 | a lock for part_key partition. |
3584 | */ |
3585 | if (table->file->get_lock_type() == F_UNLCK && |
3586 | !partition_key_modified(table, table->write_set)) |
3587 | { |
3588 | bitmap_copy(&prune_param.part_info->lock_partitions, |
3589 | &prune_param.part_info->read_partitions); |
3590 | } |
3591 | if (bitmap_is_clear_all(&(prune_param.part_info->read_partitions))) |
3592 | { |
3593 | table->all_partitions_pruned_away= true; |
3594 | retval= TRUE; |
3595 | } |
3596 | DBUG_RETURN(retval); |
3597 | } |
3598 | |
3599 | |
3600 | /* |
3601 | For SEL_ARG* array, store sel_arg->min values into table record buffer |
3602 | |
3603 | SYNOPSIS |
3604 | store_selargs_to_rec() |
3605 | ppar Partition pruning context |
3606 | start Array of SEL_ARG* for which the minimum values should be stored |
3607 | num Number of elements in the array |
3608 | |
3609 | DESCRIPTION |
3610 | For each SEL_ARG* interval in the specified array, store the left edge |
3611 | field value (sel_arg->min, key image format) into the table record. |
3612 | */ |
3613 | |
3614 | static void store_selargs_to_rec(PART_PRUNE_PARAM *ppar, SEL_ARG **start, |
3615 | int num) |
3616 | { |
3617 | KEY_PART *parts= ppar->range_param.key_parts; |
3618 | for (SEL_ARG **end= start + num; start != end; start++) |
3619 | { |
3620 | SEL_ARG *sel_arg= (*start); |
3621 | store_key_image_to_rec(sel_arg->field, sel_arg->min_value, |
3622 | parts[sel_arg->part].length); |
3623 | } |
3624 | } |
3625 | |
3626 | |
3627 | /* Mark a partition as used in the case when there are no subpartitions */ |
3628 | static void mark_full_partition_used_no_parts(partition_info* part_info, |
3629 | uint32 part_id) |
3630 | { |
3631 | DBUG_ENTER("mark_full_partition_used_no_parts" ); |
3632 | DBUG_PRINT("enter" , ("Mark partition %u as used" , part_id)); |
3633 | bitmap_set_bit(&part_info->read_partitions, part_id); |
3634 | DBUG_VOID_RETURN; |
3635 | } |
3636 | |
3637 | |
3638 | /* Mark a partition as used in the case when there are subpartitions */ |
3639 | static void mark_full_partition_used_with_parts(partition_info *part_info, |
3640 | uint32 part_id) |
3641 | { |
3642 | uint32 start= part_id * part_info->num_subparts; |
3643 | uint32 end= start + part_info->num_subparts; |
3644 | DBUG_ENTER("mark_full_partition_used_with_parts" ); |
3645 | |
3646 | for (; start != end; start++) |
3647 | { |
3648 | DBUG_PRINT("info" , ("1:Mark subpartition %u as used" , start)); |
3649 | bitmap_set_bit(&part_info->read_partitions, start); |
3650 | } |
3651 | DBUG_VOID_RETURN; |
3652 | } |
3653 | |
3654 | /* |
3655 | Find the set of used partitions for List<SEL_IMERGE> |
3656 | SYNOPSIS |
3657 | find_used_partitions_imerge_list |
3658 | ppar Partition pruning context. |
3659 | key_tree Intervals tree to perform pruning for. |
3660 | |
3661 | DESCRIPTION |
3662 | List<SEL_IMERGE> represents "imerge1 AND imerge2 AND ...". |
3663 | The set of used partitions is an intersection of used partitions sets |
3664 | for imerge_{i}. |
3665 | We accumulate this intersection in a separate bitmap. |
3666 | |
3667 | RETURN |
3668 | See find_used_partitions() |
3669 | */ |
3670 | |
3671 | static int find_used_partitions_imerge_list(PART_PRUNE_PARAM *ppar, |
3672 | List<SEL_IMERGE> &merges) |
3673 | { |
3674 | MY_BITMAP all_merges; |
3675 | uint bitmap_bytes; |
3676 | my_bitmap_map *bitmap_buf; |
3677 | uint n_bits= ppar->part_info->read_partitions.n_bits; |
3678 | bitmap_bytes= bitmap_buffer_size(n_bits); |
3679 | if (!(bitmap_buf= (my_bitmap_map*) alloc_root(ppar->range_param.mem_root, |
3680 | bitmap_bytes))) |
3681 | { |
3682 | /* |
3683 | Fallback, process just the first SEL_IMERGE. This can leave us with more |
3684 | partitions marked as used then actually needed. |
3685 | */ |
3686 | return find_used_partitions_imerge(ppar, merges.head()); |
3687 | } |
3688 | my_bitmap_init(&all_merges, bitmap_buf, n_bits, FALSE); |
3689 | bitmap_set_prefix(&all_merges, n_bits); |
3690 | |
3691 | List_iterator<SEL_IMERGE> it(merges); |
3692 | SEL_IMERGE *imerge; |
3693 | while ((imerge=it++)) |
3694 | { |
3695 | int res= find_used_partitions_imerge(ppar, imerge); |
3696 | if (!res) |
3697 | { |
3698 | /* no used partitions on one ANDed imerge => no used partitions at all */ |
3699 | return 0; |
3700 | } |
3701 | |
3702 | if (res != -1) |
3703 | bitmap_intersect(&all_merges, &ppar->part_info->read_partitions); |
3704 | |
3705 | |
3706 | if (bitmap_is_clear_all(&all_merges)) |
3707 | return 0; |
3708 | |
3709 | bitmap_clear_all(&ppar->part_info->read_partitions); |
3710 | } |
3711 | memcpy(ppar->part_info->read_partitions.bitmap, all_merges.bitmap, |
3712 | bitmap_bytes); |
3713 | return 1; |
3714 | } |
3715 | |
3716 | |
3717 | /* |
3718 | Find the set of used partitions for SEL_IMERGE structure |
3719 | SYNOPSIS |
3720 | find_used_partitions_imerge() |
3721 | ppar Partition pruning context. |
3722 | key_tree Intervals tree to perform pruning for. |
3723 | |
3724 | DESCRIPTION |
3725 | SEL_IMERGE represents "tree1 OR tree2 OR ...". The implementation is |
3726 | trivial - just use mark used partitions for each tree and bail out early |
3727 | if for some tree_{i} all partitions are used. |
3728 | |
3729 | RETURN |
3730 | See find_used_partitions(). |
3731 | */ |
3732 | |
3733 | static |
3734 | int find_used_partitions_imerge(PART_PRUNE_PARAM *ppar, SEL_IMERGE *imerge) |
3735 | { |
3736 | int res= 0; |
3737 | for (SEL_TREE **ptree= imerge->trees; ptree < imerge->trees_next; ptree++) |
3738 | { |
3739 | ppar->arg_stack_end= ppar->arg_stack; |
3740 | ppar->cur_part_fields= 0; |
3741 | ppar->cur_subpart_fields= 0; |
3742 | |
3743 | ppar->cur_min_key= ppar->range_param.min_key; |
3744 | ppar->cur_max_key= ppar->range_param.max_key; |
3745 | ppar->cur_min_flag= ppar->cur_max_flag= 0; |
3746 | |
3747 | init_all_partitions_iterator(ppar->part_info, &ppar->part_iter); |
3748 | SEL_ARG *key_tree= (*ptree)->keys[0]; |
3749 | if (!key_tree || (-1 == (res |= find_used_partitions(ppar, key_tree)))) |
3750 | return -1; |
3751 | } |
3752 | return res; |
3753 | } |
3754 | |
3755 | |
3756 | /* |
3757 | Collect partitioning ranges for the SEL_ARG tree and mark partitions as used |
3758 | |
3759 | SYNOPSIS |
3760 | find_used_partitions() |
3761 | ppar Partition pruning context. |
3762 | key_tree SEL_ARG range tree to perform pruning for |
3763 | |
3764 | DESCRIPTION |
3765 | This function |
3766 | * recursively walks the SEL_ARG* tree collecting partitioning "intervals" |
3767 | * finds the partitions one needs to use to get rows in these intervals |
3768 | * marks these partitions as used. |
    The next section describes the process in greater detail.
3770 | |
3771 | IMPLEMENTATION |
3772 | TYPES OF RESTRICTIONS THAT WE CAN OBTAIN PARTITIONS FOR |
3773 | We can find out which [sub]partitions to use if we obtain restrictions on |
3774 | [sub]partitioning fields in the following form: |
3775 | 1. "partition_field1=const1 AND ... AND partition_fieldN=constN" |
3776 | 1.1 Same as (1) but for subpartition fields |
3777 | |
3778 | If partitioning supports interval analysis (i.e. partitioning is a |
3779 | function of a single table field, and partition_info:: |
3780 | get_part_iter_for_interval != NULL), then we can also use condition in |
3781 | this form: |
3782 | 2. "const1 <=? partition_field <=? const2" |
3783 | 2.1 Same as (2) but for subpartition_field |
3784 | |
3785 | INFERRING THE RESTRICTIONS FROM SEL_ARG TREE |
3786 | |
3787 | The below is an example of what SEL_ARG tree may represent: |
3788 | |
3789 | (start) |
3790 | | $ |
3791 | | Partitioning keyparts $ subpartitioning keyparts |
3792 | | $ |
3793 | | ... ... $ |
3794 | | | | $ |
3795 | | +---------+ +---------+ $ +-----------+ +-----------+ |
3796 | \-| par1=c1 |--| par2=c2 |-----| subpar1=c3|--| subpar2=c5| |
3797 | +---------+ +---------+ $ +-----------+ +-----------+ |
3798 | | $ | | |
3799 | | $ | +-----------+ |
3800 | | $ | | subpar2=c6| |
3801 | | $ | +-----------+ |
3802 | | $ | |
3803 | | $ +-----------+ +-----------+ |
3804 | | $ | subpar1=c4|--| subpar2=c8| |
3805 | | $ +-----------+ +-----------+ |
3806 | | $ |
3807 | | $ |
3808 | +---------+ $ +------------+ +------------+ |
3809 | | par1=c2 |------------------| subpar1=c10|--| subpar2=c12| |
3810 | +---------+ $ +------------+ +------------+ |
3811 | | $ |
3812 | ... $ |
3813 | |
3814 | The up-down connections are connections via SEL_ARG::left and |
3815 | SEL_ARG::right. A horizontal connection to the right is the |
3816 | SEL_ARG::next_key_part connection. |
3817 | |
3818 | find_used_partitions() traverses the entire tree via recursion on |
3819 | * SEL_ARG::next_key_part (from left to right on the picture) |
3820 | * SEL_ARG::left|right (up/down on the pic). Left-right recursion is |
3821 | performed for each depth level. |
3822 | |
3823 | Recursion descent on SEL_ARG::next_key_part is used to accumulate (in |
3824 | ppar->arg_stack) constraints on partitioning and subpartitioning fields. |
3825 | For the example in the above picture, one of stack states is: |
3826 | in find_used_partitions(key_tree = "subpar2=c5") (***) |
3827 | in find_used_partitions(key_tree = "subpar1=c3") |
3828 | in find_used_partitions(key_tree = "par2=c2") (**) |
3829 | in find_used_partitions(key_tree = "par1=c1") |
3830 | in prune_partitions(...) |
3831 | We apply partitioning limits as soon as possible, e.g. when we reach the |
3832 | depth (**), we find which partition(s) correspond to "par1=c1 AND par2=c2", |
3833 | and save them in ppar->part_iter. |
    When we reach the depth (***), we find which subpartition(s) correspond
    to "subpar1=c3 AND subpar2=c5", and mark the appropriate subpartitions
    within the appropriate partitions as used.
3837 | |
3838 | It is possible that constraints on some partitioning fields are missing. |
3839 | For the above example, consider this stack state: |
3840 | in find_used_partitions(key_tree = "subpar2=c12") (***) |
3841 | in find_used_partitions(key_tree = "subpar1=c10") |
3842 | in find_used_partitions(key_tree = "par1=c2") |
3843 | in prune_partitions(...) |
    Here we don't have constraints for all partitioning fields. Since we've
    never set ppar->part_iter to contain the used set of partitions, we use
    its default "all partitions" value. We get the subpartition id for
    "subpar1=c10 AND subpar2=c12", and mark that subpartition as used in
    every partition.
3849 | |
3850 | The inverse is also possible: we may get constraints on partitioning |
3851 | fields, but not constraints on subpartitioning fields. In that case, |
3852 | calls to find_used_partitions() with depth below (**) will return -1, |
3853 | and we will mark entire partition as used. |
3854 | |
3855 | TODO |
3856 | Replace recursion on SEL_ARG::left and SEL_ARG::right with a loop |
3857 | |
  RETURN
    1   OK, one or more [sub]partitions are marked as used.
    0   The passed condition doesn't match any partitions
   -1   Couldn't infer any partition pruning "intervals" from the passed
        SEL_ARG* tree (which means that all partitions should be marked as
        used). Marking partitions as used is the responsibility of the
        caller.
3864 | */ |
3865 | |
3866 | static |
3867 | int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree) |
3868 | { |
3869 | int res, left_res=0, right_res=0; |
3870 | int key_tree_part= (int)key_tree->part; |
3871 | bool set_full_part_if_bad_ret= FALSE; |
3872 | bool ignore_part_fields= ppar->ignore_part_fields; |
3873 | bool did_set_ignore_part_fields= FALSE; |
3874 | RANGE_OPT_PARAM *range_par= &(ppar->range_param); |
3875 | |
3876 | if (check_stack_overrun(range_par->thd, 3*STACK_MIN_SIZE, NULL)) |
3877 | return -1; |
3878 | |
3879 | if (key_tree->left != &null_element) |
3880 | { |
3881 | if (-1 == (left_res= find_used_partitions(ppar,key_tree->left))) |
3882 | return -1; |
3883 | } |
3884 | |
3885 | /* Push SEL_ARG's to stack to enable looking backwards as well */ |
3886 | ppar->cur_part_fields+= ppar->is_part_keypart[key_tree_part]; |
3887 | ppar->cur_subpart_fields+= ppar->is_subpart_keypart[key_tree_part]; |
3888 | *(ppar->arg_stack_end++)= key_tree; |
3889 | |
3890 | if (ignore_part_fields) |
3891 | { |
3892 | /* |
3893 | We come here when a condition on the first partitioning |
3894 | fields led to evaluating the partitioning condition |
3895 | (due to finding a condition of the type a < const or |
3896 | b > const). Thus we must ignore the rest of the |
3897 | partitioning fields but we still want to analyse the |
3898 | subpartitioning fields. |
3899 | */ |
3900 | if (key_tree->next_key_part) |
3901 | res= find_used_partitions(ppar, key_tree->next_key_part); |
3902 | else |
3903 | res= -1; |
3904 | goto pop_and_go_right; |
3905 | } |
3906 | |
3907 | if (key_tree->type == SEL_ARG::KEY_RANGE) |
3908 | { |
3909 | if (ppar->part_info->get_part_iter_for_interval && |
3910 | key_tree->part <= ppar->last_part_partno) |
3911 | { |
3912 | /* Collect left and right bound, their lengths and flags */ |
3913 | uchar *min_key= ppar->cur_min_key; |
3914 | uchar *max_key= ppar->cur_max_key; |
3915 | uchar *tmp_min_key= min_key; |
3916 | uchar *tmp_max_key= max_key; |
3917 | key_tree->store_min(ppar->key[key_tree->part].store_length, |
3918 | &tmp_min_key, ppar->cur_min_flag); |
3919 | key_tree->store_max(ppar->key[key_tree->part].store_length, |
3920 | &tmp_max_key, ppar->cur_max_flag); |
3921 | uint flag; |
3922 | if (key_tree->next_key_part && |
3923 | key_tree->next_key_part->part == key_tree->part+1 && |
3924 | key_tree->next_key_part->part <= ppar->last_part_partno && |
3925 | key_tree->next_key_part->type == SEL_ARG::KEY_RANGE) |
3926 | { |
3927 | /* |
3928 | There are more key parts for partition pruning to handle |
3929 | This mainly happens when the condition is an equality |
3930 | condition. |
3931 | */ |
3932 | if ((tmp_min_key - min_key) == (tmp_max_key - max_key) && |
3933 | (memcmp(min_key, max_key, (uint)(tmp_max_key - max_key)) == 0) && |
3934 | !key_tree->min_flag && !key_tree->max_flag) |
3935 | { |
3936 | /* Set 'parameters' */ |
3937 | ppar->cur_min_key= tmp_min_key; |
3938 | ppar->cur_max_key= tmp_max_key; |
3939 | uint save_min_flag= ppar->cur_min_flag; |
3940 | uint save_max_flag= ppar->cur_max_flag; |
3941 | |
3942 | ppar->cur_min_flag|= key_tree->min_flag; |
3943 | ppar->cur_max_flag|= key_tree->max_flag; |
3944 | |
3945 | res= find_used_partitions(ppar, key_tree->next_key_part); |
3946 | |
3947 | /* Restore 'parameters' back */ |
3948 | ppar->cur_min_key= min_key; |
3949 | ppar->cur_max_key= max_key; |
3950 | |
3951 | ppar->cur_min_flag= save_min_flag; |
3952 | ppar->cur_max_flag= save_max_flag; |
3953 | goto pop_and_go_right; |
3954 | } |
3955 | /* We have arrived at the last field in the partition pruning */ |
3956 | uint tmp_min_flag= key_tree->min_flag, |
3957 | tmp_max_flag= key_tree->max_flag; |
3958 | if (!tmp_min_flag) |
3959 | key_tree->next_key_part->store_min_key(ppar->key, |
3960 | &tmp_min_key, |
3961 | &tmp_min_flag, |
3962 | ppar->last_part_partno); |
3963 | if (!tmp_max_flag) |
3964 | key_tree->next_key_part->store_max_key(ppar->key, |
3965 | &tmp_max_key, |
3966 | &tmp_max_flag, |
3967 | ppar->last_part_partno); |
3968 | flag= tmp_min_flag | tmp_max_flag; |
3969 | } |
3970 | else |
3971 | flag= key_tree->min_flag | key_tree->max_flag; |
3972 | |
3973 | if (tmp_min_key != range_par->min_key) |
3974 | flag&= ~NO_MIN_RANGE; |
3975 | else |
3976 | flag|= NO_MIN_RANGE; |
3977 | if (tmp_max_key != range_par->max_key) |
3978 | flag&= ~NO_MAX_RANGE; |
3979 | else |
3980 | flag|= NO_MAX_RANGE; |
3981 | |
3982 | /* |
3983 | We need to call the interval mapper if we have a condition which |
3984 | makes sense to prune on. In the example of COLUMNS on a and |
3985 | b it makes sense if we have a condition on a, or conditions on |
3986 | both a and b. If we only have conditions on b it might make sense |
3987 | but this is a harder case we will solve later. For the harder case |
3988 | this clause then turns into use of all partitions and thus we |
3989 | simply set res= -1 as if the mapper had returned that. |
3990 | TODO: What to do here is defined in WL#4065. |
3991 | */ |
      if (ppar->arg_stack[0]->part == 0 ||
          ppar->part_info->part_type == VERSIONING_PARTITION)
3993 | { |
3994 | uint32 i; |
3995 | uint32 store_length_array[MAX_KEY]; |
3996 | uint32 num_keys= ppar->part_fields; |
3997 | |
3998 | for (i= 0; i < num_keys; i++) |
3999 | store_length_array[i]= ppar->key[i].store_length; |
4000 | res= ppar->part_info-> |
4001 | get_part_iter_for_interval(ppar->part_info, |
4002 | FALSE, |
4003 | store_length_array, |
4004 | range_par->min_key, |
4005 | range_par->max_key, |
4006 | (uint)(tmp_min_key - range_par->min_key), |
4007 | (uint)(tmp_max_key - range_par->max_key), |
4008 | flag, |
4009 | &ppar->part_iter); |
4010 | if (!res) |
4011 | goto pop_and_go_right; /* res==0 --> no satisfying partitions */ |
4012 | } |
4013 | else |
4014 | res= -1; |
4015 | |
4016 | if (res == -1) |
4017 | { |
4018 | /* get a full range iterator */ |
4019 | init_all_partitions_iterator(ppar->part_info, &ppar->part_iter); |
4020 | } |
4021 | /* |
4022 | Save our intent to mark full partition as used if we will not be able |
4023 | to obtain further limits on subpartitions |
4024 | */ |
4025 | if (key_tree_part < ppar->last_part_partno) |
4026 | { |
4027 | /* |
4028 | We need to ignore the rest of the partitioning fields in all |
4029 | evaluations after this |
4030 | */ |
4031 | did_set_ignore_part_fields= TRUE; |
4032 | ppar->ignore_part_fields= TRUE; |
4033 | } |
4034 | set_full_part_if_bad_ret= TRUE; |
4035 | goto process_next_key_part; |
4036 | } |
4037 | |
4038 | if (key_tree_part == ppar->last_subpart_partno && |
4039 | (NULL != ppar->part_info->get_subpart_iter_for_interval)) |
4040 | { |
4041 | PARTITION_ITERATOR subpart_iter; |
4042 | DBUG_EXECUTE("info" , dbug_print_segment_range(key_tree, |
4043 | range_par->key_parts);); |
4044 | res= ppar->part_info-> |
4045 | get_subpart_iter_for_interval(ppar->part_info, |
4046 | TRUE, |
4047 | NULL, /* Currently not used here */ |
4048 | key_tree->min_value, |
4049 | key_tree->max_value, |
4050 | 0, 0, /* Those are ignored here */ |
4051 | key_tree->min_flag | |
4052 | key_tree->max_flag, |
4053 | &subpart_iter); |
4054 | if (res == 0) |
4055 | { |
4056 | /* |
4057 | The only case where we can get "no satisfying subpartitions" |
4058 | returned from the above call is when an error has occurred. |
4059 | */ |
4060 | DBUG_ASSERT(range_par->thd->is_error()); |
4061 | return 0; |
4062 | } |
4063 | |
4064 | if (res == -1) |
4065 | goto pop_and_go_right; /* all subpartitions satisfy */ |
4066 | |
4067 | uint32 subpart_id; |
4068 | bitmap_clear_all(&ppar->subparts_bitmap); |
4069 | while ((subpart_id= subpart_iter.get_next(&subpart_iter)) != |
4070 | NOT_A_PARTITION_ID) |
4071 | bitmap_set_bit(&ppar->subparts_bitmap, subpart_id); |
4072 | |
      /* Mark the found subpartitions as used within each used partition. */
4074 | uint32 part_id; |
4075 | while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) != |
4076 | NOT_A_PARTITION_ID) |
4077 | { |
4078 | for (uint i= 0; i < ppar->part_info->num_subparts; i++) |
4079 | if (bitmap_is_set(&ppar->subparts_bitmap, i)) |
4080 | bitmap_set_bit(&ppar->part_info->read_partitions, |
4081 | part_id * ppar->part_info->num_subparts + i); |
4082 | } |
4083 | goto pop_and_go_right; |
4084 | } |
4085 | |
4086 | if (key_tree->is_singlepoint()) |
4087 | { |
4088 | if (key_tree_part == ppar->last_part_partno && |
4089 | ppar->cur_part_fields == ppar->part_fields && |
4090 | ppar->part_info->get_part_iter_for_interval == NULL) |
4091 | { |
4092 | /* |
4093 | Ok, we've got "fieldN<=>constN"-type SEL_ARGs for all partitioning |
4094 | fields. Save all constN constants into table record buffer. |
4095 | */ |
4096 | store_selargs_to_rec(ppar, ppar->arg_stack, ppar->part_fields); |
4097 | DBUG_EXECUTE("info" , dbug_print_singlepoint_range(ppar->arg_stack, |
4098 | ppar->part_fields);); |
4099 | uint32 part_id; |
4100 | longlong func_value; |
4101 | /* Find in which partition the {const1, ...,constN} tuple goes */ |
4102 | if (ppar->get_top_partition_id_func(ppar->part_info, &part_id, |
4103 | &func_value)) |
4104 | { |
4105 | res= 0; /* No satisfying partitions */ |
4106 | goto pop_and_go_right; |
4107 | } |
        /* Remember the limit we got - single partition #part_id */
4109 | init_single_partition_iterator(part_id, &ppar->part_iter); |
4110 | |
4111 | /* |
4112 | If there are no subpartitions/we fail to get any limit for them, |
4113 | then we'll mark full partition as used. |
4114 | */ |
4115 | set_full_part_if_bad_ret= TRUE; |
4116 | goto process_next_key_part; |
4117 | } |
4118 | |
4119 | if (key_tree_part == ppar->last_subpart_partno && |
4120 | ppar->cur_subpart_fields == ppar->subpart_fields) |
4121 | { |
4122 | /* |
4123 | Ok, we've got "fieldN<=>constN"-type SEL_ARGs for all subpartitioning |
4124 | fields. Save all constN constants into table record buffer. |
4125 | */ |
4126 | store_selargs_to_rec(ppar, ppar->arg_stack_end - ppar->subpart_fields, |
4127 | ppar->subpart_fields); |
4128 | DBUG_EXECUTE("info" , dbug_print_singlepoint_range(ppar->arg_stack_end- |
4129 | ppar->subpart_fields, |
4130 | ppar->subpart_fields);); |
4131 | /* Find the subpartition (it's HASH/KEY so we always have one) */ |
4132 | partition_info *part_info= ppar->part_info; |
4133 | uint32 part_id, subpart_id; |
4134 | |
4135 | if (part_info->get_subpartition_id(part_info, &subpart_id)) |
4136 | return 0; |
4137 | |
        /* Mark this subpartition as used within each used partition. */
4139 | while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) != |
4140 | NOT_A_PARTITION_ID) |
4141 | { |
4142 | bitmap_set_bit(&part_info->read_partitions, |
4143 | part_id * part_info->num_subparts + subpart_id); |
4144 | } |
4145 | res= 1; /* Some partitions were marked as used */ |
4146 | goto pop_and_go_right; |
4147 | } |
4148 | } |
4149 | else |
4150 | { |
4151 | /* |
4152 | Can't handle condition on current key part. If we're that deep that |
4153 | we're processing subpartititoning's key parts, this means we'll not be |
4154 | able to infer any suitable condition, so bail out. |
4155 | */ |
4156 | if (key_tree_part >= ppar->last_part_partno) |
4157 | { |
4158 | res= -1; |
4159 | goto pop_and_go_right; |
4160 | } |
4161 | /* |
4162 | No meaning in continuing with rest of partitioning key parts. |
4163 | Will try to continue with subpartitioning key parts. |
4164 | */ |
4165 | ppar->ignore_part_fields= true; |
4166 | did_set_ignore_part_fields= true; |
4167 | goto process_next_key_part; |
4168 | } |
4169 | } |
4170 | |
4171 | process_next_key_part: |
4172 | if (key_tree->next_key_part) |
4173 | res= find_used_partitions(ppar, key_tree->next_key_part); |
4174 | else |
4175 | res= -1; |
4176 | |
4177 | if (did_set_ignore_part_fields) |
4178 | { |
4179 | /* |
4180 | We have returned from processing all key trees linked to our next |
4181 | key part. We are ready to be moving down (using right pointers) and |
4182 | this tree is a new evaluation requiring its own decision on whether |
4183 | to ignore partitioning fields. |
4184 | */ |
4185 | ppar->ignore_part_fields= FALSE; |
4186 | } |
4187 | if (set_full_part_if_bad_ret) |
4188 | { |
4189 | if (res == -1) |
4190 | { |
4191 | /* Got "full range" for subpartitioning fields */ |
4192 | uint32 part_id; |
4193 | bool found= FALSE; |
4194 | while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) != |
4195 | NOT_A_PARTITION_ID) |
4196 | { |
4197 | ppar->mark_full_partition_used(ppar->part_info, part_id); |
4198 | found= TRUE; |
4199 | } |
4200 | res= MY_TEST(found); |
4201 | } |
4202 | /* |
4203 | Restore the "used partitions iterator" to the default setting that |
4204 | specifies iteration over all partitions. |
4205 | */ |
4206 | init_all_partitions_iterator(ppar->part_info, &ppar->part_iter); |
4207 | } |
4208 | |
4209 | pop_and_go_right: |
4210 | /* Pop this key part info off the "stack" */ |
4211 | ppar->arg_stack_end--; |
4212 | ppar->cur_part_fields-= ppar->is_part_keypart[key_tree_part]; |
4213 | ppar->cur_subpart_fields-= ppar->is_subpart_keypart[key_tree_part]; |
4214 | |
4215 | if (res == -1) |
4216 | return -1; |
4217 | if (key_tree->right != &null_element) |
4218 | { |
4219 | if (-1 == (right_res= find_used_partitions(ppar,key_tree->right))) |
4220 | return -1; |
4221 | } |
4222 | return (left_res || right_res || res); |
4223 | } |
4224 | |
4225 | |
4226 | static void mark_all_partitions_as_used(partition_info *part_info) |
4227 | { |
4228 | bitmap_copy(&(part_info->read_partitions), |
4229 | &(part_info->lock_partitions)); |
4230 | } |
4231 | |
4232 | |
4233 | /* |
  Check if field types allow us to construct a partitioning index description
4235 | |
4236 | SYNOPSIS |
4237 | fields_ok_for_partition_index() |
4238 | pfield NULL-terminated array of pointers to fields. |
4239 | |
4240 | DESCRIPTION |
    For an array of fields, check if we can use all of the fields to create
    a partitioning index description.

    We can't process GEOMETRY fields - for these fields singlepoint
    intervals can't be generated, and non-singlepoint intervals are
    "special" kinds of intervals to which our processing logic can't be
    applied.
4247 | |
4248 | It is not known if we could process ENUM fields, so they are disabled to be |
4249 | on the safe side. |
4250 | |
4251 | RETURN |
4252 | TRUE Yes, fields can be used in partitioning index |
4253 | FALSE Otherwise |
4254 | */ |
4255 | |
4256 | static bool fields_ok_for_partition_index(Field **pfield) |
4257 | { |
4258 | if (!pfield) |
4259 | return FALSE; |
4260 | for (; (*pfield); pfield++) |
4261 | { |
4262 | enum_field_types ftype= (*pfield)->real_type(); |
4263 | if (ftype == MYSQL_TYPE_ENUM || ftype == MYSQL_TYPE_GEOMETRY) |
4264 | return FALSE; |
4265 | } |
4266 | return TRUE; |
4267 | } |
4268 | |
4269 | |
4270 | /* |
4271 | Create partition index description and fill related info in the context |
4272 | struct |
4273 | |
4274 | SYNOPSIS |
4275 | create_partition_index_description() |
4276 | prune_par INOUT Partition pruning context |
4277 | |
4278 | DESCRIPTION |
4279 | Create partition index description. Partition index description is: |
4280 | |
4281 | part_index(used_fields_list(part_expr), used_fields_list(subpart_expr)) |
4282 | |
  If partitioning/sub-partitioning uses ENUM or GEOMETRY fields (see
  fields_ok_for_partition_index()), then the corresponding fields_list(...)
  is not included into the index description and we don't perform partition
  pruning for those partitions/subpartitions.
4286 | |
4287 | RETURN |
4288 | TRUE Out of memory or can't do partition pruning at all |
4289 | FALSE OK |
4290 | */ |
4291 | |
4292 | static bool create_partition_index_description(PART_PRUNE_PARAM *ppar) |
4293 | { |
4294 | RANGE_OPT_PARAM *range_par= &(ppar->range_param); |
4295 | partition_info *part_info= ppar->part_info; |
4296 | uint used_part_fields, used_subpart_fields; |
4297 | |
4298 | used_part_fields= fields_ok_for_partition_index(part_info->part_field_array) ? |
4299 | part_info->num_part_fields : 0; |
4300 | used_subpart_fields= |
4301 | fields_ok_for_partition_index(part_info->subpart_field_array)? |
4302 | part_info->num_subpart_fields : 0; |
4303 | |
4304 | uint total_parts= used_part_fields + used_subpart_fields; |
4305 | |
4306 | ppar->ignore_part_fields= FALSE; |
4307 | ppar->part_fields= used_part_fields; |
4308 | ppar->last_part_partno= (int)used_part_fields - 1; |
4309 | |
4310 | ppar->subpart_fields= used_subpart_fields; |
4311 | ppar->last_subpart_partno= |
4312 | used_subpart_fields?(int)(used_part_fields + used_subpart_fields - 1): -1; |
4313 | |
4314 | if (part_info->is_sub_partitioned()) |
4315 | { |
4316 | ppar->mark_full_partition_used= mark_full_partition_used_with_parts; |
4317 | ppar->get_top_partition_id_func= part_info->get_part_partition_id; |
4318 | } |
4319 | else |
4320 | { |
4321 | ppar->mark_full_partition_used= mark_full_partition_used_no_parts; |
4322 | ppar->get_top_partition_id_func= part_info->get_partition_id; |
4323 | } |
4324 | |
4325 | KEY_PART *key_part; |
4326 | MEM_ROOT *alloc= range_par->mem_root; |
4327 | if (!total_parts || |
4328 | !(key_part= (KEY_PART*)alloc_root(alloc, sizeof(KEY_PART)* |
4329 | total_parts)) || |
4330 | !(ppar->arg_stack= (SEL_ARG**)alloc_root(alloc, sizeof(SEL_ARG*)* |
4331 | total_parts)) || |
4332 | !(ppar->is_part_keypart= (my_bool*)alloc_root(alloc, sizeof(my_bool)* |
4333 | total_parts)) || |
4334 | !(ppar->is_subpart_keypart= (my_bool*)alloc_root(alloc, sizeof(my_bool)* |
4335 | total_parts))) |
4336 | return TRUE; |
4337 | |
4338 | if (ppar->subpart_fields) |
4339 | { |
4340 | my_bitmap_map *buf; |
4341 | uint32 bufsize= bitmap_buffer_size(ppar->part_info->num_subparts); |
4342 | if (!(buf= (my_bitmap_map*) alloc_root(alloc, bufsize))) |
4343 | return TRUE; |
4344 | my_bitmap_init(&ppar->subparts_bitmap, buf, ppar->part_info->num_subparts, |
4345 | FALSE); |
4346 | } |
4347 | range_par->key_parts= key_part; |
4348 | Field **field= (ppar->part_fields)? part_info->part_field_array : |
4349 | part_info->subpart_field_array; |
4350 | bool in_subpart_fields= FALSE; |
4351 | uint total_key_len= 0; |
4352 | for (uint part= 0; part < total_parts; part++, key_part++) |
4353 | { |
4354 | key_part->key= 0; |
4355 | key_part->part= part; |
4356 | key_part->length= (uint16)(*field)->key_length(); |
4357 | key_part->store_length= (uint16)get_partition_field_store_length(*field); |
4358 | total_key_len += key_part->store_length; |
4359 | |
4360 | DBUG_PRINT("info" , ("part %u length %u store_length %u" , part, |
4361 | key_part->length, key_part->store_length)); |
4362 | |
4363 | key_part->field= (*field); |
4364 | key_part->image_type = Field::itRAW; |
4365 | /* |
4366 | We set keypart flag to 0 here as the only HA_PART_KEY_SEG is checked |
4367 | in the RangeAnalysisModule. |
4368 | */ |
4369 | key_part->flag= 0; |
4370 | /* We don't set key_parts->null_bit as it will not be used */ |
4371 | |
4372 | ppar->is_part_keypart[part]= !in_subpart_fields; |
4373 | ppar->is_subpart_keypart[part]= in_subpart_fields; |
4374 | |
4375 | /* |
4376 | Check if this was last field in this array, in this case we |
4377 | switch to subpartitioning fields. (This will only happens if |
4378 | there are subpartitioning fields to cater for). |
4379 | */ |
4380 | if (!*(++field)) |
4381 | { |
4382 | field= part_info->subpart_field_array; |
4383 | in_subpart_fields= TRUE; |
4384 | } |
4385 | } |
4386 | range_par->key_parts_end= key_part; |
4387 | |
4388 | total_key_len++; /* Take into account the "+1" in QUICK_RANGE::QUICK_RANGE */ |
4389 | if (!(range_par->min_key= (uchar*)alloc_root(alloc,total_key_len)) || |
4390 | !(range_par->max_key= (uchar*)alloc_root(alloc,total_key_len))) |
4391 | { |
4392 | return true; |
4393 | } |
4394 | |
4395 | DBUG_EXECUTE("info" , print_partitioning_index(range_par->key_parts, |
4396 | range_par->key_parts_end);); |
4397 | return FALSE; |
4398 | } |
4399 | |
4400 | |
4401 | #ifndef DBUG_OFF |
4402 | |
4403 | static void print_partitioning_index(KEY_PART *parts, KEY_PART *parts_end) |
4404 | { |
4405 | DBUG_ENTER("print_partitioning_index" ); |
4406 | DBUG_LOCK_FILE; |
4407 | fprintf(DBUG_FILE, "partitioning INDEX(" ); |
4408 | for (KEY_PART *p=parts; p != parts_end; p++) |
4409 | { |
4410 | fprintf(DBUG_FILE, "%s%s" , p==parts?"" :" ," , p->field->field_name.str); |
4411 | } |
4412 | fputs(");\n" , DBUG_FILE); |
4413 | DBUG_UNLOCK_FILE; |
4414 | DBUG_VOID_RETURN; |
4415 | } |
4416 | |
4417 | /* Print field value into debug trace, in NULL-aware way. */ |
4418 | static void dbug_print_field(Field *field) |
4419 | { |
4420 | if (field->is_real_null()) |
4421 | fprintf(DBUG_FILE, "NULL" ); |
4422 | else |
4423 | { |
4424 | char buf[256]; |
4425 | String str(buf, sizeof(buf), &my_charset_bin); |
4426 | str.length(0); |
4427 | String *pstr; |
4428 | pstr= field->val_str(&str); |
4429 | fprintf(DBUG_FILE, "'%s'" , pstr->c_ptr_safe()); |
4430 | } |
4431 | } |
4432 | |
4433 | |
4434 | /* Print a "c1 < keypartX < c2" - type interval into debug trace. */ |
4435 | static void dbug_print_segment_range(SEL_ARG *arg, KEY_PART *part) |
4436 | { |
4437 | DBUG_ENTER("dbug_print_segment_range" ); |
4438 | DBUG_LOCK_FILE; |
4439 | if (!(arg->min_flag & NO_MIN_RANGE)) |
4440 | { |
4441 | store_key_image_to_rec(part->field, arg->min_value, part->length); |
4442 | dbug_print_field(part->field); |
4443 | if (arg->min_flag & NEAR_MIN) |
4444 | fputs(" < " , DBUG_FILE); |
4445 | else |
4446 | fputs(" <= " , DBUG_FILE); |
4447 | } |
4448 | |
4449 | fprintf(DBUG_FILE, "%s" , part->field->field_name.str); |
4450 | |
4451 | if (!(arg->max_flag & NO_MAX_RANGE)) |
4452 | { |
4453 | if (arg->max_flag & NEAR_MAX) |
4454 | fputs(" < " , DBUG_FILE); |
4455 | else |
4456 | fputs(" <= " , DBUG_FILE); |
4457 | store_key_image_to_rec(part->field, arg->max_value, part->length); |
4458 | dbug_print_field(part->field); |
4459 | } |
4460 | fputs("\n" , DBUG_FILE); |
4461 | DBUG_UNLOCK_FILE; |
4462 | DBUG_VOID_RETURN; |
4463 | } |
4464 | |
4465 | |
4466 | /* |
4467 | Print a singlepoint multi-keypart range interval to debug trace |
4468 | |
4469 | SYNOPSIS |
4470 | dbug_print_singlepoint_range() |
4471 | start Array of SEL_ARG* ptrs representing conditions on key parts |
4472 | num Number of elements in the array. |
4473 | |
4474 | DESCRIPTION |
4475 | This function prints a "keypartN=constN AND ... AND keypartK=constK"-type |
4476 | interval to debug trace. |
4477 | */ |
4478 | |
4479 | static void dbug_print_singlepoint_range(SEL_ARG **start, uint num) |
4480 | { |
4481 | DBUG_ENTER("dbug_print_singlepoint_range" ); |
4482 | DBUG_LOCK_FILE; |
4483 | SEL_ARG **end= start + num; |
4484 | |
4485 | for (SEL_ARG **arg= start; arg != end; arg++) |
4486 | { |
4487 | Field *field= (*arg)->field; |
4488 | fprintf(DBUG_FILE, "%s%s=" , (arg==start)?"" :", " , field->field_name.str); |
4489 | dbug_print_field(field); |
4490 | } |
4491 | fputs("\n" , DBUG_FILE); |
4492 | DBUG_UNLOCK_FILE; |
4493 | DBUG_VOID_RETURN; |
4494 | } |
4495 | #endif |
4496 | |
4497 | /**************************************************************************** |
4498 | * Partition pruning code ends |
4499 | ****************************************************************************/ |
4500 | #endif |
4501 | |
4502 | |
4503 | /* |
4504 | Get cost of 'sweep' full records retrieval. |
4505 | SYNOPSIS |
4506 | get_sweep_read_cost() |
4507 | param Parameter from test_quick_select |
4508 | records # of records to be retrieved |
4509 | RETURN |
4510 | cost of sweep |
4511 | */ |
4512 | |
4513 | double get_sweep_read_cost(const PARAM *param, ha_rows records) |
4514 | { |
4515 | double result; |
4516 | DBUG_ENTER("get_sweep_read_cost" ); |
4517 | if (param->table->file->primary_key_is_clustered()) |
4518 | { |
4519 | /* |
4520 | We are using the primary key to find the rows. |
4521 | Calculate the cost for this. |
4522 | */ |
4523 | result= param->table->file->read_time(param->table->s->primary_key, |
4524 | (uint)records, records); |
4525 | } |
4526 | else |
4527 | { |
4528 | /* |
4529 | Rows will be retreived with rnd_pos(). Caluclate the expected |
4530 | cost for this. |
4531 | */ |
4532 | double n_blocks= |
4533 | ceil(ulonglong2double(param->table->file->stats.data_file_length) / |
4534 | IO_SIZE); |
4535 | double busy_blocks= |
4536 | n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(records))); |
4537 | if (busy_blocks < 1.0) |
4538 | busy_blocks= 1.0; |
4539 | DBUG_PRINT("info" ,("sweep: nblocks: %g, busy_blocks: %g" , n_blocks, |
4540 | busy_blocks)); |
4541 | /* |
4542 | Disabled: Bail out if # of blocks to read is bigger than # of blocks in |
4543 | table data file. |
4544 | if (max_cost != DBL_MAX && (busy_blocks+index_reads_cost) >= n_blocks) |
4545 | return 1; |
4546 | */ |
4547 | JOIN *join= param->thd->lex->select_lex.join; |
4548 | if (!join || join->table_count == 1) |
4549 | { |
4550 | /* No join, assume reading is done in one 'sweep' */ |
4551 | result= busy_blocks*(DISK_SEEK_BASE_COST + |
4552 | DISK_SEEK_PROP_COST*n_blocks/busy_blocks); |
4553 | } |
4554 | else |
4555 | { |
4556 | /* |
4557 | Possibly this is a join with source table being non-last table, so |
4558 | assume that disk seeks are random here. |
4559 | */ |
4560 | result= busy_blocks; |
4561 | } |
4562 | } |
4563 | DBUG_PRINT("return" ,("cost: %g" , result)); |
4564 | DBUG_RETURN(result); |
4565 | } |
4566 | |
4567 | |
4568 | /* |
4569 | Get best plan for a SEL_IMERGE disjunctive expression. |
4570 | SYNOPSIS |
4571 | get_best_disjunct_quick() |
4572 | param Parameter from check_quick_select function |
4573 | imerge Expression to use |
4574 | read_time Don't create scans with cost > read_time |
4575 | |
4576 | NOTES |
4577 | index_merge cost is calculated as follows: |
4578 | index_merge_cost = |
4579 | cost(index_reads) + (see #1) |
4580 | cost(rowid_to_row_scan) + (see #2) |
4581 | cost(unique_use) (see #3) |
4582 | |
4583 | 1. cost(index_reads) =SUM_i(cost(index_read_i)) |
4584 | For non-CPK scans, |
4585 | cost(index_read_i) = {cost of ordinary 'index only' scan} |
4586 | For CPK scan, |
4587 | cost(index_read_i) = {cost of non-'index only' scan} |
4588 | |
4589 | 2. cost(rowid_to_row_scan) |
4590 | If table PK is clustered then |
4591 | cost(rowid_to_row_scan) = |
4592 | {cost of ordinary clustered PK scan with n_ranges=n_rows} |
4593 | |
4594 | Otherwise, we use the following model to calculate costs: |
4595 | We need to retrieve n_rows rows from file that occupies n_blocks blocks. |
4596 | We assume that offsets of rows we need are independent variates with |
4597 | uniform distribution in [0..max_file_offset] range. |
4598 | |
      We'll denote a block as "busy" if it contains row(s) we need to
      retrieve and "empty" if it doesn't contain rows we need.
4601 | |
4602 | Probability that a block is empty is (1 - 1/n_blocks)^n_rows (this |
4603 | applies to any block in file). Let x_i be a variate taking value 1 if |
4604 | block #i is empty and 0 otherwise. |
4605 | |
4606 | Then E(x_i) = (1 - 1/n_blocks)^n_rows; |
4607 | |
4608 | E(n_empty_blocks) = E(sum(x_i)) = sum(E(x_i)) = |
4609 | = n_blocks * ((1 - 1/n_blocks)^n_rows) = |
4610 | ~= n_blocks * exp(-n_rows/n_blocks). |
4611 | |
4612 | E(n_busy_blocks) = n_blocks*(1 - (1 - 1/n_blocks)^n_rows) = |
4613 | ~= n_blocks * (1 - exp(-n_rows/n_blocks)). |
4614 | |
4615 | Average size of "hole" between neighbor non-empty blocks is |
4616 | E(hole_size) = n_blocks/E(n_busy_blocks). |
4617 | |
4618 | The total cost of reading all needed blocks in one "sweep" is: |
4619 | |
4620 | E(n_busy_blocks)* |
4621 | (DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST*n_blocks/E(n_busy_blocks)). |
4622 | |
4623 | 3. Cost of Unique use is calculated in Unique::get_use_cost function. |
4624 | |
    ROR-union cost is calculated in the same way as index_merge cost, but
    instead of Unique a priority queue is used.
4627 | |
4628 | RETURN |
4629 | Created read plan |
4630 | NULL - Out of memory or no read scan could be built. |
4631 | */ |
4632 | |
4633 | static |
4634 | TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge, |
4635 | double read_time) |
4636 | { |
4637 | SEL_TREE **ptree; |
4638 | TRP_INDEX_MERGE *imerge_trp= NULL; |
4639 | TRP_RANGE **range_scans; |
4640 | TRP_RANGE **cur_child; |
4641 | TRP_RANGE **cpk_scan= NULL; |
4642 | bool imerge_too_expensive= FALSE; |
4643 | double imerge_cost= 0.0; |
4644 | ha_rows cpk_scan_records= 0; |
4645 | ha_rows non_cpk_scan_records= 0; |
4646 | bool pk_is_clustered= param->table->file->primary_key_is_clustered(); |
4647 | bool all_scans_ror_able= TRUE; |
4648 | bool all_scans_rors= TRUE; |
4649 | uint unique_calc_buff_size; |
4650 | TABLE_READ_PLAN **roru_read_plans; |
4651 | TABLE_READ_PLAN **cur_roru_plan; |
4652 | double roru_index_costs; |
4653 | ha_rows roru_total_records; |
4654 | double roru_intersect_part= 1.0; |
4655 | size_t n_child_scans; |
  DBUG_ENTER("get_best_disjunct_quick");
  DBUG_PRINT("info", ("Full table scan cost: %g", read_time));
4658 | |
4659 | /* |
    In every tree of the imerge, remove SEL_ARG trees that do not make
    ranges. If some SEL_ARG tree becomes empty after this removal, discard
    the whole imerge.
4662 | */ |
4663 | for (ptree= imerge->trees; ptree != imerge->trees_next; ptree++) |
4664 | { |
4665 | if (remove_nonrange_trees(param, *ptree)) |
4666 | { |
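      /*
        No usable ranges remain in this tree; make the imerge's tree list
        empty so that the whole disjunction is discarded below.
      */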
4667 | imerge->trees_next= imerge->trees; |
4668 | break; |
4669 | } |
4670 | } |
4671 | |
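  /* Number of disjuncts left in the imerge (0 if it was discarded above) */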
4672 | n_child_scans= imerge->trees_next - imerge->trees; |
4673 | |
4674 | if (!n_child_scans) |
4675 | DBUG_RETURN(NULL); |
4676 | |
4677 | if (!(range_scans= (TRP_RANGE**)alloc_root(param->mem_root, |
4678 | sizeof(TRP_RANGE*)* |
4679 | n_child_scans))) |
4680 | DBUG_RETURN(NULL); |
4681 | /* |
4682 | Collect best 'range' scan for each of disjuncts, and, while doing so, |
4683 | analyze possibility of ROR scans. Also calculate some values needed by |
4684 | other parts of the code. |
4685 | */ |
4686 | for (ptree= imerge->trees, cur_child= range_scans; |
4687 | ptree != imerge->trees_next; |
4688 | ptree++, cur_child++) |
4689 | { |
    DBUG_EXECUTE("info", print_sel_tree(param, *ptree, &(*ptree)->keys_map,
                                        "tree in SEL_IMERGE"););
    if (!(*cur_child= get_key_scans_params(param, *ptree, TRUE, FALSE,
                                           read_time)))
4693 | { |
4694 | /* |
        One of the index scans in this index_merge is more expensive than
        the entire table read for another available option. The entire
        index_merge (and any possible ROR-union) will then be more
        expensive, too. We continue here only to update SQL_SELECT members.
4699 | */ |
4700 | imerge_too_expensive= TRUE; |
4701 | } |
4702 | if (imerge_too_expensive) |
4703 | continue; |
4704 | |
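    /*
      Sum up the per-disjunct scan costs and check whether one of the scans
      is over a clustered primary key: its rows need no separate
      rowid-to-row lookup.
    */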
4705 | imerge_cost += (*cur_child)->read_cost; |
4706 | all_scans_ror_able &= ((*ptree)->n_ror_scans > 0); |
4707 | all_scans_rors &= (*cur_child)->is_ror; |
4708 | if (pk_is_clustered && |
4709 | param->real_keynr[(*cur_child)->key_idx] == |
4710 | param->table->s->primary_key) |
4711 | { |
4712 | cpk_scan= cur_child; |
4713 | cpk_scan_records= (*cur_child)->records; |
4714 | } |
4715 | else |
4716 | non_cpk_scan_records += (*cur_child)->records; |
4717 | } |
4718 | |
  DBUG_PRINT("info", ("index_merge scans cost %g", imerge_cost));
4720 | if (imerge_too_expensive || (imerge_cost > read_time) || |
4721 | ((non_cpk_scan_records+cpk_scan_records >= |
4722 | param->table->stat_records()) && |
4723 | read_time != DBL_MAX)) |
4724 | { |
4725 | /* |
      Bail out if it is obvious that both index_merge and ROR-union will be
      more expensive.
4728 | */ |
    DBUG_PRINT("info", ("Sum of index_merge scans is more expensive than "
                        "full table scan, bailing out"));
4731 | DBUG_RETURN(NULL); |
4732 | } |
4733 | |
4734 | /* |
4735 | If all scans happen to be ROR, proceed to generate a ROR-union plan (it's |
4736 | guaranteed to be cheaper than non-ROR union), unless ROR-unions are |
4737 | disabled in @@optimizer_switch |
4738 | */ |
4739 | if (all_scans_rors && |
4740 | optimizer_flag(param->thd, OPTIMIZER_SWITCH_INDEX_MERGE_UNION)) |
4741 | { |
4742 | roru_read_plans= (TABLE_READ_PLAN**)range_scans; |
4743 | goto skip_to_ror_scan; |
4744 | } |
4745 | |
4746 | if (cpk_scan) |
4747 | { |
4748 | /* |
      Add one ROWID comparison for each row retrieved on a non-CPK scan
      (this is done in QUICK_RANGE_SELECT::row_in_ranges).
4751 | */ |
4752 | imerge_cost += non_cpk_scan_records / TIME_FOR_COMPARE_ROWID; |
4753 | } |
4754 | |
4755 | /* Calculate cost(rowid_to_row_scan) */ |
4756 | imerge_cost += get_sweep_read_cost(param, non_cpk_scan_records); |
  DBUG_PRINT("info", ("index_merge cost with rowid-to-row scan: %g",
                      imerge_cost));
4759 | if (imerge_cost > read_time || |
4760 | !optimizer_flag(param->thd, OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION)) |
4761 | { |
4762 | goto build_ror_index_merge; |
4763 | } |
4764 | |
4765 | /* Add Unique operations cost */ |
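  /*
    Rowids produced by the non-CPK scans are de-duplicated through a Unique
    object; first make sure the buffer used for the Unique cost calculation
    is large enough.
  */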
4766 | unique_calc_buff_size= |
4767 | Unique::get_cost_calc_buff_size((ulong)non_cpk_scan_records, |
4768 | param->table->file->ref_length, |
4769 | (size_t)param->thd->variables.sortbuff_size); |
4770 | if (param->imerge_cost_buff_size < unique_calc_buff_size) |
4771 | { |
4772 | if (!(param->imerge_cost_buff= (uint*)alloc_root(param-> |
---|