/* Copyright (c) 2000, 2015, Oracle and/or its affiliates.
   Copyright (c) 2008, 2015, MariaDB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */

/*
  TODO:
  Fix so that MAYBE_KEY is stored in the tree, so that we can detect the
  use of full hash keys for queries like:

  select s.id, kws.keyword_id from sites as s,kws where s.id=kws.site_id and kws.keyword_id in (204,205);

*/

/*
  This file contains:

  RangeAnalysisModule
    A module that accepts a condition, index (or partitioning) description,
    and builds lists of intervals (in index/partitioning space), such that
    all possible records that match the condition are contained within the
    intervals.
    The entry point for the range analysis module is the get_mm_tree()
    function.

    The lists are returned in the form of a complicated structure of
    interlinked SEL_TREE/SEL_IMERGE/SEL_ARG objects.
    See quick_range_seq_next, find_used_partitions for examples of how to
    walk this structure.
    All direct "users" of this module are located within this file, too.


  PartitionPruningModule
    A module that accepts a partitioned table and a condition, and finds
    which partitions we will need to use in query execution. Search down
    for "PartitionPruningModule" for a description.
    The module has a single entry point: the prune_partitions() function.


  Range/index_merge/groupby-minmax optimizer module
    A module that accepts a table and a condition, and returns
     - a QUICK_*_SELECT object that can be used to retrieve rows that match
       the specified condition, or a "no records will match the condition"
       statement.

    The module entry points are
      test_quick_select()
      get_quick_select_for_ref()


  Record retrieval code for range/index_merge/groupby-min-max.
    Implementations of QUICK_*_SELECT classes.

  KeyTupleFormat
  ~~~~~~~~~~~~~~
  The code in this file (and elsewhere) performs operations on key value
  tuples. Those tuples are stored in the following format:

  The tuple is a sequence of key part values. The length of a key part
  value depends only on its type (and not on the value being stored).

    KeyTuple: keypart1-data, keypart2-data, ...

  The value of each keypart is stored in the following format:

    keypart_data: [isnull_byte] keypart-value-bytes

  If a keypart may have a NULL value (key_part->field->real_maybe_null()
  can be used to check this), then the first byte is a NULL indicator with
  the following valid values:
    1  - keypart has NULL value.
    0  - keypart has non-NULL value.

  <questionable-statement> If isnull_byte==1 (NULL value), then the following
  keypart->length bytes must be 0.
  </questionable-statement>

  keypart-value-bytes holds the value. Its format depends on the field type.
  The length of keypart-value-bytes may or may not depend on the value being
  stored. The default is that the length is static and equal to
  KEY_PART_INFO::length.

  Key parts with (key_part_flag & HA_BLOB_PART) have a length depending on
  the value:

     keypart-value-bytes: value_length value_bytes

  The value_length part itself occupies HA_KEY_BLOB_LENGTH=2 bytes.

  See key_copy() and key_restore() for code to move data between an index
  tuple and a table record.

  CAUTION: the above description is only sergefp's understanding of the
  subject and may omit some details.
*/
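
/*
  Illustration (added; hypothetical index, not from the original notes):
  for an index over (a INT NOT NULL, b VARCHAR(10) NULL), the tuple for
  (a=5, b='ab') would look roughly like

    [ 4 bytes: value of a        ]
    [ 1 byte : isnull_byte == 0  ]
    [ 2 bytes: value_length == 2 ] [ 'a' 'b' <padding> ]

  b gets an isnull_byte because the column is nullable, and a 2-byte
  length prefix because it is a variable-length key part.
*/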
106
107#ifdef USE_PRAGMA_IMPLEMENTATION
108#pragma implementation // gcc: Class implementation
109#endif
110
111#include "mariadb.h"
112#include "sql_priv.h"
113#include "key.h" // is_key_used, key_copy, key_cmp, key_restore
114#include "sql_parse.h" // check_stack_overrun
115#include "sql_partition.h" // get_part_id_func, PARTITION_ITERATOR,
116 // struct partition_info, NOT_A_PARTITION_ID
117#include "records.h" // init_read_record, end_read_record
118#include <m_ctype.h>
119#include "sql_select.h"
120#include "sql_statistics.h"
121#include "uniques.h"
122
123#ifndef EXTRA_DEBUG
124#define test_rb_tree(A,B) {}
125#define test_use_count(A) {}
126#endif
127
128/*
129 Convert double value to #rows. Currently this does floor(), and we
130 might consider using round() instead.
131*/
132#define double2rows(x) ((ha_rows)(x))
133
134/*
135 this should be long enough so that any memcmp with a string that
136 starts from '\0' won't cross is_null_string boundaries, even
137 if the memcmp is optimized to compare 4- 8- or 16- bytes at once
138*/
139static uchar is_null_string[20]= {1,0};

/**
  Helper function to compare two SEL_ARG's.
*/
static bool all_same(const SEL_ARG *sa1, const SEL_ARG *sa2)
{
  if (sa1 == NULL && sa2 == NULL)
    return true;
  if ((sa1 != NULL && sa2 == NULL) || (sa1 == NULL && sa2 != NULL))
    return false;
  return sa1->all_same(sa2);
}

class SEL_IMERGE;

#define CLONE_KEY1_MAYBE 1
#define CLONE_KEY2_MAYBE 2
#define swap_clone_flag(A) (((A & 1) << 1) | ((A & 2) >> 1))
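
/*
  Note (added for clarity): swap_clone_flag() exchanges the
  CLONE_KEY1_MAYBE and CLONE_KEY2_MAYBE bits, so e.g.
  swap_clone_flag(CLONE_KEY1_MAYBE) == CLONE_KEY2_MAYBE. It is used
  when the two key arguments of an AND operation trade places.
*/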


/*
  While objects of the class SEL_ARG represent ranges for indexes or
  index infixes (including ranges for index prefixes and index suffixes),
  objects of the class SEL_TREE represent AND/OR formulas of such ranges.
  Currently an AND/OR formula represented by a SEL_TREE object can have
  at most three levels:

    <SEL_TREE formula> ::=
      [ <SEL_RANGE_TREE formula> AND ]
      [ <SEL_IMERGE formula> [ AND <SEL_IMERGE formula> ...] ]

    <SEL_RANGE_TREE formula> ::=
      <SEL_ARG formula> [ AND <SEL_ARG_formula> ... ]

    <SEL_IMERGE formula> ::=
      <SEL_RANGE_TREE formula> [ OR <SEL_RANGE_TREE formula> ]

  As we can see from the above definitions:
   - a SEL_RANGE_TREE formula is a conjunction of SEL_ARG formulas
   - a SEL_IMERGE formula is a disjunction of SEL_RANGE_TREE formulas
   - a SEL_TREE formula is a conjunction of a SEL_RANGE_TREE formula
     and SEL_IMERGE formulas.
  It's required above that a SEL_TREE formula has at least one conjunct.

  Usually we will consider normalized SEL_RANGE_TREE formulas where we use
  TRUE as conjunct members for those indexes whose SEL_ARG trees are empty.

  We will call a SEL_TREE object simply a 'tree'.
  The part of a tree that represents a SEL_RANGE_TREE formula is called the
  'range part' of the tree, while the remaining part is called the 'imerge
  part'. If a tree contains only a range part then we call such a tree a
  'range tree'. Components of a range tree that represent SEL_ARG formulas
  are called ranges.
  If a tree does not contain any range part we call such a tree an 'imerge
  tree'. Components of the imerge part of a tree that represent SEL_IMERGE
  formulas are called imerges.

  Usually we'll designate:
           SEL_TREE formulas         by T_1,...,T_k
           SEL_ARG formulas          by R_1,...,R_k
           SEL_RANGE_TREE formulas   by RT_1,...,RT_k
           SEL_IMERGE formulas       by M_1,...,M_k
  Accordingly we'll use:
           t_1,...,t_k  - to designate trees representing T_1,...,T_k
           r_1,...,r_k  - to designate ranges representing R_1,...,R_k
           rt_1,...,rt_k - to designate range trees representing RT_1,...,RT_k
           m_1,...,m_k  - to designate imerges representing M_1,...,M_k

  SEL_TREE objects are usually built from WHERE conditions or
  ON expressions.
  A SEL_TREE object always represents an inference of the condition it is
  built from. Therefore, if a row satisfies a SEL_TREE formula it also
  satisfies the condition it is built from.

  The following transformations of a tree t representing a SEL_TREE formula
  T yield a new tree t1 that represents an inference of T: T=>T1.
    (1) remove any SEL_ARG tree from the range part of t
    (2) remove any imerge from the tree t
    (3) remove any SEL_ARG tree from any range tree contained
        in any imerge of t

  Since the basic blocks of any SEL_TREE object are ranges, SEL_TREE
  objects in many cases can be effectively used to filter out a big part
  of table rows that do not satisfy WHERE/ON conditions utilizing
  only single or multiple range index scans.

  A single range index scan is constructed for a range tree that contains
  only one SEL_ARG object for an index or an index prefix.
  An index intersection scan can be constructed for a range tree
  that contains several SEL_ARG objects. Currently index intersection
  scans are constructed only for single-point ranges.
  An index merge scan is constructed for an imerge tree that contains only
  one imerge. If the range trees of this imerge contain only single-point
  ranges then a union of index intersections can be built.

  Usually the tree built by the range optimizer for a query table contains
  more than one range in the range part, and additionally may contain some
  imerges in the imerge part. The range optimizer evaluates all of them one
  by one and chooses the range or the imerge that provides the cheapest
  single or multiple range index scan of the table. According to rules
  (1)-(3) this scan always filters out only those rows that do not satisfy
  the query conditions.

  For any condition the SEL_TREE object for it is built in a bottom-up
  manner starting from the range trees for the predicates. The tree_and
  function builds a tree for any conjunction of formulas from the trees
  for its conjuncts. The tree_or function builds a tree for any disjunction
  of formulas from the trees for its disjuncts.
*/
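
/*
  Illustration (added; hypothetical condition): for

    (key1part1 < 10 AND key2part1 = 5) OR key3part1 > 7

  the resulting SEL_TREE would contain one imerge with two range trees:
  one holding SEL_ARG ranges for key1 and key2, and one holding a
  SEL_ARG range for key3. The conjunction
  key1part1 < 10 AND key2part1 = 5 alone would instead produce a pure
  range tree with ranges for both indexes in its range part.
*/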

class SEL_TREE :public Sql_alloc
{
public:
  /*
    Starting an effort to document this field:
    (for some i, keys[i]->type == SEL_ARG::IMPOSSIBLE) =>
       (type == SEL_TREE::IMPOSSIBLE)
  */
  enum Type { IMPOSSIBLE, ALWAYS, MAYBE, KEY, KEY_SMALLER } type;

  SEL_TREE(enum Type type_arg, MEM_ROOT *root, size_t num_keys)
    : type(type_arg), keys(root, num_keys), n_ror_scans(0)
  {
    keys_map.clear_all();
  }

  SEL_TREE(MEM_ROOT *root, size_t num_keys) :
    type(KEY), keys(root, num_keys), n_ror_scans(0)
  {
    keys_map.clear_all();
  }

  SEL_TREE(SEL_TREE *arg, bool without_merges, RANGE_OPT_PARAM *param);
  /*
    Note: there may exist SEL_TREE objects with sel_tree->type=KEY and
    keys[i]=0 for all i. (SergeyP: it is not clear whether there is any
    merit in range analyzer functions (e.g. get_mm_parts) returning a
    pointer to such a SEL_TREE instead of NULL)
  */
  Mem_root_array<SEL_ARG *, true> keys;

  key_map keys_map;        /* bitmask of non-NULL elements in keys */

  /*
    Possible ways to read rows using index_merge. The list is non-empty
    only if type==KEY. Currently it can be non-empty only if
    keys_map.is_clear_all().
  */
  List<SEL_IMERGE> merges;

  /* The members below are filled/used only after get_mm_tree is done */
  key_map ror_scans_map;   /* bitmask of ROR scan-able elements in keys */
  uint    n_ror_scans;     /* number of set bits in ror_scans_map */

  struct st_index_scan_info **index_scans;     /* list of index scans */
  struct st_index_scan_info **index_scans_end; /* last index scan */

  struct st_ror_scan_info **ror_scans;     /* list of ROR key scans */
  struct st_ror_scan_info **ror_scans_end; /* last ROR scan */
  /* Note that #records for each key scan is stored in table->quick_rows */

  bool without_ranges() { return keys_map.is_clear_all(); }
  bool without_imerges() { return merges.is_empty(); }
};


class PARAM : public RANGE_OPT_PARAM
{
public:
  ha_rows quick_rows[MAX_KEY];

  /*
    This will collect 'possible keys' based on the range optimization.

    Queries with a JOIN object actually use the ref optimizer (see
    add_key_field) to collect possible_keys. This is used by single table
    UPDATE/DELETE.
  */
  key_map possible_keys;
  longlong baseflag;
  uint max_key_part, range_count;

  bool quick;				// Don't calculate possible keys

  uint fields_bitmap_size;
  MY_BITMAP needed_fields;    /* bitmask of fields needed by the query */
  MY_BITMAP tmp_covered_fields;

  key_map *needed_reg;        /* ptr to SQL_SELECT::needed_reg */

  uint *imerge_cost_buff;     /* buffer for index_merge cost estimates */
  uint imerge_cost_buff_size; /* size of the buffer */

  /* TRUE if last checked tree->key can be used for ROR-scan */
  bool is_ror_scan;
  /* Number of ranges in the last checked tree->key */
  uint n_ranges;
  uint8 first_null_comp;      /* first null component if any, 0 - otherwise */
};


class TABLE_READ_PLAN;
  class TRP_RANGE;
  class TRP_ROR_INTERSECT;
  class TRP_ROR_UNION;
  class TRP_INDEX_INTERSECT;
  class TRP_INDEX_MERGE;
  class TRP_GROUP_MIN_MAX;

struct st_index_scan_info;
struct st_ror_scan_info;

static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts);
static ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
                                  SEL_ARG *tree, bool update_tbl_stats,
                                  uint *mrr_flags, uint *bufsize,
                                  Cost_estimate *cost);

QUICK_RANGE_SELECT *get_quick_select(PARAM *param,uint index,
                                     SEL_ARG *key_tree, uint mrr_flags,
                                     uint mrr_buf_size, MEM_ROOT *alloc);
static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
                                       bool index_read_must_be_used,
                                       bool update_tbl_stats,
                                       double read_time);
static
TRP_INDEX_INTERSECT *get_best_index_intersect(PARAM *param, SEL_TREE *tree,
                                              double read_time);
static
TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
                                          double read_time,
                                          bool *are_all_covering);
static
TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param,
                                                   SEL_TREE *tree,
                                                   double read_time);
static
TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
                                         double read_time);
static
TABLE_READ_PLAN *merge_same_index_scans(PARAM *param, SEL_IMERGE *imerge,
                                        TRP_INDEX_MERGE *imerge_trp,
                                        double read_time);
static
TRP_GROUP_MIN_MAX *get_best_group_min_max(PARAM *param, SEL_TREE *tree,
                                          double read_time);

#ifndef DBUG_OFF
static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map,
                           const char *msg);
static void print_ror_scans_arr(TABLE *table, const char *msg,
                                struct st_ror_scan_info **start,
                                struct st_ror_scan_info **end);
static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg);
#endif

static SEL_TREE *tree_and(RANGE_OPT_PARAM *param,
                          SEL_TREE *tree1, SEL_TREE *tree2);
static SEL_TREE *tree_or(RANGE_OPT_PARAM *param,
                         SEL_TREE *tree1,SEL_TREE *tree2);
static SEL_ARG *sel_add(SEL_ARG *key1,SEL_ARG *key2);
static SEL_ARG *key_or(RANGE_OPT_PARAM *param,
                       SEL_ARG *key1, SEL_ARG *key2);
static SEL_ARG *key_and(RANGE_OPT_PARAM *param,
                        SEL_ARG *key1, SEL_ARG *key2,
                        uint clone_flag);
static bool get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1);
bool get_quick_keys(PARAM *param,QUICK_RANGE_SELECT *quick,KEY_PART *key,
                    SEL_ARG *key_tree, uchar *min_key,uint min_key_flag,
                    uchar *max_key,uint max_key_flag);
static bool eq_tree(SEL_ARG* a,SEL_ARG *b);

static SEL_ARG null_element(SEL_ARG::IMPOSSIBLE);
static bool null_part_in_key(KEY_PART *key_part, const uchar *key,
                             uint length);
static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts);

#include "opt_range_mrr.cc"

static bool sel_trees_have_common_keys(SEL_TREE *tree1, SEL_TREE *tree2,
                                       key_map *common_keys);
static void eliminate_single_tree_imerges(RANGE_OPT_PARAM *param,
                                          SEL_TREE *tree);

static bool sel_trees_can_be_ored(RANGE_OPT_PARAM* param,
                                  SEL_TREE *tree1, SEL_TREE *tree2,
                                  key_map *common_keys);
static bool sel_trees_must_be_ored(RANGE_OPT_PARAM* param,
                                   SEL_TREE *tree1, SEL_TREE *tree2,
                                   key_map common_keys);
static int and_range_trees(RANGE_OPT_PARAM *param,
                           SEL_TREE *tree1, SEL_TREE *tree2,
                           SEL_TREE *result);
static bool remove_nonrange_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree);


/*
  SEL_IMERGE is a list of possible ways to do index merge, i.e. it is
  a condition in the following form:
   (t_1||t_2||...||t_N) && (next)

  where all t_i are SEL_TREEs, next is another SEL_IMERGE and no pair
  (t_i,t_j) contains SEL_ARGs for the same index.

  A SEL_TREE contained in a SEL_IMERGE always has merges=NULL.

  This class relies on the memory manager to do the cleanup.
*/
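
/*
  Illustration (added; hypothetical condition): for

    (key1 < 3 OR key2 > 7) AND key3 = 5

  pushing the range tree for key3 = 5 into the imerge for
  (key1 < 3 OR key2 > 7) yields an imerge for

    (key1 < 3 AND key3 = 5) OR (key2 > 7 AND key3 = 5)

  which is the kind of transformation SEL_IMERGE::and_sel_tree() performs.
*/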

class SEL_IMERGE : public Sql_alloc
{
  enum { PREALLOCED_TREES= 10};
public:
  SEL_TREE *trees_prealloced[PREALLOCED_TREES];
  SEL_TREE **trees;             /* trees used to do index_merge   */
  SEL_TREE **trees_next;        /* last of these trees            */
  SEL_TREE **trees_end;         /* end of allocated space         */

  SEL_ARG  ***best_keys;        /* best keys to read in SEL_TREEs */

  SEL_IMERGE() :
    trees(&trees_prealloced[0]),
    trees_next(trees),
    trees_end(trees + PREALLOCED_TREES)
  {}
  SEL_IMERGE (SEL_IMERGE *arg, uint cnt, RANGE_OPT_PARAM *param);
  int or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree);
  bool have_common_keys(RANGE_OPT_PARAM *param, SEL_TREE *tree);
  int and_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree,
                   SEL_IMERGE *new_imerge);
  int or_sel_tree_with_checks(RANGE_OPT_PARAM *param,
                              uint n_init_trees,
                              SEL_TREE *new_tree,
                              bool is_first_check_pass,
                              bool *is_last_check_pass);
  int or_sel_imerge_with_checks(RANGE_OPT_PARAM *param,
                                uint n_init_trees,
                                SEL_IMERGE* imerge,
                                bool is_first_check_pass,
                                bool *is_last_check_pass);
};


/*
  Add a range tree to the range trees of this imerge

  SYNOPSIS
    or_sel_tree()
      param     Context info for the operation
      tree      SEL_TREE to add to this imerge

  DESCRIPTION
    The function just adds the range tree 'tree' to the range trees
    of this imerge.

  RETURN
     0   if the operation is a success
    -1   if the function runs out of memory
*/

int SEL_IMERGE::or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree)
{
  if (trees_next == trees_end)
  {
    const int realloc_ratio= 2;		/* Double size for next round */
    size_t old_elements= (trees_end - trees);
    size_t old_size= sizeof(SEL_TREE**) * old_elements;
    size_t new_size= old_size * realloc_ratio;
    SEL_TREE **new_trees;
    if (!(new_trees= (SEL_TREE**)alloc_root(param->mem_root, new_size)))
      return -1;
    memcpy(new_trees, trees, old_size);
    trees=      new_trees;
    trees_next= trees + old_elements;
    trees_end=  trees + old_elements * realloc_ratio;
  }
  *(trees_next++)= tree;
  return 0;
}


/*
  Check if any of the range trees of this imerge intersects with a given tree

  SYNOPSIS
    have_common_keys()
      param    Context info for the function
      tree     SEL_TREE intersection with the imerge range trees is checked
               for

  DESCRIPTION
    The function checks whether there is any range tree rt_i in this imerge
    such that there are some indexes for which ranges are defined in both
    rt_i and the range part of the SEL_TREE tree.
    To check this the function calls the function sel_trees_have_common_keys.

  RETURN
    TRUE    if there are such range trees in this imerge
    FALSE   otherwise
*/

bool SEL_IMERGE::have_common_keys(RANGE_OPT_PARAM *param, SEL_TREE *tree)
{
  for (SEL_TREE** or_tree= trees, **bound= trees_next;
       or_tree != bound; or_tree++)
  {
    key_map common_keys;
    if (sel_trees_have_common_keys(*or_tree, tree, &common_keys))
      return TRUE;
  }
  return FALSE;
}


/*
  Perform an AND operation for this imerge and the range part of a tree

  SYNOPSIS
    and_sel_tree()
      param           Context info for the operation
      tree            SEL_TREE for the second operand of the operation
      new_imerge  OUT imerge for the result of the operation

  DESCRIPTION
    This function performs an AND operation for this imerge m and the
    range part of the SEL_TREE tree rt. In other words the function
    pushes rt into this imerge. The resulting imerge is returned in
    the parameter new_imerge.
    If this imerge m represents the formula
      RT_1 OR ... OR RT_k
    then the resulting imerge of the function represents the formula
      (RT_1 AND RT) OR ... OR (RT_k AND RT)
    The function calls the function and_range_trees to construct the
    range tree representing (RT_i AND RT).

  NOTE
    The function may return an empty imerge without any range trees.
    This happens when each call of and_range_trees returns an
    impossible range tree (SEL_TREE::IMPOSSIBLE).
    Example: (key1 < 2 AND key2 > 10) AND (key1 > 4 OR key2 < 6).

  RETURN
     0  if the operation is a success
    -1  otherwise: there is not enough memory to perform the operation
*/

int SEL_IMERGE::and_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree,
                             SEL_IMERGE *new_imerge)
{
  for (SEL_TREE** or_tree= trees; or_tree != trees_next; or_tree++)
  {
    SEL_TREE *res_or_tree= 0;
    SEL_TREE *and_tree= 0;
    if (!(res_or_tree= new SEL_TREE(param->mem_root, param->keys)) ||
        !(and_tree= new SEL_TREE(tree, TRUE, param)))
      return (-1);
    if (!and_range_trees(param, *or_tree, and_tree, res_or_tree))
    {
      if (new_imerge->or_sel_tree(param, res_or_tree))
        return (-1);
    }
  }
  return 0;
}


/*
  Perform an OR operation on this imerge and the range part of a tree

  SYNOPSIS
    or_sel_tree_with_checks()
      param                  Context info for the operation
      n_trees                Number of trees in this imerge to check for
                             oring
      tree                   SEL_TREE whose range part is to be ored
      is_first_check_pass    <=> the first call of the function for this
                             imerge
      is_last_check_pass OUT <=> no more calls of the function for this
                             imerge

  DESCRIPTION
    The function performs an OR operation on this imerge m and the range
    part of the SEL_TREE tree rt. It always replaces this imerge with the
    result of the operation.

    The operation can be performed in two different modes: with
    is_first_check_pass==TRUE and is_first_check_pass==FALSE, transforming
    this imerge differently.

    Given this imerge represents the formula
      RT_1 OR ... OR RT_k:

    1. In the first mode, when is_first_check_pass==TRUE :
      1.1. If rt must be ored (see the function sel_trees_must_be_ored)
           with some rt_j (there may be only one such range tree in the
           imerge) then the function produces an imerge representing the
           formula
             RT_1 OR ... OR (RT_j OR RT) OR ... OR RT_k,
           where the tree for (RT_j OR RT) is built by oring the pairs
           of SEL_ARG trees for the corresponding indexes
      1.2. Otherwise the function produces the imerge representing the
           formula:
             RT_1 OR ... OR RT_k OR RT.

    2. In the second mode, when is_first_check_pass==FALSE :
      2.1. For each rt_j in the imerge that can be ored (see the function
           sel_trees_can_be_ored) with rt the function replaces rt_j with a
           range tree such that for each index for which ranges are defined
           in both rt_j and rt the tree contains the result of oring of
           these ranges.
      2.2. In other cases the function does not produce any imerge.

    When is_first_check_pass==TRUE the function returns FALSE in the
    parameter is_last_check_pass if there is no rt_j such that rt_j can be
    ored with rt, but, at the same time, it's not true that rt_j must be
    ored with rt.
    When is_first_check_pass==FALSE the function always returns FALSE in
    the parameter is_last_check_pass.

  RETURN
    1  The oring of rt_j and rt that must be ored returns a
       range tree with type==SEL_TREE::ALWAYS
       (in this case the imerge m should be discarded)
   -1  The function runs out of memory
    0  in all other cases
*/
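
/*
  Illustration (added; hypothetical ranges): let this imerge be
  (key1 < 3) OR (key2 > 7) and let rt represent key1 > 10.
  If sel_trees_must_be_ored() decides that rt must be ored with the
  first range tree, the first pass produces
  (key1 < 3 OR key1 > 10) OR (key2 > 7); otherwise the first pass
  appends rt as a new disjunct, giving
  (key1 < 3) OR (key2 > 7) OR (key1 > 10).
*/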

int SEL_IMERGE::or_sel_tree_with_checks(RANGE_OPT_PARAM *param,
                                        uint n_trees,
                                        SEL_TREE *tree,
                                        bool is_first_check_pass,
                                        bool *is_last_check_pass)
{
  bool was_ored= FALSE;
  *is_last_check_pass= is_first_check_pass;
  SEL_TREE** or_tree= trees;
  for (uint i= 0; i < n_trees; i++, or_tree++)
  {
    SEL_TREE *result= 0;
    key_map result_keys;
    key_map ored_keys;
    if (sel_trees_can_be_ored(param, *or_tree, tree, &ored_keys))
    {
      bool must_be_ored= sel_trees_must_be_ored(param, *or_tree, tree,
                                                ored_keys);
      if (must_be_ored || !is_first_check_pass)
      {
        result_keys.clear_all();
        result= *or_tree;
        for (uint key_no= 0; key_no < param->keys; key_no++)
        {
          if (!ored_keys.is_set(key_no))
          {
            result->keys[key_no]= 0;
            continue;
          }
          SEL_ARG *key1= (*or_tree)->keys[key_no];
          SEL_ARG *key2= tree->keys[key_no];
          key2->incr_refs();
          if ((result->keys[key_no]= key_or(param, key1, key2)))
          {
            result_keys.set_bit(key_no);
#ifdef EXTRA_DEBUG
            if (param->alloced_sel_args < SEL_ARG::MAX_SEL_ARGS)
            {
              key1= result->keys[key_no];
              (key1)->test_use_count(key1);
            }
#endif
          }
        }
      }
      else if (is_first_check_pass)
        *is_last_check_pass= FALSE;
    }

    if (result)
    {
      result->keys_map= result_keys;
      if (result_keys.is_clear_all())
        result->type= SEL_TREE::ALWAYS;
      if ((result->type == SEL_TREE::MAYBE) ||
          (result->type == SEL_TREE::ALWAYS))
        return 1;
      /* SEL_TREE::IMPOSSIBLE is impossible here */
      *or_tree= result;
      was_ored= TRUE;
    }
  }
  if (was_ored)
    return 0;

  if (is_first_check_pass && !*is_last_check_pass &&
      !(tree= new SEL_TREE(tree, FALSE, param)))
    return (-1);
  return or_sel_tree(param, tree);
}


/*
  Perform an OR operation on this imerge and another imerge

  SYNOPSIS
    or_sel_imerge_with_checks()
      param                  Context info for the operation
      n_trees                Number of trees in this imerge to check for
                             oring
      imerge                 The second operand of the operation
      is_first_check_pass    <=> the first call of the function for this
                             imerge
      is_last_check_pass OUT <=> no more calls of the function for this
                             imerge

  DESCRIPTION
    For each range tree rt from 'imerge' the function calls the method
    SEL_IMERGE::or_sel_tree_with_checks that performs an OR operation on
    this SEL_IMERGE object m and the tree rt. The mode of the operation is
    specified by the parameter is_first_check_pass. Each call of
    SEL_IMERGE::or_sel_tree_with_checks transforms this SEL_IMERGE object m.
    The function returns FALSE in the parameter is_last_check_pass if
    at least one of the calls of SEL_IMERGE::or_sel_tree_with_checks
    returns FALSE as the value of its last parameter.

  RETURN
    1  One of the calls of SEL_IMERGE::or_sel_tree_with_checks returns 1.
       (in this case the imerge m should be discarded)
   -1  The function runs out of memory
    0  in all other cases
*/

int SEL_IMERGE::or_sel_imerge_with_checks(RANGE_OPT_PARAM *param,
                                          uint n_trees,
                                          SEL_IMERGE* imerge,
                                          bool is_first_check_pass,
                                          bool *is_last_check_pass)
{
  *is_last_check_pass= TRUE;
  SEL_TREE** tree= imerge->trees;
  SEL_TREE** tree_end= imerge->trees_next;
  for ( ; tree < tree_end; tree++)
  {
    uint rc;
    bool is_last= TRUE;
    rc= or_sel_tree_with_checks(param, n_trees, *tree,
                                is_first_check_pass, &is_last);
    if (!is_last)
      *is_last_check_pass= FALSE;
    if (rc)
      return rc;
  }
  return 0;
}


/*
  Copy constructor for SEL_TREE objects

  SYNOPSIS
    SEL_TREE
      arg            The source tree for the constructor
      without_merges <=> only the range part of the tree arg is copied
      param          Context info for the operation

  DESCRIPTION
    The constructor creates a full copy of the SEL_TREE arg if
    the parameter without_merges==FALSE. Otherwise a tree is created
    that contains a copy of only the range part of the tree arg.
*/

SEL_TREE::SEL_TREE(SEL_TREE *arg, bool without_merges,
                   RANGE_OPT_PARAM *param)
  : Sql_alloc(),
    keys(param->mem_root, param->keys),
    n_ror_scans(0)
{
  keys_map= arg->keys_map;
  type= arg->type;
  MEM_ROOT *mem_root;

  for (uint idx= 0; idx < param->keys; idx++)
  {
    if ((keys[idx]= arg->keys[idx]))
      keys[idx]->incr_refs_all();
  }

  if (without_merges)
    return;

  mem_root= current_thd->mem_root;
  List_iterator<SEL_IMERGE> it(arg->merges);
  for (SEL_IMERGE *el= it++; el; el= it++)
  {
    SEL_IMERGE *merge= new (mem_root) SEL_IMERGE(el, 0, param);
    if (!merge || merge->trees == merge->trees_next)
    {
      merges.empty();
      return;
    }
    merges.push_back(merge, mem_root);
  }
}


/*
  Copy constructor for SEL_IMERGE objects

  SYNOPSIS
    SEL_IMERGE
      arg         The source imerge for the constructor
      cnt         How many trees from arg are to be copied
      param       Context info for the operation

  DESCRIPTION
    If cnt==0 then the constructor creates a full copy of the
    imerge arg. Otherwise only the first cnt trees of the imerge
    are copied.
*/

SEL_IMERGE::SEL_IMERGE(SEL_IMERGE *arg, uint cnt,
                       RANGE_OPT_PARAM *param) : Sql_alloc()
{
  size_t elements= (arg->trees_end - arg->trees);
  if (elements > PREALLOCED_TREES)
  {
    size_t size= elements * sizeof (SEL_TREE **);
    if (!(trees= (SEL_TREE **)alloc_root(param->mem_root, size)))
      goto mem_err;
  }
  else
    trees= &trees_prealloced[0];

  trees_next= trees + (cnt ? cnt : arg->trees_next-arg->trees);
  trees_end= trees + elements;

  for (SEL_TREE **tree= trees, **arg_tree= arg->trees; tree < trees_next;
       tree++, arg_tree++)
  {
    if (!(*tree= new SEL_TREE(*arg_tree, TRUE, param)))
      goto mem_err;
  }

  return;

mem_err:
  trees= &trees_prealloced[0];
  trees_next= trees;
  trees_end= trees;
}


/*
  Perform an AND operation on two imerge lists

  SYNOPSIS
    imerge_list_and_list()
      im1     The first imerge list for the operation
      im2     The second imerge list for the operation

  DESCRIPTION
    The function just appends the imerge list im2 to the imerge list im1

  RETURN VALUE
    none
*/

inline void imerge_list_and_list(List<SEL_IMERGE> *im1, List<SEL_IMERGE> *im2)
{
  im1->append(im2);
}


/*
  Perform an OR operation on two imerge lists

  SYNOPSIS
    imerge_list_or_list()
      param     Context info for the operation
      im1       The first imerge list for the operation
      im2       The second imerge list for the operation

  DESCRIPTION
    Assuming that the first imerge list represents the formula
      F1= M1_1 AND ... AND M1_k1
    while the second imerge list represents the formula
      F2= M2_1 AND ... AND M2_k2,
    where M1_i= RT1_i_1 OR ... OR RT1_i_l1i (i in [1..k1])
    and M2_i = RT2_i_1 OR ... OR RT2_i_l2i (i in [1..k2]),
    the function builds a list of imerges for some formula that can be
    inferred from the formula (F1 OR F2).

    More exactly the function builds imerges for the formula (M1_1 OR M2_1).
    Note that
      (F1 OR F2) = (M1_1 AND ... AND M1_k1) OR (M2_1 AND ... AND M2_k2) =
      AND (M1_i OR M2_j) (i in [1..k1], j in [1..k2]) =>
      M1_1 OR M2_1.
    So (M1_1 OR M2_1) is indeed an inference formula for (F1 OR F2).

    To build imerges for the formula (M1_1 OR M2_1) the function invokes,
    possibly twice, the method SEL_IMERGE::or_sel_imerge_with_checks
    for the imerge m1_1.
    At its first invocation the method SEL_IMERGE::or_sel_imerge_with_checks
    performs the OR operation on the imerge m1_1 and the range tree rt2_1_1
    by calling SEL_IMERGE::or_sel_tree_with_checks with
    is_first_check_pass==TRUE. The resulting imerge of the operation is ored
    with the next range tree of the imerge m2_1. This oring continues until
    the last range tree from m2_1 has been ored.
    At its second invocation the method SEL_IMERGE::or_sel_imerge_with_checks
    performs the same sequence of OR operations, but now calling
    SEL_IMERGE::or_sel_tree_with_checks with is_first_check_pass==FALSE.

    The imerges that the operation produces replace those in the list im1

  RETURN
    0  if the operation is a success
   -1  if the function has run out of memory
*/

int imerge_list_or_list(RANGE_OPT_PARAM *param,
                        List<SEL_IMERGE> *im1,
                        List<SEL_IMERGE> *im2)
{
  uint rc;
  bool is_last_check_pass= FALSE;
  SEL_IMERGE *imerge= im1->head();
  uint elems= (uint)(imerge->trees_next-imerge->trees);
  MEM_ROOT *mem_root= current_thd->mem_root;

  im1->empty();
  im1->push_back(imerge, mem_root);

  rc= imerge->or_sel_imerge_with_checks(param, elems, im2->head(),
                                        TRUE, &is_last_check_pass);
  if (rc)
  {
    if (rc == 1)
    {
      im1->empty();
      rc= 0;
    }
    return rc;
  }

  if (!is_last_check_pass)
  {
    SEL_IMERGE* new_imerge= new (mem_root) SEL_IMERGE(imerge, elems, param);
    if (new_imerge)
    {
      is_last_check_pass= TRUE;
      rc= new_imerge->or_sel_imerge_with_checks(param, elems, im2->head(),
                                                FALSE, &is_last_check_pass);
      if (!rc)
        im1->push_back(new_imerge, mem_root);
    }
  }
  return rc;
}


/*
  Perform an OR operation for each imerge from a list and the range part
  of a tree

  SYNOPSIS
    imerge_list_or_tree()
      param       Context info for the operation
      merges      The list of imerges to be ored with the range part of tree
      tree        SEL_TREE whose range part is to be ored with the imerges

  DESCRIPTION
    For each imerge mi from the list 'merges' the function performs an OR
    operation with mi and the range part of 'tree' rt, producing one or
    two imerges.

    Given that the imerge mi represents the formula RTi_1 OR ... OR RTi_k,
    the function forms the merges by the following rules:

    1. If rt cannot be ored with any of the trees rti the function just
       produces an imerge that represents the formula
         RTi_1 OR ... OR RTi_k OR RT.
    2. If there exists a tree rtj that must be ored with rt the function
       produces an imerge that represents the formula
         RTi_1 OR ... OR (RTi_j OR RT) OR ... OR RTi_k,
       where the range tree for (RTi_j OR RT) is constructed by oring the
       SEL_ARG trees that must be ored.
    3. For each rti_j that can be ored with rt the function produces
       the new tree rti_j' and substitutes rti_j with this new range tree.

    In any case the function removes mi from the list and then adds all
    produced imerges.

    To build imerges by rules 1-3 the function calls the method
    SEL_IMERGE::or_sel_tree_with_checks, possibly twice. With the first
    call it passes TRUE as the is_first_check_pass argument.
    At this first call imerges by rules 1-2 are built. If the call
    returns FALSE in its is_last_check_pass parameter then the
    function is called a second time. This call produces the imerge
    of rule 3.

    If a call of SEL_IMERGE::or_sel_tree_with_checks returns 1 then
    it means that the produced tree contains an always true
    range tree and the whole imerge can be discarded.

  RETURN
    1    if no imerges are produced
    0    otherwise
*/
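
/*
  Illustration (added; hypothetical ranges): with merges containing the
  single imerge (key1 < 3) OR (key2 > 7) and 'tree' holding a range only
  for key3 (say key3 < 5), rt cannot be ored with either range tree, so
  rule 1 applies and the imerge becomes
  (key1 < 3) OR (key2 > 7) OR (key3 < 5).
*/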

static
int imerge_list_or_tree(RANGE_OPT_PARAM *param,
                        List<SEL_IMERGE> *merges,
                        SEL_TREE *tree)
{
  SEL_IMERGE *imerge;
  List<SEL_IMERGE> additional_merges;
  List_iterator<SEL_IMERGE> it(*merges);
  MEM_ROOT *mem_root= current_thd->mem_root;

  while ((imerge= it++))
  {
    bool is_last_check_pass;
    int rc= 0;
    int rc1= 0;
    SEL_TREE *or_tree= new (mem_root) SEL_TREE (tree, FALSE, param);
    if (or_tree)
    {
      uint elems= (uint)(imerge->trees_next-imerge->trees);
      rc= imerge->or_sel_tree_with_checks(param, elems, or_tree,
                                          TRUE, &is_last_check_pass);
      if (!is_last_check_pass)
      {
        SEL_IMERGE *new_imerge= new (mem_root) SEL_IMERGE(imerge, elems,
                                                          param);
        if (new_imerge)
        {
          rc1= new_imerge->or_sel_tree_with_checks(param, elems, or_tree,
                                                   FALSE, &is_last_check_pass);
          if (!rc1)
            additional_merges.push_back(new_imerge, mem_root);
        }
      }
    }
    if (rc || rc1 || !or_tree)
      it.remove();
  }

  merges->append(&additional_merges);
  return merges->is_empty();
}


/*
  Perform the pushdown operation of the range part of a tree into given
  imerges

  SYNOPSIS
    imerge_list_and_tree()
      param           Context info for the operation
      merges   IN/OUT List of imerges to push the range part of 'tree' into
      tree            SEL_TREE whose range part is to be pushed into imerges
      replace         If the pushdown operation for an imerge is a success
                      then the original imerge is replaced with the result
                      of the pushdown

  DESCRIPTION
    For each imerge from the list merges the function pushes the range part
    rt of 'tree' into the imerge.
    More exactly if the imerge mi from the list represents the formula
      RTi_1 OR ... OR RTi_k
    the function builds a new imerge that represents the formula
      (RTi_1 AND RT) OR ... OR (RTi_k AND RT)
    and adds this imerge to the list merges.
    To perform this pushdown operation the function calls the method
    SEL_IMERGE::and_sel_tree.
    For any imerge mi the new imerge is not created if for each pair of
    trees rti_j and rt the intersection of the indexes with defined ranges
    is empty.
    If the result of the pushdown operation for the imerge mi returns an
    imerge with no trees then not only is nothing added to the list merges,
    but mi itself is removed from the list.

  TODO
    Optimize the code in order not to create new SEL_IMERGE and new SEL_TREE
    objects when 'replace' is TRUE. (Currently this function is always
    called with this parameter equal to TRUE.)

  RETURN
    1    if no imerges are left in the list merges
    0    otherwise
*/

static
int imerge_list_and_tree(RANGE_OPT_PARAM *param,
                         List<SEL_IMERGE> *merges,
                         SEL_TREE *tree,
                         bool replace)
{
  SEL_IMERGE *imerge;
  SEL_IMERGE *new_imerge= NULL;
  List<SEL_IMERGE> new_merges;
  List_iterator<SEL_IMERGE> it(*merges);
  MEM_ROOT *mem_root= current_thd->mem_root;

  while ((imerge= it++))
  {
    if (!new_imerge)
      new_imerge= new (mem_root) SEL_IMERGE();
    if (imerge->have_common_keys(param, tree) &&
        new_imerge && !imerge->and_sel_tree(param, tree, new_imerge))
    {
      if (new_imerge->trees == new_imerge->trees_next)
        it.remove();
      else
      {
        if (replace)
          it.replace(new_imerge);
        else
          new_merges.push_back(new_imerge, mem_root);
        new_imerge= NULL;
      }
    }
  }
  imerge_list_and_list(&new_merges, merges);
  *merges= new_merges;
  return merges->is_empty();
}


/***************************************************************************
** Basic functions for SQL_SELECT and QUICK_RANGE_SELECT
***************************************************************************/

	/* Make a select from mysql info.
	   *error is set as follows:
	     0 = ok
	     1 = Got some error (out of memory?)
	*/

SQL_SELECT *make_select(TABLE *head, table_map const_tables,
                        table_map read_tables, COND *conds,
                        SORT_INFO *filesort,
                        bool allow_null_cond,
                        int *error)
{
  SQL_SELECT *select;
  DBUG_ENTER("make_select");

  *error=0;

  if (!conds && !allow_null_cond)
    DBUG_RETURN(0);
  if (!(select= new (head->in_use->mem_root) SQL_SELECT))
  {
    *error= 1;			// out of memory
    DBUG_RETURN(0);		/* purecov: inspected */
  }
  select->read_tables=read_tables;
  select->const_tables=const_tables;
  select->head=head;
  select->cond= conds;

  if (filesort && my_b_inited(&filesort->io_cache))
  {
    /*
      Hijack the filesort io_cache for make_select.
      SQL_SELECT will be responsible for ensuring that it's properly freed.
    */
    select->file= filesort->io_cache;
    select->records=(ha_rows) (select->file.end_of_file/
                               head->file->ref_length);
    my_b_clear(&filesort->io_cache);
  }
  DBUG_RETURN(select);
}


SQL_SELECT::SQL_SELECT() :quick(0),cond(0),pre_idx_push_select_cond(NULL),free_cond(0)
{
  quick_keys.clear_all(); needed_reg.clear_all();
  my_b_clear(&file);
}


void SQL_SELECT::cleanup()
{
  delete quick;
  quick= 0;
  if (free_cond)
  {
    free_cond=0;
    delete cond;
    cond= 0;
  }
  close_cached_file(&file);
}


SQL_SELECT::~SQL_SELECT()
{
  cleanup();
}

#undef index					// Fix for Unixware 7

QUICK_SELECT_I::QUICK_SELECT_I()
  :max_used_key_length(0),
   used_key_parts(0)
{}

QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(THD *thd, TABLE *table, uint key_nr,
                                       bool no_alloc, MEM_ROOT *parent_alloc,
                                       bool *create_error)
  :thd(thd), no_alloc(no_alloc), parent_alloc(parent_alloc),
   free_file(0),cur_range(NULL),last_range(0),dont_free(0)
{
  my_bitmap_map *bitmap;
  DBUG_ENTER("QUICK_RANGE_SELECT::QUICK_RANGE_SELECT");

  in_ror_merged_scan= 0;
  index= key_nr;
  head= table;
  key_part_info= head->key_info[index].key_part;

  /* 'thd' is not accessible in QUICK_RANGE_SELECT::reset(). */
  mrr_buf_size= thd->variables.mrr_buff_size;
  mrr_buf_desc= NULL;

  if (!no_alloc && !parent_alloc)
  {
    // Allocates everything through the internal memroot
    init_sql_alloc(&alloc, "QUICK_RANGE_SELECT",
                   thd->variables.range_alloc_block_size, 0,
                   MYF(MY_THREAD_SPECIFIC));
    thd->mem_root= &alloc;
  }
  else
    bzero((char*) &alloc,sizeof(alloc));
  file= head->file;
  record= head->record[0];

  my_init_dynamic_array2(&ranges, sizeof(QUICK_RANGE*),
                         thd->alloc(sizeof(QUICK_RANGE*) * 16), 16, 16,
                         MYF(MY_THREAD_SPECIFIC));

  /* Allocate a bitmap for used columns */
  if (!(bitmap= (my_bitmap_map*) thd->alloc(head->s->column_bitmap_size)))
  {
    column_bitmap.bitmap= 0;
    *create_error= 1;
  }
  else
    my_bitmap_init(&column_bitmap, bitmap, head->s->fields, FALSE);
  DBUG_VOID_RETURN;
}


void QUICK_RANGE_SELECT::need_sorted_output()
{
  if (!(mrr_flags & HA_MRR_SORTED))
  {
    /*
      The native MRR implementation can't produce sorted output. We'll
      have to switch to the default implementation.
    */
    mrr_flags |= HA_MRR_USE_DEFAULT_IMPL;
  }
  mrr_flags |= HA_MRR_SORTED;
}


int QUICK_RANGE_SELECT::init()
{
  DBUG_ENTER("QUICK_RANGE_SELECT::init");

  if (file->inited != handler::NONE)
    file->ha_index_or_rnd_end();
  DBUG_RETURN(FALSE);
}


void QUICK_RANGE_SELECT::range_end()
{
  if (file->inited != handler::NONE)
    file->ha_index_or_rnd_end();
}


QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT()
{
  DBUG_ENTER("QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT");
  if (!dont_free)
  {
    /* file is NULL for CPK scan on covering ROR-intersection */
    if (file)
    {
      range_end();
      file->ha_end_keyread();
      if (free_file)
      {
        DBUG_PRINT("info", ("Freeing separate handler %p (free: %d)", file,
                            free_file));
        file->ha_external_lock(current_thd, F_UNLCK);
        file->ha_close();
        delete file;
      }
    }
    delete_dynamic(&ranges); /* ranges are allocated in alloc */
    free_root(&alloc,MYF(0));
  }
  my_free(mrr_buf_desc);
  DBUG_VOID_RETURN;
}

/*
  QUICK_INDEX_SORT_SELECT works as follows:
  - Do index scans, accumulate rowids in the Unique object
    (Unique will also sort and de-duplicate rowids)
  - Use rowids from unique to run a disk-ordered sweep
*/
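
/*
  Sketch (added; simplified pseudocode, not the actual control flow):

    for each merged QUICK_RANGE_SELECT qs
      while (qs->get_next() == 0)
        unique->unique_add(rowid of the current row);  // sorts + dedups
    for each rowid in unique, in disk order
      ha_rnd_pos(record, rowid);                       // the sweep

  The real work is done by read_keys_and_merge() and the get_next()
  methods of the subclasses; a clustered PK scan, if present, is handled
  separately (see push_quick_back() below).
*/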

QUICK_INDEX_SORT_SELECT::QUICK_INDEX_SORT_SELECT(THD *thd_param,
                                                 TABLE *table)
  :unique(NULL), pk_quick_select(NULL), thd(thd_param)
{
  DBUG_ENTER("QUICK_INDEX_SORT_SELECT::QUICK_INDEX_SORT_SELECT");
  index= MAX_KEY;
  head= table;
  bzero(&read_record, sizeof(read_record));
  init_sql_alloc(&alloc, "QUICK_INDEX_SORT_SELECT",
                 thd->variables.range_alloc_block_size, 0,
                 MYF(MY_THREAD_SPECIFIC));
  DBUG_VOID_RETURN;
}

int QUICK_INDEX_SORT_SELECT::init()
{
  DBUG_ENTER("QUICK_INDEX_SORT_SELECT::init");
  DBUG_RETURN(0);
}

int QUICK_INDEX_SORT_SELECT::reset()
{
  DBUG_ENTER("QUICK_INDEX_SORT_SELECT::reset");
  const int retval= read_keys_and_merge();
  DBUG_RETURN(retval);
}

bool
QUICK_INDEX_SORT_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick_sel_range)
{
  DBUG_ENTER("QUICK_INDEX_SORT_SELECT::push_quick_back");
  if (head->file->primary_key_is_clustered() &&
      quick_sel_range->index == head->s->primary_key)
  {
    /*
      A quick_select over a clustered primary key is handled specifically.
      Here we assume:
      1. PK columns are included in any other merged index
      2. A scan on the PK is disk-ordered.
      (not meeting #2 will only cause performance degradation)

      We could treat the clustered PK as any other index, but that would
      be inefficient. There is no point in doing a scan on the
      CPK, remembering the rowid, then making an rnd_pos() call with
      that rowid.
    */
    pk_quick_select= quick_sel_range;
    DBUG_RETURN(0);
  }
  DBUG_RETURN(quick_selects.push_back(quick_sel_range, thd->mem_root));
}

QUICK_INDEX_SORT_SELECT::~QUICK_INDEX_SORT_SELECT()
{
  List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects);
  QUICK_RANGE_SELECT* quick;
  DBUG_ENTER("QUICK_INDEX_SORT_SELECT::~QUICK_INDEX_SORT_SELECT");
  delete unique;
  quick_it.rewind();
  while ((quick= quick_it++))
    quick->file= NULL;
  quick_selects.delete_elements();
  delete pk_quick_select;
  /* It's ok to call the next two even if they are already deinitialized */
  end_read_record(&read_record);
  free_root(&alloc,MYF(0));
  DBUG_VOID_RETURN;
}

QUICK_ROR_INTERSECT_SELECT::QUICK_ROR_INTERSECT_SELECT(THD *thd_param,
                                                       TABLE *table,
                                                       bool retrieve_full_rows,
                                                       MEM_ROOT *parent_alloc)
  : cpk_quick(NULL), thd(thd_param), need_to_fetch_row(retrieve_full_rows),
    scans_inited(FALSE)
{
  index= MAX_KEY;
  head= table;
  record= head->record[0];
  if (!parent_alloc)
    init_sql_alloc(&alloc, "QUICK_ROR_INTERSECT_SELECT",
                   thd->variables.range_alloc_block_size, 0,
                   MYF(MY_THREAD_SPECIFIC));
  else
    bzero(&alloc, sizeof(MEM_ROOT));
  last_rowid= (uchar*) alloc_root(parent_alloc? parent_alloc : &alloc,
                                  head->file->ref_length);
}


/*
  Do post-constructor initialization.
  SYNOPSIS
    QUICK_ROR_INTERSECT_SELECT::init()

  RETURN
    0      OK
    other  Error code
*/

int QUICK_ROR_INTERSECT_SELECT::init()
{
  DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::init");
  /* Check if last_rowid was successfully allocated in ctor */
  DBUG_RETURN(!last_rowid);
}


/*
  Initialize this quick select to be a ROR-merged scan.

  SYNOPSIS
    QUICK_RANGE_SELECT::init_ror_merged_scan()
      reuse_handler If TRUE, use head->file, otherwise create a separate
                    handler object

  NOTES
    This function creates and prepares for subsequent use a separate handler
    object if it can't reuse head->file. The reason for this is that during
    a ROR-merge several key scans are performed simultaneously, and a single
    handler is only capable of preserving the context of a single key scan.

    In a ROR-merge the quick select doing the merge retrieves full records,
    while the merged quick selects read only keys.

  RETURN
    0  ROR child scan initialized, ok to use.
    1  error
*/

int QUICK_RANGE_SELECT::init_ror_merged_scan(bool reuse_handler,
                                             MEM_ROOT *local_alloc)
{
  handler *save_file= file, *org_file;
  THD *thd= head->in_use;
  MY_BITMAP * const save_vcol_set= head->vcol_set;
  MY_BITMAP * const save_read_set= head->read_set;
  MY_BITMAP * const save_write_set= head->write_set;
  DBUG_ENTER("QUICK_RANGE_SELECT::init_ror_merged_scan");

  in_ror_merged_scan= 1;
  if (reuse_handler)
  {
    DBUG_PRINT("info", ("Reusing handler %p", file));
    if (init())
    {
      DBUG_RETURN(1);
    }
    goto end;
  }

  /* Create a separate handler object for this quick select */
  if (free_file)
  {
    /* already have own 'handler' object. */
    DBUG_RETURN(0);
  }

  if (!(file= head->file->clone(head->s->normalized_path.str, local_alloc)))
  {
    /*
      Manually set the error flag. Note: there seems to be quite a few
      places where a failure could cause the server to "hang" the client by
      sending no response to a query. ATM those are not real errors because
      the storage engine calls in question happen to never fail with the
      existing storage engines.
    */
    my_error(ER_OUT_OF_RESOURCES, MYF(0)); /* purecov: inspected */
    /* Caller will free the memory */
    goto failure;  /* purecov: inspected */
  }

  if (file->ha_external_lock(thd, F_RDLCK))
    goto failure;

  if (init())
  {
    file->ha_external_lock(thd, F_UNLCK);
    file->ha_close();
    goto failure;
  }
  free_file= TRUE;
  last_rowid= file->ref;

end:
  /*
    We are only going to read key fields and call position() on 'file'.
    The following sets head->read_set (== column_bitmap) to only use this
    key. The 'column_bitmap' is used in ::get_next().
  */
  org_file= head->file;
  head->file= file;

  head->column_bitmaps_set_no_signal(&column_bitmap, &column_bitmap, &column_bitmap);
  head->prepare_for_keyread(index, &column_bitmap);
  head->prepare_for_position();

  head->file= org_file;

  /* Restore head->read_set (and write_set) to what they had before the call */
  head->column_bitmaps_set(save_read_set, save_write_set, save_vcol_set);

  if (reset())
  {
    if (!reuse_handler)
    {
      file->ha_external_lock(thd, F_UNLCK);
      file->ha_close();
      goto failure;
    }
    DBUG_RETURN(1);
  }
  DBUG_RETURN(0);

failure:
  head->column_bitmaps_set(save_read_set, save_write_set, save_vcol_set);
  delete file;
  file= save_file;
  DBUG_RETURN(1);
}


/*
  Initialize this quick select to be a part of a ROR-merged scan.
  SYNOPSIS
    QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan()
      reuse_handler If TRUE, use head->file, otherwise create separate
                    handler object.
  RETURN
    0     OK
    other error code
*/
int QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan(bool reuse_handler,
                                                     MEM_ROOT *local_alloc)
{
  List_iterator_fast<QUICK_SELECT_WITH_RECORD> quick_it(quick_selects);
  QUICK_SELECT_WITH_RECORD *cur;
  QUICK_RANGE_SELECT *quick;
  DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan");

  /* Initialize all merged "children" quick selects */
  DBUG_ASSERT(!need_to_fetch_row || reuse_handler);
  if (!need_to_fetch_row && reuse_handler)
  {
    cur= quick_it++;
    quick= cur->quick;
    /*
      There is no use of this->file. Use it for the first of merged range
      selects.
    */
    int error= quick->init_ror_merged_scan(TRUE, local_alloc);
    if (unlikely(error))
      DBUG_RETURN(error);
    quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS);
  }
  while ((cur= quick_it++))
  {
    quick= cur->quick;
#ifndef DBUG_OFF
    const MY_BITMAP * const save_read_set= quick->head->read_set;
    const MY_BITMAP * const save_write_set= quick->head->write_set;
#endif
    if (quick->init_ror_merged_scan(FALSE, local_alloc))
      DBUG_RETURN(1);
    quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS);

    // Sets are shared by all members of "quick_selects" so must not change
#ifndef DBUG_OFF
    DBUG_ASSERT(quick->head->read_set == save_read_set);
    DBUG_ASSERT(quick->head->write_set == save_write_set);
#endif
    /* All merged scans share the same record buffer in intersection. */
    quick->record= head->record[0];
  }

  if (need_to_fetch_row &&
      unlikely(head->file->ha_rnd_init_with_error(false)))
  {
    DBUG_PRINT("error", ("ROR index_merge rnd_init call failed"));
    DBUG_RETURN(1);
  }
  DBUG_RETURN(0);
}


/*
  Initialize quick select for row retrieval.
  SYNOPSIS
    reset()
  RETURN
    0      OK
    other  Error code
*/

int QUICK_ROR_INTERSECT_SELECT::reset()
{
  DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::reset");
  if (!scans_inited && init_ror_merged_scan(TRUE, &alloc))
    DBUG_RETURN(1);
  scans_inited= TRUE;
  List_iterator_fast<QUICK_SELECT_WITH_RECORD> it(quick_selects);
  QUICK_SELECT_WITH_RECORD *qr;
  while ((qr= it++))
    qr->quick->reset();
  DBUG_RETURN(0);
}


/*
  Add a merged quick select to this ROR-intersection quick select.

  SYNOPSIS
    QUICK_ROR_INTERSECT_SELECT::push_quick_back()
      alloc Mem root to create auxiliary structures on
      quick Quick select to be added. The quick select must return
            rows in rowid order.
  NOTES
    This call can only be made before init() is called.

  RETURN
    FALSE OK
    TRUE  Out of memory.
*/

bool
QUICK_ROR_INTERSECT_SELECT::push_quick_back(MEM_ROOT *local_alloc,
                                            QUICK_RANGE_SELECT *quick)
{
  QUICK_SELECT_WITH_RECORD *qr;
  if (!(qr= new QUICK_SELECT_WITH_RECORD) ||
      !(qr->key_tuple= (uchar*)alloc_root(local_alloc,
                                          quick->max_used_key_length)))
    return TRUE;
  qr->quick= quick;
  return quick_selects.push_back(qr);
}


QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT()
{
  DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT");
  quick_selects.delete_elements();
  delete cpk_quick;
  free_root(&alloc,MYF(0));
  if (need_to_fetch_row && head->file->inited != handler::NONE)
    head->file->ha_rnd_end();
  DBUG_VOID_RETURN;
}


QUICK_ROR_UNION_SELECT::QUICK_ROR_UNION_SELECT(THD *thd_param,
                                               TABLE *table)
  : thd(thd_param), scans_inited(FALSE)
{
  index= MAX_KEY;
  head= table;
  rowid_length= table->file->ref_length;
  record= head->record[0];
  init_sql_alloc(&alloc, "QUICK_ROR_UNION_SELECT",
                 thd->variables.range_alloc_block_size, 0,
                 MYF(MY_THREAD_SPECIFIC));
  thd_param->mem_root= &alloc;
}


/*
  Comparison function to be used by the QUICK_ROR_UNION_SELECT::queue
  priority queue.

  SYNOPSIS
    QUICK_ROR_UNION_SELECT_queue_cmp()
      arg   Pointer to QUICK_ROR_UNION_SELECT
      val1  First merged select
      val2  Second merged select
*/

C_MODE_START

static int QUICK_ROR_UNION_SELECT_queue_cmp(void *arg, uchar *val1, uchar *val2)
{
  QUICK_ROR_UNION_SELECT *self= (QUICK_ROR_UNION_SELECT*)arg;
  return self->head->file->cmp_ref(((QUICK_SELECT_I*)val1)->last_rowid,
                                   ((QUICK_SELECT_I*)val2)->last_rowid);
}

C_MODE_END


/*
  Do post-constructor initialization.
  SYNOPSIS
    QUICK_ROR_UNION_SELECT::init()

  RETURN
    0      OK
    other  Error code
*/

int QUICK_ROR_UNION_SELECT::init()
{
  DBUG_ENTER("QUICK_ROR_UNION_SELECT::init");
  if (init_queue(&queue, quick_selects.elements, 0,
                 FALSE , QUICK_ROR_UNION_SELECT_queue_cmp,
                 (void*) this, 0, 0))
  {
    bzero(&queue, sizeof(QUEUE));
    DBUG_RETURN(1);
  }

  if (!(cur_rowid= (uchar*) alloc_root(&alloc, 2*head->file->ref_length)))
    DBUG_RETURN(1);
  prev_rowid= cur_rowid + head->file->ref_length;
  DBUG_RETURN(0);
}


/*
  Initialize quick select for row retrieval.
  SYNOPSIS
    reset()

  RETURN
    0      OK
    other  Error code
*/

int QUICK_ROR_UNION_SELECT::reset()
{
  QUICK_SELECT_I *quick;
  int error;
  DBUG_ENTER("QUICK_ROR_UNION_SELECT::reset");
  have_prev_rowid= FALSE;
  if (!scans_inited)
  {
    List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
    while ((quick= it++))
    {
      if (quick->init_ror_merged_scan(FALSE, &alloc))
        DBUG_RETURN(1);
    }
    scans_inited= TRUE;
  }
  queue_remove_all(&queue);
  /*
    Initialize scans for merged quick selects and put all merged quick
    selects into the queue.
  */
  List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
  while ((quick= it++))
  {
    if (unlikely((error= quick->reset())))
      DBUG_RETURN(error);
    if (unlikely((error= quick->get_next())))
    {
      if (error == HA_ERR_END_OF_FILE)
        continue;
      DBUG_RETURN(error);
    }
    quick->save_last_pos();
    queue_insert(&queue, (uchar*)quick);
  }
  /* Prepare for ha_rnd_pos calls. */
  if (head->file->inited && unlikely((error= head->file->ha_rnd_end())))
  {
    DBUG_PRINT("error", ("ROR index_merge rnd_end call failed"));
    DBUG_RETURN(error);
  }
  if (unlikely((error= head->file->ha_rnd_init(false))))
  {
    DBUG_PRINT("error", ("ROR index_merge rnd_init call failed"));
    DBUG_RETURN(error);
  }

  DBUG_RETURN(0);
}


bool
QUICK_ROR_UNION_SELECT::push_quick_back(QUICK_SELECT_I *quick_sel_range)
{
  return quick_selects.push_back(quick_sel_range);
}

QUICK_ROR_UNION_SELECT::~QUICK_ROR_UNION_SELECT()
{
  DBUG_ENTER("QUICK_ROR_UNION_SELECT::~QUICK_ROR_UNION_SELECT");
  delete_queue(&queue);
  quick_selects.delete_elements();
  if (head->file->inited != handler::NONE)
    head->file->ha_rnd_end();
  free_root(&alloc,MYF(0));
  DBUG_VOID_RETURN;
}
1840
1841
1842QUICK_RANGE::QUICK_RANGE()
1843 :min_key(0),max_key(0),min_length(0),max_length(0),
1844 flag(NO_MIN_RANGE | NO_MAX_RANGE),
1845 min_keypart_map(0), max_keypart_map(0)
1846{}
1847
1848SEL_ARG::SEL_ARG(SEL_ARG &arg) :Sql_alloc()
1849{
1850 type=arg.type;
1851 min_flag=arg.min_flag;
1852 max_flag=arg.max_flag;
1853 maybe_flag=arg.maybe_flag;
1854 maybe_null=arg.maybe_null;
1855 part=arg.part;
1856 field=arg.field;
1857 min_value=arg.min_value;
1858 max_value=arg.max_value;
1859 next_key_part=arg.next_key_part;
1860 max_part_no= arg.max_part_no;
1861 use_count=1; elements=1;
1862}
1863
1864
1865inline void SEL_ARG::make_root()
1866{
1867 left=right= &null_element;
1868 color=BLACK;
1869 next=prev=0;
1870 use_count=0; elements=1;
1871}
1872
1873SEL_ARG::SEL_ARG(Field *f,const uchar *min_value_arg,
1874 const uchar *max_value_arg)
1875 :min_flag(0), max_flag(0), maybe_flag(0), maybe_null(f->real_maybe_null()),
1876 elements(1), use_count(1), field(f), min_value((uchar*) min_value_arg),
1877 max_value((uchar*) max_value_arg), next(0),prev(0),
1878 next_key_part(0), color(BLACK), type(KEY_RANGE)
1879{
1880 left=right= &null_element;
1881 max_part_no= 1;
1882}
1883
1884SEL_ARG::SEL_ARG(Field *field_,uint8 part_,
1885 uchar *min_value_, uchar *max_value_,
1886 uint8 min_flag_,uint8 max_flag_,uint8 maybe_flag_)
1887 :min_flag(min_flag_),max_flag(max_flag_),maybe_flag(maybe_flag_),
1888 part(part_),maybe_null(field_->real_maybe_null()), elements(1),use_count(1),
1889 field(field_), min_value(min_value_), max_value(max_value_),
1890 next(0),prev(0),next_key_part(0),color(BLACK),type(KEY_RANGE)
1891{
1892 max_part_no= part+1;
1893 left=right= &null_element;
1894}
1895
1896SEL_ARG *SEL_ARG::clone(RANGE_OPT_PARAM *param, SEL_ARG *new_parent,
1897 SEL_ARG **next_arg)
1898{
1899 SEL_ARG *tmp;
1900
1901 /* Bail out if we have already generated too many SEL_ARGs */
1902 if (++param->alloced_sel_args > MAX_SEL_ARGS)
1903 return 0;
1904
1905 if (type != KEY_RANGE)
1906 {
1907 if (!(tmp= new (param->mem_root) SEL_ARG(type)))
1908 return 0; // out of memory
1909 tmp->prev= *next_arg; // Link into next/prev chain
1910 (*next_arg)->next=tmp;
1911 (*next_arg)= tmp;
1912 tmp->part= this->part;
1913 }
1914 else
1915 {
1916 if (!(tmp= new (param->mem_root) SEL_ARG(field,part, min_value,max_value,
1917 min_flag, max_flag, maybe_flag)))
1918 return 0; // OOM
1919 tmp->parent=new_parent;
1920 tmp->next_key_part=next_key_part;
1921 if (left != &null_element)
1922 if (!(tmp->left=left->clone(param, tmp, next_arg)))
1923 return 0; // OOM
1924
1925 tmp->prev= *next_arg; // Link into next/prev chain
1926 (*next_arg)->next=tmp;
1927 (*next_arg)= tmp;
1928
1929 if (right != &null_element)
1930 if (!(tmp->right= right->clone(param, tmp, next_arg)))
1931 return 0; // OOM
1932 }
1933 increment_use_count(1);
1934 tmp->color= color;
1935 tmp->elements= this->elements;
1936 tmp->max_part_no= max_part_no;
1937 return tmp;
1938}
1939
1940/**
1941 This gives the first SEL_ARG in the interval list, and the minimal element
  in the red-black tree.
1943
1944 @return
1945 SEL_ARG first SEL_ARG in the interval list
1946*/
1947SEL_ARG *SEL_ARG::first()
1948{
1949 SEL_ARG *next_arg=this;
1950 if (!next_arg->left)
1951 return 0; // MAYBE_KEY
1952 while (next_arg->left != &null_element)
1953 next_arg=next_arg->left;
1954 return next_arg;
1955}
1956
1957const SEL_ARG *SEL_ARG::first() const
1958{
1959 return const_cast<SEL_ARG*>(this)->first();
1960}
1961
1962SEL_ARG *SEL_ARG::last()
1963{
1964 SEL_ARG *next_arg=this;
1965 if (!next_arg->right)
1966 return 0; // MAYBE_KEY
1967 while (next_arg->right != &null_element)
1968 next_arg=next_arg->right;
1969 return next_arg;
1970}
1971
1972
/*
  Check how two range endpoints compare, taking open/closed bounds into
  account.
  Returns -2 or 2 if the endpoints are equal but the ranges can be 'joined',
  as in a < 2 and a >= 2
*/
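
/*
  Illustrative example: comparing the max endpoint of "a < 2" (NEAR_MAX set)
  with the min endpoint of "a >= 2" (a closed bound) yields -2: the values
  are equal, but the bounds are adjacent rather than overlapping, so the
  two intervals can be joined.
*/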
1977
1978int SEL_ARG::sel_cmp(Field *field, uchar *a, uchar *b, uint8 a_flag,
1979 uint8 b_flag)
1980{
1981 int cmp;
1982 /* First check if there was a compare to a min or max element */
1983 if (a_flag & (NO_MIN_RANGE | NO_MAX_RANGE))
1984 {
1985 if ((a_flag & (NO_MIN_RANGE | NO_MAX_RANGE)) ==
1986 (b_flag & (NO_MIN_RANGE | NO_MAX_RANGE)))
1987 return 0;
1988 return (a_flag & NO_MIN_RANGE) ? -1 : 1;
1989 }
1990 if (b_flag & (NO_MIN_RANGE | NO_MAX_RANGE))
1991 return (b_flag & NO_MIN_RANGE) ? 1 : -1;
1992
1993 if (field->real_maybe_null()) // If null is part of key
1994 {
1995 if (*a != *b)
1996 {
1997 return *a ? -1 : 1;
1998 }
1999 if (*a)
2000 goto end; // NULL where equal
2001 a++; b++; // Skip NULL marker
2002 }
2003 cmp=field->key_cmp(a , b);
2004 if (cmp) return cmp < 0 ? -1 : 1; // The values differed
2005
  // Check if the compared equal arguments were defined with open/closed range
2007 end:
2008 if (a_flag & (NEAR_MIN | NEAR_MAX))
2009 {
2010 if ((a_flag & (NEAR_MIN | NEAR_MAX)) == (b_flag & (NEAR_MIN | NEAR_MAX)))
2011 return 0;
2012 if (!(b_flag & (NEAR_MIN | NEAR_MAX)))
2013 return (a_flag & NEAR_MIN) ? 2 : -2;
2014 return (a_flag & NEAR_MIN) ? 1 : -1;
2015 }
2016 if (b_flag & (NEAR_MIN | NEAR_MAX))
2017 return (b_flag & NEAR_MIN) ? -2 : 2;
  return 0;                                     // The elements were equal
2019}
2020
2021
2022SEL_ARG *SEL_ARG::clone_tree(RANGE_OPT_PARAM *param)
2023{
2024 SEL_ARG tmp_link,*next_arg,*root;
2025 next_arg= &tmp_link;
2026 if (!(root= clone(param, (SEL_ARG *) 0, &next_arg)))
2027 return 0;
2028 next_arg->next=0; // Fix last link
2029 tmp_link.next->prev=0; // Fix first link
  /* clone() succeeded above, so root cannot be NULL here */
  root->use_count= 0;
2032 return root;
2033}
2034
2035
2036/*
  Table row retrieval plan. The range optimizer creates QUICK_SELECT_I-derived
  objects from table read plans.
2039*/
2040class TABLE_READ_PLAN
2041{
2042public:
2043 /*
2044 Plan read cost, with or without cost of full row retrieval, depending
2045 on plan creation parameters.
2046 */
2047 double read_cost;
2048 ha_rows records; /* estimate of #rows to be examined */
2049
2050 /*
2051 If TRUE, the scan returns rows in rowid order. This is used only for
2052 scans that can be both ROR and non-ROR.
2053 */
2054 bool is_ror;
2055
2056 /*
2057 Create quick select for this plan.
2058 SYNOPSIS
2059 make_quick()
2060 param Parameter from test_quick_select
2061 retrieve_full_rows If TRUE, created quick select will do full record
2062 retrieval.
2063 parent_alloc Memory pool to use, if any.
2064
2065 NOTES
2066 retrieve_full_rows is ignored by some implementations.
2067
2068 RETURN
2069 created quick select
2070 NULL on any error.
2071 */
2072 virtual QUICK_SELECT_I *make_quick(PARAM *param,
2073 bool retrieve_full_rows,
2074 MEM_ROOT *parent_alloc=NULL) = 0;
2075
2076 /* Table read plans are allocated on MEM_ROOT and are never deleted */
2077 static void *operator new(size_t size, MEM_ROOT *mem_root)
2078 { return (void*) alloc_root(mem_root, (uint) size); }
2079 static void operator delete(void *ptr,size_t size) { TRASH_FREE(ptr, size); }
2080 static void operator delete(void *ptr, MEM_ROOT *mem_root) { /* Never called */ }
2081 virtual ~TABLE_READ_PLAN() {} /* Remove gcc warning */
2082
2083};
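
/*
  A minimal sketch of the pattern (hypothetical subclass, for illustration
  only):

    class TRP_EXAMPLE : public TABLE_READ_PLAN
    {
    public:
      QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
                                 MEM_ROOT *parent_alloc)
      {
        // Build and return a QUICK_*_SELECT here, or NULL on error.
        return NULL;
      }
    };

  Plans are allocated with "new (mem_root) TRP_XXX(...)" and are never
  explicitly deleted, as noted above.
*/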
2084
2085class TRP_ROR_INTERSECT;
2086class TRP_ROR_UNION;
2087class TRP_INDEX_MERGE;
2088
2089
2090/*
2091 Plan for a QUICK_RANGE_SELECT scan.
2092 TRP_RANGE::make_quick ignores retrieve_full_rows parameter because
2093 QUICK_RANGE_SELECT doesn't distinguish between 'index only' scans and full
2094 record retrieval scans.
2095*/
2096
2097class TRP_RANGE : public TABLE_READ_PLAN
2098{
2099public:
2100 SEL_ARG *key; /* set of intervals to be used in "range" method retrieval */
2101 uint key_idx; /* key number in PARAM::key */
2102 uint mrr_flags;
2103 uint mrr_buf_size;
2104
2105 TRP_RANGE(SEL_ARG *key_arg, uint idx_arg, uint mrr_flags_arg)
2106 : key(key_arg), key_idx(idx_arg), mrr_flags(mrr_flags_arg)
2107 {}
2108 virtual ~TRP_RANGE() {} /* Remove gcc warning */
2109
2110 QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
2111 MEM_ROOT *parent_alloc)
2112 {
2113 DBUG_ENTER("TRP_RANGE::make_quick");
2114 QUICK_RANGE_SELECT *quick;
2115 if ((quick= get_quick_select(param, key_idx, key, mrr_flags,
2116 mrr_buf_size, parent_alloc)))
2117 {
2118 quick->records= records;
2119 quick->read_time= read_cost;
2120 }
2121 DBUG_RETURN(quick);
2122 }
2123};
2124
2125
2126/* Plan for QUICK_ROR_INTERSECT_SELECT scan. */
2127
2128class TRP_ROR_INTERSECT : public TABLE_READ_PLAN
2129{
2130public:
2131 TRP_ROR_INTERSECT() {} /* Remove gcc warning */
2132 virtual ~TRP_ROR_INTERSECT() {} /* Remove gcc warning */
2133 QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
2134 MEM_ROOT *parent_alloc);
2135
2136 /* Array of pointers to ROR range scans used in this intersection */
2137 struct st_ror_scan_info **first_scan;
2138 struct st_ror_scan_info **last_scan; /* End of the above array */
2139 struct st_ror_scan_info *cpk_scan; /* Clustered PK scan, if there is one */
2140 bool is_covering; /* TRUE if no row retrieval phase is necessary */
2141 double index_scan_costs; /* SUM(cost(index_scan)) */
2142};
2143
2144
2145/*
2146 Plan for QUICK_ROR_UNION_SELECT scan.
2147 QUICK_ROR_UNION_SELECT always retrieves full rows, so retrieve_full_rows
2148 is ignored by make_quick.
2149*/
2150
2151class TRP_ROR_UNION : public TABLE_READ_PLAN
2152{
2153public:
2154 TRP_ROR_UNION() {} /* Remove gcc warning */
2155 virtual ~TRP_ROR_UNION() {} /* Remove gcc warning */
2156 QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
2157 MEM_ROOT *parent_alloc);
2158 TABLE_READ_PLAN **first_ror; /* array of ptrs to plans for merged scans */
2159 TABLE_READ_PLAN **last_ror; /* end of the above array */
2160};
2161
2162
2163/*
2164 Plan for QUICK_INDEX_INTERSECT_SELECT scan.
2165 QUICK_INDEX_INTERSECT_SELECT always retrieves full rows, so retrieve_full_rows
2166 is ignored by make_quick.
2167*/
2168
2169class TRP_INDEX_INTERSECT : public TABLE_READ_PLAN
2170{
2171public:
2172 TRP_INDEX_INTERSECT() {} /* Remove gcc warning */
2173 virtual ~TRP_INDEX_INTERSECT() {} /* Remove gcc warning */
2174 QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
2175 MEM_ROOT *parent_alloc);
2176 TRP_RANGE **range_scans; /* array of ptrs to plans of intersected scans */
2177 TRP_RANGE **range_scans_end; /* end of the array */
2178 /* keys whose scans are to be filtered by cpk conditions */
2179 key_map filtered_scans;
2180};
2181
2182
2183/*
2184 Plan for QUICK_INDEX_MERGE_SELECT scan.
  QUICK_INDEX_MERGE_SELECT always retrieves full rows, so retrieve_full_rows
2186 is ignored by make_quick.
2187*/
2188
2189class TRP_INDEX_MERGE : public TABLE_READ_PLAN
2190{
2191public:
2192 TRP_INDEX_MERGE() {} /* Remove gcc warning */
2193 virtual ~TRP_INDEX_MERGE() {} /* Remove gcc warning */
2194 QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
2195 MEM_ROOT *parent_alloc);
2196 TRP_RANGE **range_scans; /* array of ptrs to plans of merged scans */
2197 TRP_RANGE **range_scans_end; /* end of the array */
2198};
2199
2200
2201/*
2202 Plan for a QUICK_GROUP_MIN_MAX_SELECT scan.
2203*/
2204
2205class TRP_GROUP_MIN_MAX : public TABLE_READ_PLAN
2206{
2207private:
2208 bool have_min, have_max, have_agg_distinct;
2209 KEY_PART_INFO *min_max_arg_part;
2210 uint group_prefix_len;
2211 uint used_key_parts;
2212 uint group_key_parts;
2213 KEY *index_info;
2214 uint index;
2215 uint key_infix_len;
2216 uchar key_infix[MAX_KEY_LENGTH];
2217 SEL_TREE *range_tree; /* Represents all range predicates in the query. */
2218 SEL_ARG *index_tree; /* The SEL_ARG sub-tree corresponding to index_info. */
2219 uint param_idx; /* Index of used key in param->key. */
2220 bool is_index_scan; /* Use index_next() instead of random read */
2221public:
2222 /* Number of records selected by the ranges in index_tree. */
2223 ha_rows quick_prefix_records;
2224public:
2225 TRP_GROUP_MIN_MAX(bool have_min_arg, bool have_max_arg,
2226 bool have_agg_distinct_arg,
2227 KEY_PART_INFO *min_max_arg_part_arg,
2228 uint group_prefix_len_arg, uint used_key_parts_arg,
2229 uint group_key_parts_arg, KEY *index_info_arg,
2230 uint index_arg, uint key_infix_len_arg,
2231 uchar *key_infix_arg,
2232 SEL_TREE *tree_arg, SEL_ARG *index_tree_arg,
2233 uint param_idx_arg, ha_rows quick_prefix_records_arg)
2234 : have_min(have_min_arg), have_max(have_max_arg),
2235 have_agg_distinct(have_agg_distinct_arg),
2236 min_max_arg_part(min_max_arg_part_arg),
2237 group_prefix_len(group_prefix_len_arg), used_key_parts(used_key_parts_arg),
2238 group_key_parts(group_key_parts_arg), index_info(index_info_arg),
2239 index(index_arg), key_infix_len(key_infix_len_arg), range_tree(tree_arg),
2240 index_tree(index_tree_arg), param_idx(param_idx_arg), is_index_scan(FALSE),
2241 quick_prefix_records(quick_prefix_records_arg)
2242 {
2243 if (key_infix_len)
2244 memcpy(this->key_infix, key_infix_arg, key_infix_len);
2245 }
2246 virtual ~TRP_GROUP_MIN_MAX() {} /* Remove gcc warning */
2247
2248 QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
2249 MEM_ROOT *parent_alloc);
2250 void use_index_scan() { is_index_scan= TRUE; }
2251};
2252
2253
2254typedef struct st_index_scan_info
2255{
2256 uint idx; /* # of used key in param->keys */
2257 uint keynr; /* # of used key in table */
2258 uint range_count;
2259 ha_rows records; /* estimate of # records this scan will return */
2260
2261 /* Set of intervals over key fields that will be used for row retrieval. */
2262 SEL_ARG *sel_arg;
2263
2264 KEY *key_info;
2265 uint used_key_parts;
2266
2267 /* Estimate of # records filtered out by intersection with cpk */
2268 ha_rows filtered_out;
2269 /* Bitmap of fields used in index intersection */
2270 MY_BITMAP used_fields;
2271
2272 /* Fields used in the query and covered by ROR scan. */
2273 MY_BITMAP covered_fields;
2274 uint used_fields_covered; /* # of set bits in covered_fields */
2275 int key_rec_length; /* length of key record (including rowid) */
2276
2277 /*
2278 Cost of reading all index records with values in sel_arg intervals set
2279 (assuming there is no need to access full table records)
2280 */
2281 double index_read_cost;
2282 uint first_uncovered_field; /* first unused bit in covered_fields */
2283 uint key_components; /* # of parts in the key */
2284} INDEX_SCAN_INFO;
2285
2286/*
2287 Fill param->needed_fields with bitmap of fields used in the query.
2288 SYNOPSIS
2289 fill_used_fields_bitmap()
2290 param Parameter from test_quick_select function.
2291
2292 NOTES
2293 Clustered PK members are not put into the bitmap as they are implicitly
2294 present in all keys (and it is impossible to avoid reading them).
2295 RETURN
2296 0 Ok
2297 1 Out of memory.
2298*/
2299
2300static int fill_used_fields_bitmap(PARAM *param)
2301{
2302 TABLE *table= param->table;
2303 my_bitmap_map *tmp;
2304 uint pk;
2305 param->tmp_covered_fields.bitmap= 0;
2306 param->fields_bitmap_size= table->s->column_bitmap_size;
2307 if (!(tmp= (my_bitmap_map*) alloc_root(param->mem_root,
2308 param->fields_bitmap_size)) ||
2309 my_bitmap_init(&param->needed_fields, tmp, table->s->fields, FALSE))
2310 return 1;
2311
2312 bitmap_copy(&param->needed_fields, table->read_set);
2313 bitmap_union(&param->needed_fields, table->write_set);
2314
2315 pk= param->table->s->primary_key;
2316 if (pk != MAX_KEY && param->table->file->primary_key_is_clustered())
2317 {
2318 /* The table uses clustered PK and it is not internally generated */
2319 KEY_PART_INFO *key_part= param->table->key_info[pk].key_part;
2320 KEY_PART_INFO *key_part_end= key_part +
2321 param->table->key_info[pk].user_defined_key_parts;
2322 for (;key_part != key_part_end; ++key_part)
2323 bitmap_clear_bit(&param->needed_fields, key_part->fieldnr-1);
2324 }
2325 return 0;
2326}
2327
2328
2329/*
  Test if a key can be used to retrieve records in different ranges
2331
2332 SYNOPSIS
2333 SQL_SELECT::test_quick_select()
2334 thd Current thread
2335 keys_to_use Keys to use for range retrieval
2336 prev_tables Tables assumed to be already read when the scan is
2337 performed (but not read at the moment of this call)
2338 limit Query limit
2339 force_quick_range Prefer to use range (instead of full table scan) even
2340 if it is more expensive.
2341
2342 NOTES
2343 Updates the following in the select parameter:
    needed_reg - Bits for keys which may be used if all prev regs are read
2345 quick - Parameter to use when reading records.
2346
2347 In the table struct the following information is updated:
2348 quick_keys - Which keys can be used
2349 quick_rows - How many rows the key matches
2350 quick_condition_rows - E(# rows that will satisfy the table condition)
2351
2352 IMPLEMENTATION
2353 quick_condition_rows value is obtained as follows:
2354
2355 It is a minimum of E(#output rows) for all considered table access
2356 methods (range and index_merge accesses over various indexes).
2357
2358 The obtained value is not a true E(#rows that satisfy table condition)
2359 but rather a pessimistic estimate. To obtain a true E(#...) one would
2360 need to combine estimates of various access methods, taking into account
2361 correlations between sets of rows they will return.
2362
    For example, if the values of tbl.key1 and tbl.key2 are independent (a
    reasonable assumption if we have no information about their correlation),
    then the correct estimate will be:

      E(#rows("tbl.key1 < c1 AND tbl.key2 < c2")) =
       = E(#rows(tbl.key1 < c1)) / total_rows(tbl) * E(#rows(tbl.key2 < c2))

    which is smaller than

       MIN(E(#rows(tbl.key1 < c1)), E(#rows(tbl.key2 < c2)))
2373
2374 which is currently produced.
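
    With illustrative numbers: if total_rows(tbl) = 1000,
    E(#rows(tbl.key1 < c1)) = 100 and E(#rows(tbl.key2 < c2)) = 200, the
    independence-based estimate is 100 / 1000 * 200 = 20 rows, while the
    MIN-based estimate produced now is MIN(100, 200) = 100 rows.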
2375
2376 TODO
2377 * Change the value returned in quick_condition_rows from a pessimistic
2378 estimate to true E(#rows that satisfy table condition).
    (we can re-use some of the E(#rows) calculation code from
    index_merge/intersection for this)
2381
2382 * Check if this function really needs to modify keys_to_use, and change the
2383 code to pass it by reference if it doesn't.
2384
    * In addition to force_quick_range other means can be (and usually are) used
2386 to make this function prefer range over full table scan. Figure out if
2387 force_quick_range is really needed.
2388
2389 RETURN
2390 -1 if impossible select (i.e. certainly no rows will be selected)
2391 0 if can't use quick_select
2392 1 if found usable ranges and quick select has been successfully created.
2393*/
2394
2395int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
2396 table_map prev_tables,
2397 ha_rows limit, bool force_quick_range,
2398 bool ordered_output,
2399 bool remove_false_parts_of_where)
2400{
2401 uint idx;
2402 double scan_time;
2403 DBUG_ENTER("SQL_SELECT::test_quick_select");
2404 DBUG_PRINT("enter",("keys_to_use: %lu prev_tables: %lu const_tables: %lu",
2405 (ulong) keys_to_use.to_ulonglong(), (ulong) prev_tables,
2406 (ulong) const_tables));
2407 DBUG_PRINT("info", ("records: %lu", (ulong) head->stat_records()));
2408 delete quick;
2409 quick=0;
2410 needed_reg.clear_all();
2411 quick_keys.clear_all();
2412 DBUG_ASSERT(!head->is_filled_at_execution());
2413 if (keys_to_use.is_clear_all() || head->is_filled_at_execution())
2414 DBUG_RETURN(0);
2415 records= head->stat_records();
2416 if (!records)
2417 records++; /* purecov: inspected */
2418 scan_time= (double) records / TIME_FOR_COMPARE + 1;
2419 read_time= (double) head->file->scan_time() + scan_time + 1.1;
2420 if (head->force_index)
2421 scan_time= read_time= DBL_MAX;
2422 if (limit < records)
2423 read_time= (double) records + scan_time + 1; // Force to use index
2424
2425 possible_keys.clear_all();
2426
2427 DBUG_PRINT("info",("Time to scan table: %g", read_time));
2428
2429 keys_to_use.intersect(head->keys_in_use_for_query);
2430 if (!keys_to_use.is_clear_all())
2431 {
2432 uchar buff[STACK_BUFF_ALLOC];
2433 MEM_ROOT alloc;
2434 SEL_TREE *tree= NULL;
2435 KEY_PART *key_parts;
2436 KEY *key_info;
2437 PARAM param;
2438
2439 if (check_stack_overrun(thd, 2*STACK_MIN_SIZE + sizeof(PARAM), buff))
2440 DBUG_RETURN(0); // Fatal error flag is set
2441
2442 /* set up parameter that is passed to all functions */
2443 param.thd= thd;
2444 param.baseflag= head->file->ha_table_flags();
2445 param.prev_tables=prev_tables | const_tables;
2446 param.read_tables=read_tables;
2447 param.current_table= head->map;
2448 param.table=head;
2449 param.keys=0;
2450 param.mem_root= &alloc;
2451 param.old_root= thd->mem_root;
2452 param.needed_reg= &needed_reg;
2453 param.imerge_cost_buff_size= 0;
2454 param.using_real_indexes= TRUE;
2455 param.remove_jump_scans= TRUE;
2456 param.remove_false_where_parts= remove_false_parts_of_where;
2457 param.force_default_mrr= ordered_output;
2458 param.possible_keys.clear_all();
2459
2460 thd->no_errors=1; // Don't warn about NULL
2461 init_sql_alloc(&alloc, "test_quick_select",
2462 thd->variables.range_alloc_block_size, 0,
2463 MYF(MY_THREAD_SPECIFIC));
2464 if (!(param.key_parts=
2465 (KEY_PART*) alloc_root(&alloc,
2466 sizeof(KEY_PART) *
2467 head->s->actual_n_key_parts(thd))) ||
2468 fill_used_fields_bitmap(&param))
2469 {
2470 thd->no_errors=0;
2471 free_root(&alloc,MYF(0)); // Return memory & allocator
2472 DBUG_RETURN(0); // Can't use range
2473 }
2474 key_parts= param.key_parts;
2475
2476 /*
2477 Make an array with description of all key parts of all table keys.
2478 This is used in get_mm_parts function.
2479 */
2480 key_info= head->key_info;
2481 uint max_key_len= 0;
2482 for (idx=0 ; idx < head->s->keys ; idx++, key_info++)
2483 {
2484 KEY_PART_INFO *key_part_info;
2485 uint n_key_parts= head->actual_n_key_parts(key_info);
2486
2487 if (!keys_to_use.is_set(idx))
2488 continue;
2489 if (key_info->flags & HA_FULLTEXT)
2490 continue; // ToDo: ft-keys in non-ft ranges, if possible SerG
2491
2492 param.key[param.keys]=key_parts;
2493 key_part_info= key_info->key_part;
2494 uint cur_key_len= 0;
2495 for (uint part= 0 ; part < n_key_parts ;
2496 part++, key_parts++, key_part_info++)
2497 {
2498 key_parts->key= param.keys;
2499 key_parts->part= part;
2500 key_parts->length= key_part_info->length;
2501 key_parts->store_length= key_part_info->store_length;
2502 cur_key_len += key_part_info->store_length;
2503 key_parts->field= key_part_info->field;
2504 key_parts->null_bit= key_part_info->null_bit;
2505 key_parts->image_type =
2506 (key_info->flags & HA_SPATIAL) ? Field::itMBR : Field::itRAW;
2507 /* Only HA_PART_KEY_SEG is used */
2508 key_parts->flag= (uint8) key_part_info->key_part_flag;
2509 }
2510 param.real_keynr[param.keys++]=idx;
2511 if (cur_key_len > max_key_len)
2512 max_key_len= cur_key_len;
2513 }
2514 param.key_parts_end=key_parts;
2515 param.alloced_sel_args= 0;
2516
2517 max_key_len++; /* Take into account the "+1" in QUICK_RANGE::QUICK_RANGE */
2518 if (!(param.min_key= (uchar*)alloc_root(&alloc,max_key_len)) ||
2519 !(param.max_key= (uchar*)alloc_root(&alloc,max_key_len)))
2520 {
2521 thd->no_errors=0;
2522 free_root(&alloc,MYF(0)); // Return memory & allocator
2523 DBUG_RETURN(0); // Can't use range
2524 }
2525
2526 thd->mem_root= &alloc;
2527 /* Calculate cost of full index read for the shortest covering index */
2528 if (!force_quick_range && !head->covering_keys.is_clear_all())
2529 {
2530 int key_for_use= find_shortest_key(head, &head->covering_keys);
2531 double key_read_time= head->file->keyread_time(key_for_use, 1, records) +
2532 (double) records / TIME_FOR_COMPARE;
2533 DBUG_PRINT("info", ("'all'+'using index' scan will be using key %d, "
2534 "read time %g", key_for_use, key_read_time));
2535 if (key_read_time < read_time)
2536 read_time= key_read_time;
2537 }
2538
2539 TABLE_READ_PLAN *best_trp= NULL;
2540 TRP_GROUP_MIN_MAX *group_trp;
2541 double best_read_time= read_time;
2542
2543 if (cond)
2544 {
2545 if ((tree= cond->get_mm_tree(&param, &cond)))
2546 {
2547 if (tree->type == SEL_TREE::IMPOSSIBLE)
2548 {
2549 records=0L; /* Return -1 from this function. */
2550 read_time= (double) HA_POS_ERROR;
2551 goto free_mem;
2552 }
2553 /*
2554 If the tree can't be used for range scans, proceed anyway, as we
2555 can construct a group-min-max quick select
2556 */
2557 if (tree->type != SEL_TREE::KEY && tree->type != SEL_TREE::KEY_SMALLER)
2558 tree= NULL;
2559 }
2560 }
2561
2562 /*
2563 Try to construct a QUICK_GROUP_MIN_MAX_SELECT.
2564 Notice that it can be constructed no matter if there is a range tree.
2565 */
2566 group_trp= get_best_group_min_max(&param, tree, best_read_time);
2567 if (group_trp)
2568 {
2569 param.table->quick_condition_rows= MY_MIN(group_trp->records,
2570 head->stat_records());
2571 if (group_trp->read_cost < best_read_time)
2572 {
2573 best_trp= group_trp;
2574 best_read_time= best_trp->read_cost;
2575 }
2576 }
2577
2578 if (tree)
2579 {
2580 /*
2581 It is possible to use a range-based quick select (but it might be
2582 slower than 'all' table scan).
2583 */
2584 TRP_RANGE *range_trp;
2585 TRP_ROR_INTERSECT *rori_trp;
2586 TRP_INDEX_INTERSECT *intersect_trp;
2587 bool can_build_covering= FALSE;
2588
2589 remove_nonrange_trees(&param, tree);
2590
2591 /* Get best 'range' plan and prepare data for making other plans */
2592 if ((range_trp= get_key_scans_params(&param, tree, FALSE, TRUE,
2593 best_read_time)))
2594 {
2595 best_trp= range_trp;
2596 best_read_time= best_trp->read_cost;
2597 }
2598
2599 /*
2600 Simultaneous key scans and row deletes on several handler
2601 objects are not allowed so don't use ROR-intersection for
2602 table deletes.
2603 */
2604 if ((thd->lex->sql_command != SQLCOM_DELETE) &&
2605 optimizer_flag(thd, OPTIMIZER_SWITCH_INDEX_MERGE))
2606 {
2607 /*
2608 Get best non-covering ROR-intersection plan and prepare data for
2609 building covering ROR-intersection.
2610 */
2611 if ((rori_trp= get_best_ror_intersect(&param, tree, best_read_time,
2612 &can_build_covering)))
2613 {
2614 best_trp= rori_trp;
2615 best_read_time= best_trp->read_cost;
2616 /*
2617 Try constructing covering ROR-intersect only if it looks possible
2618 and worth doing.
2619 */
2620 if (!rori_trp->is_covering && can_build_covering &&
2621 (rori_trp= get_best_covering_ror_intersect(&param, tree,
2622 best_read_time)))
2623 best_trp= rori_trp;
2624 }
2625 }
2626 /*
2627 Do not look for an index intersection plan if there is a covering
        index. The scan by this covering index will always be cheaper than
2629 any index intersection.
2630 */
2631 if (param.table->covering_keys.is_clear_all() &&
2632 optimizer_flag(thd, OPTIMIZER_SWITCH_INDEX_MERGE) &&
2633 optimizer_flag(thd, OPTIMIZER_SWITCH_INDEX_MERGE_SORT_INTERSECT))
2634 {
2635 if ((intersect_trp= get_best_index_intersect(&param, tree,
2636 best_read_time)))
2637 {
2638 best_trp= intersect_trp;
2639 best_read_time= best_trp->read_cost;
2640 set_if_smaller(param.table->quick_condition_rows,
2641 intersect_trp->records);
2642 }
2643 }
2644
2645 if (optimizer_flag(thd, OPTIMIZER_SWITCH_INDEX_MERGE) &&
2646 head->stat_records() != 0)
2647 {
2648 /* Try creating index_merge/ROR-union scan. */
2649 SEL_IMERGE *imerge;
2650 TABLE_READ_PLAN *best_conj_trp= NULL,
2651 *UNINIT_VAR(new_conj_trp); /* no empty index_merge lists possible */
2652 DBUG_PRINT("info",("No range reads possible,"
2653 " trying to construct index_merge"));
2654 List_iterator_fast<SEL_IMERGE> it(tree->merges);
2655 while ((imerge= it++))
2656 {
2657 new_conj_trp= get_best_disjunct_quick(&param, imerge, best_read_time);
2658 if (new_conj_trp)
2659 set_if_smaller(param.table->quick_condition_rows,
2660 new_conj_trp->records);
2661 if (new_conj_trp &&
2662 (!best_conj_trp ||
2663 new_conj_trp->read_cost < best_conj_trp->read_cost))
2664 {
2665 best_conj_trp= new_conj_trp;
2666 best_read_time= best_conj_trp->read_cost;
2667 }
2668 }
2669 if (best_conj_trp)
2670 best_trp= best_conj_trp;
2671 }
2672 }
2673
2674 thd->mem_root= param.old_root;
2675
2676 /* If we got a read plan, create a quick select from it. */
2677 if (best_trp)
2678 {
2679 records= best_trp->records;
2680 if (!(quick= best_trp->make_quick(&param, TRUE)) || quick->init())
2681 {
2682 delete quick;
2683 quick= NULL;
2684 }
2685 }
2686 possible_keys= param.possible_keys;
2687
2688 free_mem:
2689 free_root(&alloc,MYF(0)); // Return memory & allocator
2690 thd->mem_root= param.old_root;
2691 thd->no_errors=0;
2692 }
2693
2694 DBUG_EXECUTE("info", print_quick(quick, &needed_reg););
2695
2696 /*
2697 Assume that if the user is using 'limit' we will only need to scan
2698 limit rows if we are using a key
2699 */
2700 DBUG_RETURN(records ? MY_TEST(quick) : -1);
2701}
2702
2703/****************************************************************************
2704 * Condition selectivity module
2705 ****************************************************************************/
2706
2707
2708/*
2709 Build descriptors of pseudo-indexes over columns to perform range analysis
2710
2711 SYNOPSIS
2712 create_key_parts_for_pseudo_indexes()
2713 param IN/OUT data structure for the descriptors to be built
2714 used_fields bitmap of columns for which the descriptors are to be built
2715
2716 DESCRIPTION
2717 For each column marked in the bitmap used_fields the function builds
2718 a descriptor of a single-component pseudo-index over this column that
    can be used for the range analysis of the predicates over this column.
2720 The descriptors are created in the memory of param->mem_root.
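
    For example (illustrative): for a nullable VARCHAR(32) column in a
    single-byte character set, key_part->length is 32 and store_length is
    32 + HA_KEY_NULL_LENGTH + HA_KEY_BLOB_LENGTH = 35, provided 32 does not
    exceed the handler's max_key_part_length().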
2721
2722 RETURN
2723 FALSE in the case of success
2724 TRUE otherwise
2725*/
2726
2727static
2728bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param,
2729 MY_BITMAP *used_fields)
2730{
2731 Field **field_ptr;
2732 TABLE *table= param->table;
2733 uint parts= 0;
2734
2735 for (field_ptr= table->field; *field_ptr; field_ptr++)
2736 {
2737 if (bitmap_is_set(used_fields, (*field_ptr)->field_index))
2738 parts++;
2739 }
2740
2741 KEY_PART *key_part;
2742 uint keys= 0;
2743
2744 if (!(key_part= (KEY_PART *) alloc_root(param->mem_root,
2745 sizeof(KEY_PART) * parts)))
2746 return TRUE;
2747
2748 param->key_parts= key_part;
2749 uint max_key_len= 0;
2750 for (field_ptr= table->field; *field_ptr; field_ptr++)
2751 {
2752 if (bitmap_is_set(used_fields, (*field_ptr)->field_index))
2753 {
2754 Field *field= *field_ptr;
2755 uint16 store_length;
2756 uint16 max_key_part_length= (uint16) table->file->max_key_part_length();
2757 key_part->key= keys;
2758 key_part->part= 0;
2759 if (field->flags & BLOB_FLAG)
2760 key_part->length= max_key_part_length;
2761 else
2762 {
2763 key_part->length= (uint16) field->key_length();
2764 set_if_smaller(key_part->length, max_key_part_length);
2765 }
2766 store_length= key_part->length;
2767 if (field->real_maybe_null())
2768 store_length+= HA_KEY_NULL_LENGTH;
2769 if (field->real_type() == MYSQL_TYPE_VARCHAR)
2770 store_length+= HA_KEY_BLOB_LENGTH;
2771 if (max_key_len < store_length)
2772 max_key_len= store_length;
2773 key_part->store_length= store_length;
2774 key_part->field= field;
2775 key_part->image_type= Field::itRAW;
2776 key_part->flag= 0;
2777 param->key[keys]= key_part;
2778 keys++;
2779 key_part++;
2780 }
2781 }
2782
2783 max_key_len++; /* Take into account the "+1" in QUICK_RANGE::QUICK_RANGE */
2784 if (!(param->min_key= (uchar*)alloc_root(param->mem_root, max_key_len)) ||
2785 !(param->max_key= (uchar*)alloc_root(param->mem_root, max_key_len)))
2786 {
2787 return true;
2788 }
2789 param->keys= keys;
2790 param->key_parts_end= key_part;
2791
2792 return FALSE;
2793}
2794
2795
2796/*
2797 Estimate the number of rows in all ranges built for a column
2798 by the range optimizer
2799
2800 SYNOPSIS
2801 records_in_column_ranges()
2802 param the data structure to access descriptors of pseudo indexes
2803 built over columns used in the condition of the processed query
2804 idx the index of the descriptor of interest in param
2805 tree the tree representing ranges built for the interesting column
2806
2807 DESCRIPTION
2808 This function retrieves the ranges represented by the SEL_ARG 'tree' and
    for each of them, r, it calls the function get_column_range_cardinality()
2810 that estimates the number of expected rows in r. It is assumed that param
2811 is the data structure containing the descriptors of pseudo-indexes that
2812 has been built to perform range analysis of the range conditions imposed
2813 on the columns used in the processed query, while idx is the index of the
2814 descriptor created in 'param' exactly for the column for which 'tree'
2815 has been built by the range optimizer.
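
    For example (illustrative pseudo-code): if 'tree' represents
    (col < 5) OR (col = 10), the function returns
      cardinality(col < 5) + cardinality(col = 10)
    i.e. the sum of the expected row counts over the disjoint ranges.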
2816
2817 RETURN
2818 the number of rows in the retrieved ranges
2819*/
2820
2821static
2822double records_in_column_ranges(PARAM *param, uint idx,
2823 SEL_ARG *tree)
2824{
2825 SEL_ARG_RANGE_SEQ seq;
2826 KEY_MULTI_RANGE range;
2827 range_seq_t seq_it;
2828 double rows;
2829 Field *field;
2830 uint flags= 0;
2831 double total_rows= 0;
2832 RANGE_SEQ_IF seq_if = {NULL, sel_arg_range_seq_init,
2833 sel_arg_range_seq_next, 0, 0};
2834
  /* Handle cases when we don't have a valid non-empty list of ranges */
2836 if (!tree)
2837 return DBL_MAX;
2838 if (tree->type == SEL_ARG::IMPOSSIBLE)
2839 return (0L);
2840
2841 field= tree->field;
2842
2843 seq.keyno= idx;
2844 seq.real_keyno= MAX_KEY;
2845 seq.param= param;
2846 seq.start= tree;
2847
2848 seq_it= seq_if.init((void *) &seq, 0, flags);
2849
2850 while (!seq_if.next(seq_it, &range))
2851 {
2852 key_range *min_endp, *max_endp;
2853 min_endp= range.start_key.length? &range.start_key : NULL;
2854 max_endp= range.end_key.length? &range.end_key : NULL;
2855 rows= get_column_range_cardinality(field, min_endp, max_endp,
2856 range.range_flag);
2857 if (DBL_MAX == rows)
2858 {
2859 total_rows= DBL_MAX;
2860 break;
2861 }
2862 total_rows += rows;
2863 }
2864 return total_rows;
2865}
2866
2867
2868/*
2869 Calculate the selectivity of the condition imposed on the rows of a table
2870
2871 SYNOPSIS
2872 calculate_cond_selectivity_for_table()
2873 thd the context handle
2874 table the table of interest
2875 cond conditions imposed on the rows of the table
2876
2877 DESCRIPTION
2878 This function calculates the selectivity of range conditions cond imposed
2879 on the rows of 'table' in the processed query.
2880 The calculated selectivity is assigned to the field table->cond_selectivity.
2881
2882 Selectivity is calculated as a product of selectivities imposed by:
2883
2884 1. possible range accesses. (if multiple range accesses use the same
2885 restrictions on the same field, we make adjustments for that)
2886 2. Sargable conditions on fields for which we have column statistics (if
2887 a field is used in a possible range access, we assume that selectivity
2888 is already provided by the range access' estimates)
2889 3. Reading a few records from the table pages and checking the condition
2890 selectivity (this is used for conditions like "column LIKE '%val%'"
2891 where approaches #1 and #2 do not provide selectivity data).
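
    A worked example with illustrative numbers: for a table of 1000 rows,
    if a possible range access on col1 matches 100 rows (selectivity 0.1)
    and sampling finds that "col2 LIKE '%val%'" holds for 50 of 200 examined
    rows (selectivity 0.25), the resulting table->cond_selectivity is
    0.1 * 0.25 = 0.025.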
2892
2893 NOTE
2894 Currently the selectivities of range conditions over different columns are
2895 considered independent.
2896
2897 RETURN
2898 FALSE on success
2899 TRUE otherwise
2900*/
2901
2902bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond)
2903{
2904 uint keynr;
2905 uint max_quick_key_parts= 0;
2906 MY_BITMAP *used_fields= &table->cond_set;
2907 double table_records= (double)table->stat_records();
2908 DBUG_ENTER("calculate_cond_selectivity_for_table");
2909
2910 table->cond_selectivity= 1.0;
2911
2912 if (!*cond || table_records == 0)
2913 DBUG_RETURN(FALSE);
2914
2915 if (table->pos_in_table_list->schema_table)
2916 DBUG_RETURN(FALSE);
2917
2918 MY_BITMAP handled_columns;
2919 my_bitmap_map* buf;
2920 if (!(buf= (my_bitmap_map*)thd->alloc(table->s->column_bitmap_size)))
2921 DBUG_RETURN(TRUE);
2922 my_bitmap_init(&handled_columns, buf, table->s->fields, FALSE);
2923
2924 /*
2925 Calculate the selectivity of the range conditions supported by indexes.
2926
2927 First, take into account possible range accesses.
    Range access estimates are the most precise, so we prefer them to any
    other estimate source.
2930 */
2931
2932 for (keynr= 0; keynr < table->s->keys; keynr++)
2933 {
2934 if (table->quick_keys.is_set(keynr))
2935 set_if_bigger(max_quick_key_parts, table->quick_key_parts[keynr]);
2936 }
2937
2938 /*
2939 Walk through all indexes, indexes where range access uses more keyparts
2940 go first.
2941 */
2942 for (uint quick_key_parts= max_quick_key_parts;
2943 quick_key_parts; quick_key_parts--)
2944 {
2945 for (keynr= 0; keynr < table->s->keys; keynr++)
2946 {
2947 if (table->quick_keys.is_set(keynr) &&
2948 table->quick_key_parts[keynr] == quick_key_parts)
2949 {
2950 uint i;
2951 uint used_key_parts= table->quick_key_parts[keynr];
2952 double quick_cond_selectivity= table->quick_rows[keynr] /
2953 table_records;
2954 KEY *key_info= table->key_info + keynr;
2955 KEY_PART_INFO* key_part= key_info->key_part;
2956 /*
2957 Suppose, there are range conditions on two keys
2958 KEY1 (col1, col2)
2959 KEY2 (col3, col2)
2960
2961 we don't want to count selectivity of condition on col2 twice.
2962
2963 First, find the longest key prefix that's made of columns whose
2964 selectivity wasn't already accounted for.
2965 */
2966 for (i= 0; i < used_key_parts; i++, key_part++)
2967 {
2968 if (bitmap_is_set(&handled_columns, key_part->fieldnr-1))
2969 break;
2970 bitmap_set_bit(&handled_columns, key_part->fieldnr-1);
2971 }
2972 if (i)
2973 {
2974 double UNINIT_VAR(selectivity_mult);
2975
2976 /*
            There is at least a one-column prefix of columns whose
            selectivity has not yet been accounted for.
2979 */
2980 table->cond_selectivity*= quick_cond_selectivity;
2981 if (i != used_key_parts)
2982 {
2983 /*
2984 Range access got us estimate for #used_key_parts.
2985 We need estimate for #(i-1) key parts.
2986 */
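          /*
            Illustrative numbers: if actual_rec_per_key(i-1) == 50 and
            actual_rec_per_key(i) == 10, an extra key part narrows the
            result ~5x on average, so f1/f2 == 5 serves as a correction
            factor for the key parts that should not be counted.
          */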
2987 double f1= key_info->actual_rec_per_key(i-1);
2988 double f2= key_info->actual_rec_per_key(i);
2989 if (f1 > 0 && f2 > 0)
2990 selectivity_mult= f1 / f2;
2991 else
2992 {
2993 /*
2994 No statistics available, assume the selectivity is proportional
2995 to the number of key parts.
2996 (i=0 means 1 keypart, i=1 means 2 keyparts, so use i+1)
2997 */
2998 selectivity_mult= ((double)(i+1)) / i;
2999 }
3000 table->cond_selectivity*= selectivity_mult;
3001 }
3002 /*
3003 We need to set selectivity for fields supported by indexes.
3004 For single-component indexes and for some first components
3005 of other indexes we do it here. For the remaining fields
3006 we do it later in this function, in the same way as for the
3007 fields not used in any indexes.
3008 */
3009 if (i == 1)
3010 {
3011 uint fieldnr= key_info->key_part[0].fieldnr;
3012 table->field[fieldnr-1]->cond_selectivity= quick_cond_selectivity;
3013 if (i != used_key_parts)
3014 table->field[fieldnr-1]->cond_selectivity*= selectivity_mult;
3015 bitmap_clear_bit(used_fields, fieldnr-1);
3016 }
3017 }
3018 }
3019 }
3020 }
3021
3022 /*
    Second step: calculate the selectivity of the range conditions not
    supported by any index, and the selectivity of the range conditions
    over the fields whose selectivity has not been set yet.
3026 */
3027
3028 if (thd->variables.optimizer_use_condition_selectivity > 2 &&
3029 !bitmap_is_clear_all(used_fields))
3030 {
3031 PARAM param;
3032 MEM_ROOT alloc;
3033 SEL_TREE *tree;
3034 double rows;
3035
3036 init_sql_alloc(&alloc, "calculate_cond_selectivity_for_table",
3037 thd->variables.range_alloc_block_size, 0,
3038 MYF(MY_THREAD_SPECIFIC));
3039 param.thd= thd;
3040 param.mem_root= &alloc;
3041 param.old_root= thd->mem_root;
3042 param.table= table;
3043 param.is_ror_scan= FALSE;
3044 param.remove_false_where_parts= true;
3045
3046 if (create_key_parts_for_pseudo_indexes(&param, used_fields))
3047 goto free_alloc;
3048
3049 param.prev_tables= param.read_tables= 0;
3050 param.current_table= table->map;
3051 param.using_real_indexes= FALSE;
3052 param.real_keynr[0]= 0;
3053 param.alloced_sel_args= 0;
3054
3055 thd->no_errors=1;
3056
3057 tree= cond[0]->get_mm_tree(&param, cond);
3058
3059 if (!tree)
3060 goto free_alloc;
3061
3062 table->reginfo.impossible_range= 0;
3063 if (tree->type == SEL_TREE::IMPOSSIBLE)
3064 {
3065 rows= 0;
3066 table->reginfo.impossible_range= 1;
3067 goto free_alloc;
3068 }
3069 else if (tree->type == SEL_TREE::ALWAYS)
3070 {
3071 rows= table_records;
3072 goto free_alloc;
3073 }
3074 else if (tree->type == SEL_TREE::MAYBE)
3075 {
3076 rows= table_records;
3077 goto free_alloc;
3078 }
3079
3080 for (uint idx= 0; idx < param.keys; idx++)
3081 {
3082 SEL_ARG *key= tree->keys[idx];
3083 if (key)
3084 {
3085 if (key->type == SEL_ARG::IMPOSSIBLE)
3086 {
3087 rows= 0;
3088 table->reginfo.impossible_range= 1;
3089 goto free_alloc;
3090 }
3091 else
3092 {
3093 rows= records_in_column_ranges(&param, idx, key);
3094 if (rows != DBL_MAX)
3095 key->field->cond_selectivity= rows/table_records;
3096 }
3097 }
3098 }
3099
3100 for (Field **field_ptr= table->field; *field_ptr; field_ptr++)
3101 {
3102 Field *table_field= *field_ptr;
3103 if (bitmap_is_set(used_fields, table_field->field_index) &&
3104 table_field->cond_selectivity < 1.0)
3105 {
3106 if (!bitmap_is_set(&handled_columns, table_field->field_index))
3107 table->cond_selectivity*= table_field->cond_selectivity;
3108 }
3109 }
3110
3111 free_alloc:
3112 thd->no_errors= 0;
3113 thd->mem_root= param.old_root;
3114 free_root(&alloc, MYF(0));
3115
3116 }
3117
3118 bitmap_union(used_fields, &handled_columns);
3119
3120 /* Check if we can improve selectivity estimates by using sampling */
3121 ulong check_rows=
3122 MY_MIN(thd->variables.optimizer_selectivity_sampling_limit,
3123 (ulong) (table_records * SELECTIVITY_SAMPLING_SHARE));
3124 if (*cond && check_rows > SELECTIVITY_SAMPLING_THRESHOLD &&
3125 thd->variables.optimizer_use_condition_selectivity > 4)
3126 {
3127 find_selective_predicates_list_processor_data *dt=
3128 (find_selective_predicates_list_processor_data *)
3129 alloc_root(thd->mem_root,
3130 sizeof(find_selective_predicates_list_processor_data));
3131 if (!dt)
3132 DBUG_RETURN(TRUE);
3133 dt->list.empty();
3134 dt->table= table;
3135 if ((*cond)->walk(&Item::find_selective_predicates_list_processor, 0, dt))
3136 DBUG_RETURN(TRUE);
3137 if (dt->list.elements > 0)
3138 {
3139 check_rows= check_selectivity(thd, check_rows, table, &dt->list);
3140 if (check_rows > SELECTIVITY_SAMPLING_THRESHOLD)
3141 {
3142 COND_STATISTIC *stat;
3143 List_iterator_fast<COND_STATISTIC> it(dt->list);
3144 double examined_rows= check_rows;
3145 while ((stat= it++))
3146 {
3147 if (!stat->positive)
3148 {
          DBUG_PRINT("info", ("Setting positive count to 1 to avoid 0"));
3150 stat->positive= 1; // avoid 0
3151 }
3152 DBUG_PRINT("info", ("The predicate selectivity : %g",
3153 (double)stat->positive / examined_rows));
3154 double selectivity= ((double)stat->positive) / examined_rows;
3155 table->cond_selectivity*= selectivity;
3156 /*
            If a field is involved then we register its selectivity in case
            there is an equality with the field.
            For example, given
              t1.a LIKE "%bla%" AND t1.a = t2.b
            the selectivity found here can also be used for t2.
3162 */
3163 if (stat->field_arg)
3164 {
3165 stat->field_arg->cond_selectivity*= selectivity;
3166
3167 if (stat->field_arg->next_equal_field)
3168 {
3169 for (Field *next_field= stat->field_arg->next_equal_field;
3170 next_field != stat->field_arg;
3171 next_field= next_field->next_equal_field)
3172 {
3173 next_field->cond_selectivity*= selectivity;
3174 next_field->table->cond_selectivity*= selectivity;
3175 }
3176 }
3177 }
3178 }
3179
3180 }
    /* The list and its elements are put on mem_root and must not be freed */
3182 table->cond_selectivity_sampling_explain= &dt->list;
3183 }
3184 }
3185
3186 DBUG_RETURN(FALSE);
3187}
3188
3189/****************************************************************************
3190 * Condition selectivity code ends
3191 ****************************************************************************/
3192
3193/****************************************************************************
3194 * Partition pruning module
3195 ****************************************************************************/
3196
3197/*
3198 Store field key image to table record
3199
3200 SYNOPSIS
3201 store_key_image_to_rec()
      field  Field whose key image should be stored
3203 ptr Field value in key format
3204 len Length of the value, in bytes
3205
3206 ATTENTION
3207 len is the length of the value not counting the NULL-byte (at the same
3208 time, ptr points to the key image, which starts with NULL-byte for
3209 nullable columns)
3210
3211 DESCRIPTION
3212 Copy the field value from its key image to the table record. The source
3213 is the value in key image format, occupying len bytes in buffer pointed
3214 by ptr. The destination is table record, in "field value in table record"
3215 format.
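
    For example (illustrative): for a nullable 4-byte INT key part, ptr
    points at [null byte][4 value bytes] and len == 4; the null byte is
    consumed here and set_key_image() receives only the 4 value bytes.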
3216*/
3217
3218void store_key_image_to_rec(Field *field, uchar *ptr, uint len)
3219{
3220 /* Do the same as print_key() does */
3221 my_bitmap_map *old_map;
3222
3223 if (field->real_maybe_null())
3224 {
3225 if (*ptr)
3226 {
3227 field->set_null();
3228 return;
3229 }
3230 field->set_notnull();
3231 ptr++;
3232 }
3233 old_map= dbug_tmp_use_all_columns(field->table,
3234 field->table->write_set);
3235 field->set_key_image(ptr, len);
3236 dbug_tmp_restore_column_map(field->table->write_set, old_map);
3237}
3238
3239#ifdef WITH_PARTITION_STORAGE_ENGINE
3240
3241/*
3242 PartitionPruningModule
3243
3244 This part of the code does partition pruning. Partition pruning solves the
3245 following problem: given a query over partitioned tables, find partitions
3246 that we will not need to access (i.e. partitions that we can assume to be
3247 empty) when executing the query.
3248 The set of partitions to prune doesn't depend on which query execution
3249 plan will be used to execute the query.
3250
3251 HOW IT WORKS
3252
3253 Partition pruning module makes use of RangeAnalysisModule. The following
3254 examples show how the problem of partition pruning can be reduced to the
3255 range analysis problem:
3256
3257 EXAMPLE 1
3258 Consider a query:
3259
3260 SELECT * FROM t1 WHERE (t1.a < 5 OR t1.a = 10) AND t1.a > 3 AND t1.b='z'
3261
  where table t1 is partitioned using PARTITION BY RANGE(t1.a). An obvious
  way to find the used (i.e. not pruned away) partitions is as follows:
3264
3265 1. analyze the WHERE clause and extract the list of intervals over t1.a
3266 for the above query we will get this list: {(3 < t1.a < 5), (t1.a=10)}
3267
3268 2. for each interval I
3269 {
3270 find partitions that have non-empty intersection with I;
3271 mark them as used;
3272 }
3273
3274 EXAMPLE 2
3275 Suppose the table is partitioned by HASH(part_func(t1.a, t1.b)). Then
3276 we need to:
3277
3278 1. Analyze the WHERE clause and get a list of intervals over (t1.a, t1.b).
3279 The list of intervals we'll obtain will look like this:
3280 ((t1.a, t1.b) = (1,'foo')),
3281 ((t1.a, t1.b) = (2,'bar')),
       ((t1.a, t1.b) > (10,'zz'))
3283
3284 2. for each interval I
3285 {
3286 if (the interval has form "(t1.a, t1.b) = (const1, const2)" )
3287 {
3288 calculate HASH(part_func(t1.a, t1.b));
3289 find which partition has records with this hash value and mark
3290 it as used;
3291 }
3292 else
3293 {
3294 mark all partitions as used;
3295 break;
3296 }
3297 }
3298
3299 For both examples the step #1 is exactly what RangeAnalysisModule could
3300 be used to do, if it was provided with appropriate index description
3301 (array of KEY_PART structures).
3302 In example #1, we need to provide it with description of index(t1.a),
3303 in example #2, we need to provide it with description of index(t1.a, t1.b).
3304
3305 These index descriptions are further called "partitioning index
3306 descriptions". Note that it doesn't matter if such indexes really exist,
3307 as range analysis module only uses the description.
3308
3309 Putting it all together, partitioning module works as follows:
3310
3311 prune_partitions() {
3312 call create_partition_index_description();
3313
3314 call get_mm_tree(); // invoke the RangeAnalysisModule
3315
3316 // analyze the obtained interval list and get used partitions
3317 call find_used_partitions();
3318 }
3319
3320*/
3321
3322struct st_part_prune_param;
3323struct st_part_opt_info;
3324
3325typedef void (*mark_full_part_func)(partition_info*, uint32);
3326
3327/*
3328 Partition pruning operation context
3329*/
3330typedef struct st_part_prune_param
3331{
3332 RANGE_OPT_PARAM range_param; /* Range analyzer parameters */
3333
3334 /***************************************************************
3335 Following fields are filled in based solely on partitioning
3336 definition and not modified after that:
3337 **************************************************************/
3338 partition_info *part_info; /* Copy of table->part_info */
3339 /* Function to get partition id from partitioning fields only */
3340 get_part_id_func get_top_partition_id_func;
3341 /* Function to mark a partition as used (w/all subpartitions if they exist)*/
3342 mark_full_part_func mark_full_partition_used;
3343
3344 /* Partitioning 'index' description, array of key parts */
3345 KEY_PART *key;
3346
3347 /*
3348 Number of fields in partitioning 'index' definition created for
3349 partitioning (0 if partitioning 'index' doesn't include partitioning
3350 fields)
3351 */
3352 uint part_fields;
3353 uint subpart_fields; /* Same as above for subpartitioning */
3354
3355 /*
3356 Number of the last partitioning field keypart in the index, or -1 if
3357 partitioning index definition doesn't include partitioning fields.
3358 */
3359 int last_part_partno;
  int last_subpart_partno; /* Same as above for subpartitioning */
3361
3362 /*
    is_part_keypart[i] == MY_TEST(keypart #i of the partitioning index is a
    field used in the partitioning function)
3365 Used to maintain current values of cur_part_fields and cur_subpart_fields
3366 */
3367 my_bool *is_part_keypart;
3368 /* Same as above for subpartitioning */
3369 my_bool *is_subpart_keypart;
3370
  my_bool ignore_part_fields; /* Ignore rest of partitioning fields */
3372
3373 /***************************************************************
3374 Following fields form find_used_partitions() recursion context:
3375 **************************************************************/
3376 SEL_ARG **arg_stack; /* "Stack" of SEL_ARGs */
3377 SEL_ARG **arg_stack_end; /* Top of the stack */
3378 /* Number of partitioning fields for which we have a SEL_ARG* in arg_stack */
3379 uint cur_part_fields;
3380 /* Same as cur_part_fields, but for subpartitioning */
3381 uint cur_subpart_fields;
3382
3383 /* Iterator to be used to obtain the "current" set of used partitions */
3384 PARTITION_ITERATOR part_iter;
3385
3386 /* Initialized bitmap of num_subparts size */
3387 MY_BITMAP subparts_bitmap;
3388
3389 uchar *cur_min_key;
3390 uchar *cur_max_key;
3391
3392 uint cur_min_flag, cur_max_flag;
3393} PART_PRUNE_PARAM;
3394
3395static bool create_partition_index_description(PART_PRUNE_PARAM *prune_par);
3396static int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree);
3397static int find_used_partitions_imerge(PART_PRUNE_PARAM *ppar,
3398 SEL_IMERGE *imerge);
3399static int find_used_partitions_imerge_list(PART_PRUNE_PARAM *ppar,
3400 List<SEL_IMERGE> &merges);
3401static void mark_all_partitions_as_used(partition_info *part_info);
3402
3403#ifndef DBUG_OFF
3404static void print_partitioning_index(KEY_PART *parts, KEY_PART *parts_end);
3405static void dbug_print_field(Field *field);
3406static void dbug_print_segment_range(SEL_ARG *arg, KEY_PART *part);
3407static void dbug_print_singlepoint_range(SEL_ARG **start, uint num);
3408#endif
3409
3410
3411/**
3412 Perform partition pruning for a given table and condition.
3413
3414 @param thd Thread handle
3415 @param table Table to perform partition pruning for
3416 @param pprune_cond Condition to use for partition pruning
3417
  @note This function assumes that lock_partitions are set up when it
  is invoked. The function analyzes the condition, finds partitions that
  need to be used to retrieve the records that match the condition, and
  marks them as used by setting the appropriate bits in
  part_info->read_partitions. In the worst case all partitions are marked
  as used. If the table is not yet locked, it will also unset the bits in
  part_info->lock_partitions that are not set in read_partitions.
3425
  This function returns promptly if called for a non-partitioned table.
3427
3428 @return Operation status
3429 @retval true Failure
3430 @retval false Success
3431*/
3432
3433bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond)
3434{
3435 bool retval= FALSE;
3436 partition_info *part_info = table->part_info;
3437 DBUG_ENTER("prune_partitions");
3438
3439 if (!part_info)
3440 DBUG_RETURN(FALSE); /* not a partitioned table */
3441
3442 if (!pprune_cond)
3443 {
3444 mark_all_partitions_as_used(part_info);
3445 DBUG_RETURN(FALSE);
3446 }
3447
3448 PART_PRUNE_PARAM prune_param;
3449 MEM_ROOT alloc;
3450 RANGE_OPT_PARAM *range_par= &prune_param.range_param;
3451 my_bitmap_map *old_sets[2];
3452
3453 prune_param.part_info= part_info;
3454 init_sql_alloc(&alloc, "prune_partitions",
3455 thd->variables.range_alloc_block_size, 0,
3456 MYF(MY_THREAD_SPECIFIC));
3457 range_par->mem_root= &alloc;
3458 range_par->old_root= thd->mem_root;
3459
3460 if (create_partition_index_description(&prune_param))
3461 {
3462 mark_all_partitions_as_used(part_info);
3463 free_root(&alloc,MYF(0)); // Return memory & allocator
3464 DBUG_RETURN(FALSE);
3465 }
3466
3467 dbug_tmp_use_all_columns(table, old_sets,
3468 table->read_set, table->write_set);
3469 range_par->thd= thd;
3470 range_par->table= table;
3471 /* range_par->cond doesn't need initialization */
3472 range_par->prev_tables= range_par->read_tables= 0;
3473 range_par->current_table= table->map;
3474 /* It should be possible to switch the following ON: */
3475 range_par->remove_false_where_parts= false;
3476
3477 range_par->keys= 1; // one index
3478 range_par->using_real_indexes= FALSE;
3479 range_par->remove_jump_scans= FALSE;
3480 range_par->real_keynr[0]= 0;
3481 range_par->alloced_sel_args= 0;
3482
3483 thd->no_errors=1; // Don't warn about NULL
3484 thd->mem_root=&alloc;
3485
3486 bitmap_clear_all(&part_info->read_partitions);
3487
3488 prune_param.key= prune_param.range_param.key_parts;
3489 SEL_TREE *tree;
3490 int res;
3491
3492 tree= pprune_cond->get_mm_tree(range_par, &pprune_cond);
3493 if (!tree)
3494 goto all_used;
3495
3496 if (tree->type == SEL_TREE::IMPOSSIBLE)
3497 {
3498 retval= TRUE;
3499 goto end;
3500 }
3501
3502 if (tree->type != SEL_TREE::KEY && tree->type != SEL_TREE::KEY_SMALLER)
3503 goto all_used;
3504
3505 if (tree->merges.is_empty())
3506 {
3507 /* Range analysis has produced a single list of intervals. */
3508 prune_param.arg_stack_end= prune_param.arg_stack;
3509 prune_param.cur_part_fields= 0;
3510 prune_param.cur_subpart_fields= 0;
3511
3512 prune_param.cur_min_key= prune_param.range_param.min_key;
3513 prune_param.cur_max_key= prune_param.range_param.max_key;
3514 prune_param.cur_min_flag= prune_param.cur_max_flag= 0;
3515
3516 init_all_partitions_iterator(part_info, &prune_param.part_iter);
3517 if (!tree->keys[0] || (-1 == (res= find_used_partitions(&prune_param,
3518 tree->keys[0]))))
3519 goto all_used;
3520 }
3521 else
3522 {
3523 if (tree->merges.elements == 1)
3524 {
      /*
        Range analysis has produced a "merge" of several interval lists, a
        SEL_TREE that represents an expression in the form
          sel_imerge= (tree1 OR tree2 OR ... OR treeN)
        that cannot be reduced to one tree. This can only happen when the
        partitioning index has several keyparts and the condition is an OR
        of conditions that refer to different key parts. For example, we
        get here for "partitioning_field=const1 OR subpartitioning_field=const2".
      */
3534 if (-1 == (res= find_used_partitions_imerge(&prune_param,
3535 tree->merges.head())))
3536 goto all_used;
3537 }
3538 else
3539 {
      /*
        Range analysis has produced a list of several imerges, i.e. a
        structure that represents a condition in the form
          imerge_list= (sel_imerge1 AND sel_imerge2 AND ... AND sel_imergeN)
        This is produced for complicated WHERE clauses that the range
        analyzer can't really analyze properly.
      */
3547 if (-1 == (res= find_used_partitions_imerge_list(&prune_param,
3548 tree->merges)))
3549 goto all_used;
3550 }
3551 }
3552
3553 /*
3554 res == 0 => no used partitions => retval=TRUE
3555 res == 1 => some used partitions => retval=FALSE
3556 res == -1 - we jump over this line to all_used:
3557 */
3558 retval= MY_TEST(!res);
3559 goto end;
3560
3561all_used:
  retval= FALSE; // all partitions are marked as used
3563 mark_all_partitions_as_used(prune_param.part_info);
3564end:
3565 dbug_tmp_restore_column_maps(table->read_set, table->write_set, old_sets);
3566 thd->no_errors=0;
3567 thd->mem_root= range_par->old_root;
3568 free_root(&alloc,MYF(0)); // Return memory & allocator
3569 /*
3570 Must be a subset of the locked partitions.
3571 lock_partitions contains the partitions marked by explicit partition
3572 selection (... t PARTITION (pX) ...) and we must only use partitions
3573 within that set.
3574 */
3575 bitmap_intersect(&prune_param.part_info->read_partitions,
3576 &prune_param.part_info->lock_partitions);
3577 /*
3578 If not yet locked, also prune partitions to lock if not UPDATEing
3579 partition key fields. This will also prune lock_partitions if we are under
3580 LOCK TABLES, so prune away calls to start_stmt().
3581 TODO: enhance this prune locking to also allow pruning of
3582 'UPDATE t SET part_key = const WHERE cond_is_prunable' so it adds
3583 a lock for part_key partition.
3584 */
3585 if (table->file->get_lock_type() == F_UNLCK &&
3586 !partition_key_modified(table, table->write_set))
3587 {
3588 bitmap_copy(&prune_param.part_info->lock_partitions,
3589 &prune_param.part_info->read_partitions);
3590 }
3591 if (bitmap_is_clear_all(&(prune_param.part_info->read_partitions)))
3592 {
3593 table->all_partitions_pruned_away= true;
3594 retval= TRUE;
3595 }
3596 DBUG_RETURN(retval);
3597}
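
/*
  Illustrative example of the pruning above (a sketch, not part of the
  implementation): for a table

    CREATE TABLE t1 (a INT)
    PARTITION BY RANGE (a)
     (PARTITION p0 VALUES LESS THAN (10),
      PARTITION p1 VALUES LESS THAN (20));

  calling prune_partitions() with pprune_cond representing "a < 5" leaves
  only p0 set in part_info->read_partitions. A condition that no partition
  can satisfy (e.g. "a >= 20") leaves the bitmap empty, so
  all_partitions_pruned_away is set and TRUE is returned.
*/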
3598
3599
3600/*
3601 For SEL_ARG* array, store sel_arg->min values into table record buffer
3602
3603 SYNOPSIS
3604 store_selargs_to_rec()
3605 ppar Partition pruning context
3606 start Array of SEL_ARG* for which the minimum values should be stored
3607 num Number of elements in the array
3608
3609 DESCRIPTION
3610 For each SEL_ARG* interval in the specified array, store the left edge
3611 field value (sel_arg->min, key image format) into the table record.
3612*/
3613
3614static void store_selargs_to_rec(PART_PRUNE_PARAM *ppar, SEL_ARG **start,
3615 int num)
3616{
3617 KEY_PART *parts= ppar->range_param.key_parts;
3618 for (SEL_ARG **end= start + num; start != end; start++)
3619 {
3620 SEL_ARG *sel_arg= (*start);
3621 store_key_image_to_rec(sel_arg->field, sel_arg->min_value,
3622 parts[sel_arg->part].length);
3623 }
3624}
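
/*
  Usage illustration (a sketch): given singlepoint SEL_ARGs for
  "a=1 AND b=7" on partitioning fields (a,b), the function above writes
  the key images of 1 and 7 into the corresponding fields of the table
  record buffer, after which get_partition_id-type functions can evaluate
  the partitioning function on those values (see find_used_partitions()).
*/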
3625
3626
3627/* Mark a partition as used in the case when there are no subpartitions */
3628static void mark_full_partition_used_no_parts(partition_info* part_info,
3629 uint32 part_id)
3630{
3631 DBUG_ENTER("mark_full_partition_used_no_parts");
3632 DBUG_PRINT("enter", ("Mark partition %u as used", part_id));
3633 bitmap_set_bit(&part_info->read_partitions, part_id);
3634 DBUG_VOID_RETURN;
3635}
3636
3637
3638/* Mark a partition as used in the case when there are subpartitions */
3639static void mark_full_partition_used_with_parts(partition_info *part_info,
3640 uint32 part_id)
3641{
3642 uint32 start= part_id * part_info->num_subparts;
3643 uint32 end= start + part_info->num_subparts;
3644 DBUG_ENTER("mark_full_partition_used_with_parts");
3645
3646 for (; start != end; start++)
3647 {
3648 DBUG_PRINT("info", ("1:Mark subpartition %u as used", start));
3649 bitmap_set_bit(&part_info->read_partitions, start);
3650 }
3651 DBUG_VOID_RETURN;
3652}
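
/*
  Example of the subpartition id layout used above: with num_subparts=4,
  partition part_id=2 owns bits 2*4+0 .. 2*4+3, i.e. bits 8..11 of
  read_partitions, so marking the full partition sets those four bits.
*/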
3653
3654/*
3655 Find the set of used partitions for List<SEL_IMERGE>
3656 SYNOPSIS
3657 find_used_partitions_imerge_list
3658 ppar Partition pruning context.
3659 key_tree Intervals tree to perform pruning for.
3660
3661 DESCRIPTION
3662 List<SEL_IMERGE> represents "imerge1 AND imerge2 AND ...".
3663 The set of used partitions is an intersection of used partitions sets
3664 for imerge_{i}.
3665 We accumulate this intersection in a separate bitmap.
3666
3667 RETURN
3668 See find_used_partitions()
3669*/
3670
3671static int find_used_partitions_imerge_list(PART_PRUNE_PARAM *ppar,
3672 List<SEL_IMERGE> &merges)
3673{
3674 MY_BITMAP all_merges;
3675 uint bitmap_bytes;
3676 my_bitmap_map *bitmap_buf;
3677 uint n_bits= ppar->part_info->read_partitions.n_bits;
3678 bitmap_bytes= bitmap_buffer_size(n_bits);
3679 if (!(bitmap_buf= (my_bitmap_map*) alloc_root(ppar->range_param.mem_root,
3680 bitmap_bytes)))
3681 {
    /*
      Fallback: process just the first SEL_IMERGE. This can leave us with
      more partitions marked as used than actually needed.
    */
3686 return find_used_partitions_imerge(ppar, merges.head());
3687 }
3688 my_bitmap_init(&all_merges, bitmap_buf, n_bits, FALSE);
3689 bitmap_set_prefix(&all_merges, n_bits);
3690
3691 List_iterator<SEL_IMERGE> it(merges);
3692 SEL_IMERGE *imerge;
3693 while ((imerge=it++))
3694 {
3695 int res= find_used_partitions_imerge(ppar, imerge);
3696 if (!res)
3697 {
3698 /* no used partitions on one ANDed imerge => no used partitions at all */
3699 return 0;
3700 }
3701
3702 if (res != -1)
3703 bitmap_intersect(&all_merges, &ppar->part_info->read_partitions);
3704
3705
3706 if (bitmap_is_clear_all(&all_merges))
3707 return 0;
3708
3709 bitmap_clear_all(&ppar->part_info->read_partitions);
3710 }
3711 memcpy(ppar->part_info->read_partitions.bitmap, all_merges.bitmap,
3712 bitmap_bytes);
3713 return 1;
3714}
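
/*
  Illustration of the intersection logic above: if imerge1 marks
  partitions {p0,p1} and imerge2 marks {p1,p2}, then for
  "imerge1 AND imerge2" only p1 remains set in all_merges, and only p1 is
  copied into read_partitions.
*/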
3715
3716
3717/*
3718 Find the set of used partitions for SEL_IMERGE structure
3719 SYNOPSIS
3720 find_used_partitions_imerge()
3721 ppar Partition pruning context.
3722 key_tree Intervals tree to perform pruning for.
3723
3724 DESCRIPTION
    SEL_IMERGE represents "tree1 OR tree2 OR ...". The implementation is
    trivial - just mark the used partitions for each tree, and bail out
    early if for some tree_{i} all partitions are used.
3728
3729 RETURN
3730 See find_used_partitions().
3731*/
3732
3733static
3734int find_used_partitions_imerge(PART_PRUNE_PARAM *ppar, SEL_IMERGE *imerge)
3735{
3736 int res= 0;
3737 for (SEL_TREE **ptree= imerge->trees; ptree < imerge->trees_next; ptree++)
3738 {
3739 ppar->arg_stack_end= ppar->arg_stack;
3740 ppar->cur_part_fields= 0;
3741 ppar->cur_subpart_fields= 0;
3742
3743 ppar->cur_min_key= ppar->range_param.min_key;
3744 ppar->cur_max_key= ppar->range_param.max_key;
3745 ppar->cur_min_flag= ppar->cur_max_flag= 0;
3746
3747 init_all_partitions_iterator(ppar->part_info, &ppar->part_iter);
3748 SEL_ARG *key_tree= (*ptree)->keys[0];
3749 if (!key_tree || (-1 == (res |= find_used_partitions(ppar, key_tree))))
3750 return -1;
3751 }
3752 return res;
3753}
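
/*
  Illustration: for "tree1 OR tree2", where tree1 marks {p0} and tree2
  marks {p2}, the markings accumulate in read_partitions, giving {p0,p2}.
  If some tree yields -1 ("all partitions"), we return -1 and let the
  caller mark all partitions as used.
*/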
3754
3755
3756/*
3757 Collect partitioning ranges for the SEL_ARG tree and mark partitions as used
3758
3759 SYNOPSIS
3760 find_used_partitions()
3761 ppar Partition pruning context.
3762 key_tree SEL_ARG range tree to perform pruning for
3763
3764 DESCRIPTION
3765 This function
3766 * recursively walks the SEL_ARG* tree collecting partitioning "intervals"
3767 * finds the partitions one needs to use to get rows in these intervals
3768 * marks these partitions as used.
    The next section describes the process in greater detail.
3770
3771 IMPLEMENTATION
3772 TYPES OF RESTRICTIONS THAT WE CAN OBTAIN PARTITIONS FOR
3773 We can find out which [sub]partitions to use if we obtain restrictions on
3774 [sub]partitioning fields in the following form:
3775 1. "partition_field1=const1 AND ... AND partition_fieldN=constN"
3776 1.1 Same as (1) but for subpartition fields
3777
3778 If partitioning supports interval analysis (i.e. partitioning is a
3779 function of a single table field, and partition_info::
3780 get_part_iter_for_interval != NULL), then we can also use condition in
3781 this form:
3782 2. "const1 <=? partition_field <=? const2"
3783 2.1 Same as (2) but for subpartition_field
3784
3785 INFERRING THE RESTRICTIONS FROM SEL_ARG TREE
3786
3787 The below is an example of what SEL_ARG tree may represent:
3788
3789 (start)
3790 | $
3791 | Partitioning keyparts $ subpartitioning keyparts
3792 | $
3793 | ... ... $
3794 | | | $
3795 | +---------+ +---------+ $ +-----------+ +-----------+
3796 \-| par1=c1 |--| par2=c2 |-----| subpar1=c3|--| subpar2=c5|
3797 +---------+ +---------+ $ +-----------+ +-----------+
3798 | $ | |
3799 | $ | +-----------+
3800 | $ | | subpar2=c6|
3801 | $ | +-----------+
3802 | $ |
3803 | $ +-----------+ +-----------+
3804 | $ | subpar1=c4|--| subpar2=c8|
3805 | $ +-----------+ +-----------+
3806 | $
3807 | $
3808 +---------+ $ +------------+ +------------+
3809 | par1=c2 |------------------| subpar1=c10|--| subpar2=c12|
3810 +---------+ $ +------------+ +------------+
3811 | $
3812 ... $
3813
3814 The up-down connections are connections via SEL_ARG::left and
3815 SEL_ARG::right. A horizontal connection to the right is the
3816 SEL_ARG::next_key_part connection.
3817
3818 find_used_partitions() traverses the entire tree via recursion on
3819 * SEL_ARG::next_key_part (from left to right on the picture)
3820 * SEL_ARG::left|right (up/down on the pic). Left-right recursion is
3821 performed for each depth level.
3822
3823 Recursion descent on SEL_ARG::next_key_part is used to accumulate (in
3824 ppar->arg_stack) constraints on partitioning and subpartitioning fields.
3825 For the example in the above picture, one of stack states is:
3826 in find_used_partitions(key_tree = "subpar2=c5") (***)
3827 in find_used_partitions(key_tree = "subpar1=c3")
3828 in find_used_partitions(key_tree = "par2=c2") (**)
3829 in find_used_partitions(key_tree = "par1=c1")
3830 in prune_partitions(...)
3831 We apply partitioning limits as soon as possible, e.g. when we reach the
3832 depth (**), we find which partition(s) correspond to "par1=c1 AND par2=c2",
3833 and save them in ppar->part_iter.
    When we reach the depth (***), we find which subpartition(s) correspond
    to "subpar1=c3 AND subpar2=c5", and mark those subpartitions as used
    within the partitions saved at (**).
3837
3838 It is possible that constraints on some partitioning fields are missing.
3839 For the above example, consider this stack state:
3840 in find_used_partitions(key_tree = "subpar2=c12") (***)
3841 in find_used_partitions(key_tree = "subpar1=c10")
3842 in find_used_partitions(key_tree = "par1=c2")
3843 in prune_partitions(...)
    Here we don't have constraints for all partitioning fields. Since we've
    never set ppar->part_iter to contain a used set of partitions, we use
    its default "all partitions" value. We get the subpartition id for
    "subpar1=c10 AND subpar2=c12", and mark that subpartition as used in
    every partition.
3849
    The inverse is also possible: we may get constraints on partitioning
    fields, but no constraints on subpartitioning fields. In that case,
    calls to find_used_partitions() with depth below (**) will return -1,
    and we will mark the entire partition as used.
3854
3855 TODO
3856 Replace recursion on SEL_ARG::left and SEL_ARG::right with a loop
3857
3858 RETURN
3859 1 OK, one or more [sub]partitions are marked as used.
3860 0 The passed condition doesn't match any partitions
3861 -1 Couldn't infer any partition pruning "intervals" from the passed
3862 SEL_ARG* tree (which means that all partitions should be marked as
3863 used) Marking partitions as used is the responsibility of the caller.
3864*/
3865
3866static
3867int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree)
3868{
3869 int res, left_res=0, right_res=0;
3870 int key_tree_part= (int)key_tree->part;
3871 bool set_full_part_if_bad_ret= FALSE;
3872 bool ignore_part_fields= ppar->ignore_part_fields;
3873 bool did_set_ignore_part_fields= FALSE;
3874 RANGE_OPT_PARAM *range_par= &(ppar->range_param);
3875
3876 if (check_stack_overrun(range_par->thd, 3*STACK_MIN_SIZE, NULL))
3877 return -1;
3878
3879 if (key_tree->left != &null_element)
3880 {
3881 if (-1 == (left_res= find_used_partitions(ppar,key_tree->left)))
3882 return -1;
3883 }
3884
3885 /* Push SEL_ARG's to stack to enable looking backwards as well */
3886 ppar->cur_part_fields+= ppar->is_part_keypart[key_tree_part];
3887 ppar->cur_subpart_fields+= ppar->is_subpart_keypart[key_tree_part];
3888 *(ppar->arg_stack_end++)= key_tree;
3889
3890 if (ignore_part_fields)
3891 {
3892 /*
3893 We come here when a condition on the first partitioning
3894 fields led to evaluating the partitioning condition
3895 (due to finding a condition of the type a < const or
3896 b > const). Thus we must ignore the rest of the
3897 partitioning fields but we still want to analyse the
3898 subpartitioning fields.
3899 */
3900 if (key_tree->next_key_part)
3901 res= find_used_partitions(ppar, key_tree->next_key_part);
3902 else
3903 res= -1;
3904 goto pop_and_go_right;
3905 }
3906
3907 if (key_tree->type == SEL_ARG::KEY_RANGE)
3908 {
3909 if (ppar->part_info->get_part_iter_for_interval &&
3910 key_tree->part <= ppar->last_part_partno)
3911 {
3912 /* Collect left and right bound, their lengths and flags */
3913 uchar *min_key= ppar->cur_min_key;
3914 uchar *max_key= ppar->cur_max_key;
3915 uchar *tmp_min_key= min_key;
3916 uchar *tmp_max_key= max_key;
3917 key_tree->store_min(ppar->key[key_tree->part].store_length,
3918 &tmp_min_key, ppar->cur_min_flag);
3919 key_tree->store_max(ppar->key[key_tree->part].store_length,
3920 &tmp_max_key, ppar->cur_max_flag);
3921 uint flag;
3922 if (key_tree->next_key_part &&
3923 key_tree->next_key_part->part == key_tree->part+1 &&
3924 key_tree->next_key_part->part <= ppar->last_part_partno &&
3925 key_tree->next_key_part->type == SEL_ARG::KEY_RANGE)
3926 {
        /*
          There are more key parts for partition pruning to handle.
          This mainly happens when the condition is an equality
          condition.
        */
3932 if ((tmp_min_key - min_key) == (tmp_max_key - max_key) &&
3933 (memcmp(min_key, max_key, (uint)(tmp_max_key - max_key)) == 0) &&
3934 !key_tree->min_flag && !key_tree->max_flag)
3935 {
3936 /* Set 'parameters' */
3937 ppar->cur_min_key= tmp_min_key;
3938 ppar->cur_max_key= tmp_max_key;
3939 uint save_min_flag= ppar->cur_min_flag;
3940 uint save_max_flag= ppar->cur_max_flag;
3941
3942 ppar->cur_min_flag|= key_tree->min_flag;
3943 ppar->cur_max_flag|= key_tree->max_flag;
3944
3945 res= find_used_partitions(ppar, key_tree->next_key_part);
3946
3947 /* Restore 'parameters' back */
3948 ppar->cur_min_key= min_key;
3949 ppar->cur_max_key= max_key;
3950
3951 ppar->cur_min_flag= save_min_flag;
3952 ppar->cur_max_flag= save_max_flag;
3953 goto pop_and_go_right;
3954 }
3955 /* We have arrived at the last field in the partition pruning */
3956 uint tmp_min_flag= key_tree->min_flag,
3957 tmp_max_flag= key_tree->max_flag;
3958 if (!tmp_min_flag)
3959 key_tree->next_key_part->store_min_key(ppar->key,
3960 &tmp_min_key,
3961 &tmp_min_flag,
3962 ppar->last_part_partno);
3963 if (!tmp_max_flag)
3964 key_tree->next_key_part->store_max_key(ppar->key,
3965 &tmp_max_key,
3966 &tmp_max_flag,
3967 ppar->last_part_partno);
3968 flag= tmp_min_flag | tmp_max_flag;
3969 }
3970 else
3971 flag= key_tree->min_flag | key_tree->max_flag;
3972
3973 if (tmp_min_key != range_par->min_key)
3974 flag&= ~NO_MIN_RANGE;
3975 else
3976 flag|= NO_MIN_RANGE;
3977 if (tmp_max_key != range_par->max_key)
3978 flag&= ~NO_MAX_RANGE;
3979 else
3980 flag|= NO_MAX_RANGE;
3981
      /*
        We need to call the interval mapper if we have a condition that
        makes sense to prune on. In the example of COLUMNS on a and b it
        makes sense if we have a condition on a, or conditions on both a
        and b. If we only have conditions on b it might make sense, but
        this is a harder case that we will solve later. For the harder
        case this clause then turns into use of all partitions and thus
        we simply set res= -1 as if the mapper had returned that.
        TODO: What to do here is defined in WL#4065.
      */
      if (ppar->arg_stack[0]->part == 0 ||
          ppar->part_info->part_type == VERSIONING_PARTITION)
3993 {
3994 uint32 i;
3995 uint32 store_length_array[MAX_KEY];
3996 uint32 num_keys= ppar->part_fields;
3997
3998 for (i= 0; i < num_keys; i++)
3999 store_length_array[i]= ppar->key[i].store_length;
4000 res= ppar->part_info->
4001 get_part_iter_for_interval(ppar->part_info,
4002 FALSE,
4003 store_length_array,
4004 range_par->min_key,
4005 range_par->max_key,
4006 (uint)(tmp_min_key - range_par->min_key),
4007 (uint)(tmp_max_key - range_par->max_key),
4008 flag,
4009 &ppar->part_iter);
4010 if (!res)
4011 goto pop_and_go_right; /* res==0 --> no satisfying partitions */
4012 }
4013 else
4014 res= -1;
4015
4016 if (res == -1)
4017 {
4018 /* get a full range iterator */
4019 init_all_partitions_iterator(ppar->part_info, &ppar->part_iter);
4020 }
      /*
        Save our intent to mark the full partition as used if we are not
        able to obtain further limits on subpartitions
      */
4025 if (key_tree_part < ppar->last_part_partno)
4026 {
4027 /*
4028 We need to ignore the rest of the partitioning fields in all
4029 evaluations after this
4030 */
4031 did_set_ignore_part_fields= TRUE;
4032 ppar->ignore_part_fields= TRUE;
4033 }
4034 set_full_part_if_bad_ret= TRUE;
4035 goto process_next_key_part;
4036 }
4037
4038 if (key_tree_part == ppar->last_subpart_partno &&
4039 (NULL != ppar->part_info->get_subpart_iter_for_interval))
4040 {
4041 PARTITION_ITERATOR subpart_iter;
4042 DBUG_EXECUTE("info", dbug_print_segment_range(key_tree,
4043 range_par->key_parts););
4044 res= ppar->part_info->
4045 get_subpart_iter_for_interval(ppar->part_info,
4046 TRUE,
4047 NULL, /* Currently not used here */
4048 key_tree->min_value,
4049 key_tree->max_value,
4050 0, 0, /* Those are ignored here */
4051 key_tree->min_flag |
4052 key_tree->max_flag,
4053 &subpart_iter);
4054 if (res == 0)
4055 {
4056 /*
4057 The only case where we can get "no satisfying subpartitions"
4058 returned from the above call is when an error has occurred.
4059 */
4060 DBUG_ASSERT(range_par->thd->is_error());
4061 return 0;
4062 }
4063
4064 if (res == -1)
4065 goto pop_and_go_right; /* all subpartitions satisfy */
4066
4067 uint32 subpart_id;
4068 bitmap_clear_all(&ppar->subparts_bitmap);
4069 while ((subpart_id= subpart_iter.get_next(&subpart_iter)) !=
4070 NOT_A_PARTITION_ID)
4071 bitmap_set_bit(&ppar->subparts_bitmap, subpart_id);
4072
      /*
        Mark the found subpartitions as used in each of the used partitions.
      */
4074 uint32 part_id;
4075 while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) !=
4076 NOT_A_PARTITION_ID)
4077 {
4078 for (uint i= 0; i < ppar->part_info->num_subparts; i++)
4079 if (bitmap_is_set(&ppar->subparts_bitmap, i))
4080 bitmap_set_bit(&ppar->part_info->read_partitions,
4081 part_id * ppar->part_info->num_subparts + i);
4082 }
4083 goto pop_and_go_right;
4084 }
4085
4086 if (key_tree->is_singlepoint())
4087 {
4088 if (key_tree_part == ppar->last_part_partno &&
4089 ppar->cur_part_fields == ppar->part_fields &&
4090 ppar->part_info->get_part_iter_for_interval == NULL)
4091 {
4092 /*
4093 Ok, we've got "fieldN<=>constN"-type SEL_ARGs for all partitioning
4094 fields. Save all constN constants into table record buffer.
4095 */
4096 store_selargs_to_rec(ppar, ppar->arg_stack, ppar->part_fields);
4097 DBUG_EXECUTE("info", dbug_print_singlepoint_range(ppar->arg_stack,
4098 ppar->part_fields););
4099 uint32 part_id;
4100 longlong func_value;
4101 /* Find in which partition the {const1, ...,constN} tuple goes */
4102 if (ppar->get_top_partition_id_func(ppar->part_info, &part_id,
4103 &func_value))
4104 {
4105 res= 0; /* No satisfying partitions */
4106 goto pop_and_go_right;
4107 }
        /* Remember the limit we got - a single partition #part_id */
4109 init_single_partition_iterator(part_id, &ppar->part_iter);
4110
        /*
          If there are no subpartitions, or we fail to get any limit for
          them, we'll mark the full partition as used.
        */
4115 set_full_part_if_bad_ret= TRUE;
4116 goto process_next_key_part;
4117 }
4118
4119 if (key_tree_part == ppar->last_subpart_partno &&
4120 ppar->cur_subpart_fields == ppar->subpart_fields)
4121 {
4122 /*
4123 Ok, we've got "fieldN<=>constN"-type SEL_ARGs for all subpartitioning
4124 fields. Save all constN constants into table record buffer.
4125 */
4126 store_selargs_to_rec(ppar, ppar->arg_stack_end - ppar->subpart_fields,
4127 ppar->subpart_fields);
4128 DBUG_EXECUTE("info", dbug_print_singlepoint_range(ppar->arg_stack_end-
4129 ppar->subpart_fields,
4130 ppar->subpart_fields););
4131 /* Find the subpartition (it's HASH/KEY so we always have one) */
4132 partition_info *part_info= ppar->part_info;
4133 uint32 part_id, subpart_id;
4134
4135 if (part_info->get_subpartition_id(part_info, &subpart_id))
4136 return 0;
4137
        /* Mark this subpartition as used in each of the used partitions. */
4139 while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) !=
4140 NOT_A_PARTITION_ID)
4141 {
4142 bitmap_set_bit(&part_info->read_partitions,
4143 part_id * part_info->num_subparts + subpart_id);
4144 }
4145 res= 1; /* Some partitions were marked as used */
4146 goto pop_and_go_right;
4147 }
4148 }
4149 else
4150 {
      /*
        Can't handle a condition on the current key part. If we're deep
        enough to be processing the subpartitioning key parts, this means
        we won't be able to infer any suitable condition, so bail out.
      */
4156 if (key_tree_part >= ppar->last_part_partno)
4157 {
4158 res= -1;
4159 goto pop_and_go_right;
4160 }
      /*
        There is no point in continuing with the rest of the partitioning
        key parts. Try to continue with the subpartitioning key parts.
      */
4165 ppar->ignore_part_fields= true;
4166 did_set_ignore_part_fields= true;
4167 goto process_next_key_part;
4168 }
4169 }
4170
4171process_next_key_part:
4172 if (key_tree->next_key_part)
4173 res= find_used_partitions(ppar, key_tree->next_key_part);
4174 else
4175 res= -1;
4176
4177 if (did_set_ignore_part_fields)
4178 {
    /*
      We have returned from processing all key trees linked to our next
      key part. We are ready to move down (using right pointers), and this
      tree is a new evaluation requiring its own decision on whether to
      ignore partitioning fields.
    */
4185 ppar->ignore_part_fields= FALSE;
4186 }
4187 if (set_full_part_if_bad_ret)
4188 {
4189 if (res == -1)
4190 {
4191 /* Got "full range" for subpartitioning fields */
4192 uint32 part_id;
4193 bool found= FALSE;
4194 while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) !=
4195 NOT_A_PARTITION_ID)
4196 {
4197 ppar->mark_full_partition_used(ppar->part_info, part_id);
4198 found= TRUE;
4199 }
4200 res= MY_TEST(found);
4201 }
4202 /*
4203 Restore the "used partitions iterator" to the default setting that
4204 specifies iteration over all partitions.
4205 */
4206 init_all_partitions_iterator(ppar->part_info, &ppar->part_iter);
4207 }
4208
4209pop_and_go_right:
4210 /* Pop this key part info off the "stack" */
4211 ppar->arg_stack_end--;
4212 ppar->cur_part_fields-= ppar->is_part_keypart[key_tree_part];
4213 ppar->cur_subpart_fields-= ppar->is_subpart_keypart[key_tree_part];
4214
4215 if (res == -1)
4216 return -1;
4217 if (key_tree->right != &null_element)
4218 {
4219 if (-1 == (right_res= find_used_partitions(ppar,key_tree->right)))
4220 return -1;
4221 }
4222 return (left_res || right_res || res);
4223}
4224
4225
4226static void mark_all_partitions_as_used(partition_info *part_info)
4227{
4228 bitmap_copy(&(part_info->read_partitions),
4229 &(part_info->lock_partitions));
4230}
4231
4232
4233/*
  Check if field types allow constructing a partitioning index description
4235
4236 SYNOPSIS
4237 fields_ok_for_partition_index()
4238 pfield NULL-terminated array of pointers to fields.
4239
  DESCRIPTION
    For an array of fields, check if we can use all of the fields to create
    a partitioning index description.

    We can't process GEOMETRY fields - for these fields singlepoint
    intervals can't be generated, and non-singlepoint intervals are
    "special" kinds of intervals to which our processing logic can't be
    applied.

    It is not known whether we could process ENUM fields, so they are
    disabled to be on the safe side.
4250
4251 RETURN
4252 TRUE Yes, fields can be used in partitioning index
4253 FALSE Otherwise
4254*/
4255
4256static bool fields_ok_for_partition_index(Field **pfield)
4257{
4258 if (!pfield)
4259 return FALSE;
4260 for (; (*pfield); pfield++)
4261 {
4262 enum_field_types ftype= (*pfield)->real_type();
4263 if (ftype == MYSQL_TYPE_ENUM || ftype == MYSQL_TYPE_GEOMETRY)
4264 return FALSE;
4265 }
4266 return TRUE;
4267}
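
/*
  For example (illustrative): if the partitioning field array is
  (a INT, g GEOMETRY), the function above returns FALSE, and
  create_partition_index_description() will then exclude this
  [sub]partitioning level from pruning.
*/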
4268
4269
4270/*
4271 Create partition index description and fill related info in the context
4272 struct
4273
4274 SYNOPSIS
4275 create_partition_index_description()
4276 prune_par INOUT Partition pruning context
4277
4278 DESCRIPTION
4279 Create partition index description. Partition index description is:
4280
4281 part_index(used_fields_list(part_expr), used_fields_list(subpart_expr))
4282
    If partitioning/sub-partitioning uses ENUM or GEOMETRY fields (see
    fields_ok_for_partition_index()), then the corresponding
    fields_list(...) is not included in the index description and we don't
    perform partition pruning for partitions/subpartitions.
4286
4287 RETURN
4288 TRUE Out of memory or can't do partition pruning at all
4289 FALSE OK
4290*/
4291
4292static bool create_partition_index_description(PART_PRUNE_PARAM *ppar)
4293{
4294 RANGE_OPT_PARAM *range_par= &(ppar->range_param);
4295 partition_info *part_info= ppar->part_info;
4296 uint used_part_fields, used_subpart_fields;
4297
4298 used_part_fields= fields_ok_for_partition_index(part_info->part_field_array) ?
4299 part_info->num_part_fields : 0;
4300 used_subpart_fields=
4301 fields_ok_for_partition_index(part_info->subpart_field_array)?
4302 part_info->num_subpart_fields : 0;
4303
4304 uint total_parts= used_part_fields + used_subpart_fields;
4305
4306 ppar->ignore_part_fields= FALSE;
4307 ppar->part_fields= used_part_fields;
4308 ppar->last_part_partno= (int)used_part_fields - 1;
4309
4310 ppar->subpart_fields= used_subpart_fields;
4311 ppar->last_subpart_partno=
4312 used_subpart_fields?(int)(used_part_fields + used_subpart_fields - 1): -1;
4313
4314 if (part_info->is_sub_partitioned())
4315 {
4316 ppar->mark_full_partition_used= mark_full_partition_used_with_parts;
4317 ppar->get_top_partition_id_func= part_info->get_part_partition_id;
4318 }
4319 else
4320 {
4321 ppar->mark_full_partition_used= mark_full_partition_used_no_parts;
4322 ppar->get_top_partition_id_func= part_info->get_partition_id;
4323 }
4324
4325 KEY_PART *key_part;
4326 MEM_ROOT *alloc= range_par->mem_root;
4327 if (!total_parts ||
4328 !(key_part= (KEY_PART*)alloc_root(alloc, sizeof(KEY_PART)*
4329 total_parts)) ||
4330 !(ppar->arg_stack= (SEL_ARG**)alloc_root(alloc, sizeof(SEL_ARG*)*
4331 total_parts)) ||
4332 !(ppar->is_part_keypart= (my_bool*)alloc_root(alloc, sizeof(my_bool)*
4333 total_parts)) ||
4334 !(ppar->is_subpart_keypart= (my_bool*)alloc_root(alloc, sizeof(my_bool)*
4335 total_parts)))
4336 return TRUE;
4337
4338 if (ppar->subpart_fields)
4339 {
4340 my_bitmap_map *buf;
4341 uint32 bufsize= bitmap_buffer_size(ppar->part_info->num_subparts);
4342 if (!(buf= (my_bitmap_map*) alloc_root(alloc, bufsize)))
4343 return TRUE;
4344 my_bitmap_init(&ppar->subparts_bitmap, buf, ppar->part_info->num_subparts,
4345 FALSE);
4346 }
4347 range_par->key_parts= key_part;
4348 Field **field= (ppar->part_fields)? part_info->part_field_array :
4349 part_info->subpart_field_array;
4350 bool in_subpart_fields= FALSE;
4351 uint total_key_len= 0;
4352 for (uint part= 0; part < total_parts; part++, key_part++)
4353 {
4354 key_part->key= 0;
4355 key_part->part= part;
4356 key_part->length= (uint16)(*field)->key_length();
4357 key_part->store_length= (uint16)get_partition_field_store_length(*field);
4358 total_key_len += key_part->store_length;
4359
4360 DBUG_PRINT("info", ("part %u length %u store_length %u", part,
4361 key_part->length, key_part->store_length));
4362
4363 key_part->field= (*field);
4364 key_part->image_type = Field::itRAW;
    /*
      We set the keypart flag to 0 here as HA_PART_KEY_SEG is the only
      flag checked in the RangeAnalysisModule.
    */
4369 key_part->flag= 0;
4370 /* We don't set key_parts->null_bit as it will not be used */
4371
4372 ppar->is_part_keypart[part]= !in_subpart_fields;
4373 ppar->is_subpart_keypart[part]= in_subpart_fields;
4374
    /*
      Check if this was the last field in this array; in that case we
      switch to the subpartitioning fields. (This only happens if there
      are subpartitioning fields to cater for.)
    */
4380 if (!*(++field))
4381 {
4382 field= part_info->subpart_field_array;
4383 in_subpart_fields= TRUE;
4384 }
4385 }
4386 range_par->key_parts_end= key_part;
4387
4388 total_key_len++; /* Take into account the "+1" in QUICK_RANGE::QUICK_RANGE */
4389 if (!(range_par->min_key= (uchar*)alloc_root(alloc,total_key_len)) ||
4390 !(range_par->max_key= (uchar*)alloc_root(alloc,total_key_len)))
4391 {
4392 return true;
4393 }
4394
4395 DBUG_EXECUTE("info", print_partitioning_index(range_par->key_parts,
4396 range_par->key_parts_end););
4397 return FALSE;
4398}
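
/*
  Example of the description built above (a sketch): for
    PARTITION BY RANGE (a) SUBPARTITION BY HASH (b),
  total_parts == 2 and the pseudo-index is (a, b), with
  is_part_keypart[]= {1,0} and is_subpart_keypart[]= {0,1}. Keypart
  numbers 0 and 1 are then what last_part_partno == 0 and
  last_subpart_partno == 1 refer to in find_used_partitions().
*/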
4399
4400
4401#ifndef DBUG_OFF
4402
4403static void print_partitioning_index(KEY_PART *parts, KEY_PART *parts_end)
4404{
4405 DBUG_ENTER("print_partitioning_index");
4406 DBUG_LOCK_FILE;
4407 fprintf(DBUG_FILE, "partitioning INDEX(");
4408 for (KEY_PART *p=parts; p != parts_end; p++)
4409 {
4410 fprintf(DBUG_FILE, "%s%s", p==parts?"":" ,", p->field->field_name.str);
4411 }
4412 fputs(");\n", DBUG_FILE);
4413 DBUG_UNLOCK_FILE;
4414 DBUG_VOID_RETURN;
4415}
4416
4417/* Print field value into debug trace, in NULL-aware way. */
4418static void dbug_print_field(Field *field)
4419{
4420 if (field->is_real_null())
4421 fprintf(DBUG_FILE, "NULL");
4422 else
4423 {
4424 char buf[256];
4425 String str(buf, sizeof(buf), &my_charset_bin);
4426 str.length(0);
4427 String *pstr;
4428 pstr= field->val_str(&str);
4429 fprintf(DBUG_FILE, "'%s'", pstr->c_ptr_safe());
4430 }
4431}
4432
4433
4434/* Print a "c1 < keypartX < c2" - type interval into debug trace. */
4435static void dbug_print_segment_range(SEL_ARG *arg, KEY_PART *part)
4436{
4437 DBUG_ENTER("dbug_print_segment_range");
4438 DBUG_LOCK_FILE;
4439 if (!(arg->min_flag & NO_MIN_RANGE))
4440 {
4441 store_key_image_to_rec(part->field, arg->min_value, part->length);
4442 dbug_print_field(part->field);
4443 if (arg->min_flag & NEAR_MIN)
4444 fputs(" < ", DBUG_FILE);
4445 else
4446 fputs(" <= ", DBUG_FILE);
4447 }
4448
4449 fprintf(DBUG_FILE, "%s", part->field->field_name.str);
4450
4451 if (!(arg->max_flag & NO_MAX_RANGE))
4452 {
4453 if (arg->max_flag & NEAR_MAX)
4454 fputs(" < ", DBUG_FILE);
4455 else
4456 fputs(" <= ", DBUG_FILE);
4457 store_key_image_to_rec(part->field, arg->max_value, part->length);
4458 dbug_print_field(part->field);
4459 }
4460 fputs("\n", DBUG_FILE);
4461 DBUG_UNLOCK_FILE;
4462 DBUG_VOID_RETURN;
4463}
4464
4465
4466/*
4467 Print a singlepoint multi-keypart range interval to debug trace
4468
4469 SYNOPSIS
4470 dbug_print_singlepoint_range()
4471 start Array of SEL_ARG* ptrs representing conditions on key parts
4472 num Number of elements in the array.
4473
4474 DESCRIPTION
4475 This function prints a "keypartN=constN AND ... AND keypartK=constK"-type
4476 interval to debug trace.
4477*/
4478
4479static void dbug_print_singlepoint_range(SEL_ARG **start, uint num)
4480{
4481 DBUG_ENTER("dbug_print_singlepoint_range");
4482 DBUG_LOCK_FILE;
4483 SEL_ARG **end= start + num;
4484
4485 for (SEL_ARG **arg= start; arg != end; arg++)
4486 {
4487 Field *field= (*arg)->field;
4488 fprintf(DBUG_FILE, "%s%s=", (arg==start)?"":", ", field->field_name.str);
4489 dbug_print_field(field);
4490 }
4491 fputs("\n", DBUG_FILE);
4492 DBUG_UNLOCK_FILE;
4493 DBUG_VOID_RETURN;
4494}
4495#endif
4496
4497/****************************************************************************
4498 * Partition pruning code ends
4499 ****************************************************************************/
4500#endif
4501
4502
/*
  Get the cost of a 'sweep': retrieval of full records

  SYNOPSIS
    get_sweep_read_cost()
      param    Parameter from test_quick_select
      records  # of records to be retrieved

  RETURN
    Cost of the sweep
*/
4512
4513double get_sweep_read_cost(const PARAM *param, ha_rows records)
4514{
4515 double result;
4516 DBUG_ENTER("get_sweep_read_cost");
4517 if (param->table->file->primary_key_is_clustered())
4518 {
4519 /*
4520 We are using the primary key to find the rows.
4521 Calculate the cost for this.
4522 */
4523 result= param->table->file->read_time(param->table->s->primary_key,
4524 (uint)records, records);
4525 }
4526 else
4527 {
    /*
      Rows will be retrieved with rnd_pos(). Calculate the expected
      cost for this.
    */
4532 double n_blocks=
4533 ceil(ulonglong2double(param->table->file->stats.data_file_length) /
4534 IO_SIZE);
4535 double busy_blocks=
4536 n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(records)));
4537 if (busy_blocks < 1.0)
4538 busy_blocks= 1.0;
4539 DBUG_PRINT("info",("sweep: nblocks: %g, busy_blocks: %g", n_blocks,
4540 busy_blocks));
4541 /*
4542 Disabled: Bail out if # of blocks to read is bigger than # of blocks in
4543 table data file.
4544 if (max_cost != DBL_MAX && (busy_blocks+index_reads_cost) >= n_blocks)
4545 return 1;
4546 */
4547 JOIN *join= param->thd->lex->select_lex.join;
4548 if (!join || join->table_count == 1)
4549 {
4550 /* No join, assume reading is done in one 'sweep' */
4551 result= busy_blocks*(DISK_SEEK_BASE_COST +
4552 DISK_SEEK_PROP_COST*n_blocks/busy_blocks);
4553 }
4554 else
4555 {
      /*
        Possibly this is a join with the source table not being the last
        table, so assume that disk seeks are random here.
      */
4560 result= busy_blocks;
4561 }
4562 }
4563 DBUG_PRINT("return",("cost: %g", result));
4564 DBUG_RETURN(result);
4565}
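
/*
  Numeric illustration of the model above (example numbers only): with
  n_blocks=1000 and records=100, busy_blocks=
  1000 * (1 - (1 - 1/1000)^100) ~= 95.2, so a single-sweep read costs
  about 95.2 * (DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST * 1000/95.2).
*/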
4566
4567
4568/*
4569 Get best plan for a SEL_IMERGE disjunctive expression.
4570 SYNOPSIS
4571 get_best_disjunct_quick()
4572 param Parameter from check_quick_select function
4573 imerge Expression to use
4574 read_time Don't create scans with cost > read_time
4575
4576 NOTES
4577 index_merge cost is calculated as follows:
4578 index_merge_cost =
4579 cost(index_reads) + (see #1)
4580 cost(rowid_to_row_scan) + (see #2)
4581 cost(unique_use) (see #3)
4582
4583 1. cost(index_reads) =SUM_i(cost(index_read_i))
4584 For non-CPK scans,
4585 cost(index_read_i) = {cost of ordinary 'index only' scan}
4586 For CPK scan,
4587 cost(index_read_i) = {cost of non-'index only' scan}
4588
4589 2. cost(rowid_to_row_scan)
4590 If table PK is clustered then
4591 cost(rowid_to_row_scan) =
4592 {cost of ordinary clustered PK scan with n_ranges=n_rows}
4593
      Otherwise, we use the following model to calculate costs:
      We need to retrieve n_rows rows from a file that occupies n_blocks
      blocks. We assume that offsets of the rows we need are independent
      variates with uniform distribution in the [0..max_file_offset] range.
4598
      We'll denote a block as "busy" if it contains row(s) we need to
      retrieve and "empty" if it doesn't contain rows we need.

      The probability that a block is empty is (1 - 1/n_blocks)^n_rows
      (this applies to any block in the file). Let x_i be a variate taking
      value 1 if block #i is empty and 0 otherwise.
4605
4606 Then E(x_i) = (1 - 1/n_blocks)^n_rows;
4607
4608 E(n_empty_blocks) = E(sum(x_i)) = sum(E(x_i)) =
4609 = n_blocks * ((1 - 1/n_blocks)^n_rows) =
4610 ~= n_blocks * exp(-n_rows/n_blocks).
4611
4612 E(n_busy_blocks) = n_blocks*(1 - (1 - 1/n_blocks)^n_rows) =
4613 ~= n_blocks * (1 - exp(-n_rows/n_blocks)).
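
      For example (illustrative numbers only): n_rows=500 and n_blocks=1000
      give E(n_busy_blocks) ~= 1000 * (1 - e^(-0.5)) ~= 394 busy blocks.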
4614
      The average size of a "hole" between neighboring non-empty blocks is
      E(hole_size) = n_blocks/E(n_busy_blocks).
4617
4618 The total cost of reading all needed blocks in one "sweep" is:
4619
4620 E(n_busy_blocks)*
4621 (DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST*n_blocks/E(n_busy_blocks)).
4622
4623 3. Cost of Unique use is calculated in Unique::get_use_cost function.
4624
    ROR-union cost is calculated in the same way as index_merge cost, but
    instead of Unique a priority queue is used.
4627
4628 RETURN
4629 Created read plan
4630 NULL - Out of memory or no read scan could be built.
4631*/
4632
4633static
4634TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
4635 double read_time)
4636{
4637 SEL_TREE **ptree;
4638 TRP_INDEX_MERGE *imerge_trp= NULL;
4639 TRP_RANGE **range_scans;
4640 TRP_RANGE **cur_child;
4641 TRP_RANGE **cpk_scan= NULL;
4642 bool imerge_too_expensive= FALSE;
4643 double imerge_cost= 0.0;
4644 ha_rows cpk_scan_records= 0;
4645 ha_rows non_cpk_scan_records= 0;
4646 bool pk_is_clustered= param->table->file->primary_key_is_clustered();
4647 bool all_scans_ror_able= TRUE;
4648 bool all_scans_rors= TRUE;
4649 uint unique_calc_buff_size;
4650 TABLE_READ_PLAN **roru_read_plans;
4651 TABLE_READ_PLAN **cur_roru_plan;
4652 double roru_index_costs;
4653 ha_rows roru_total_records;
4654 double roru_intersect_part= 1.0;
4655 size_t n_child_scans;
4656 DBUG_ENTER("get_best_disjunct_quick");
4657 DBUG_PRINT("info", ("Full table scan cost: %g", read_time));
4658
  /*
    In every tree of the imerge, remove SEL_ARG trees that do not make
    ranges. If after this removal some SEL_ARG tree becomes empty, the
    whole imerge is discarded.
  */
4663 for (ptree= imerge->trees; ptree != imerge->trees_next; ptree++)
4664 {
4665 if (remove_nonrange_trees(param, *ptree))
4666 {
4667 imerge->trees_next= imerge->trees;
4668 break;
4669 }
4670 }
4671
4672 n_child_scans= imerge->trees_next - imerge->trees;
4673
4674 if (!n_child_scans)
4675 DBUG_RETURN(NULL);
4676
4677 if (!(range_scans= (TRP_RANGE**)alloc_root(param->mem_root,
4678 sizeof(TRP_RANGE*)*
4679 n_child_scans)))
4680 DBUG_RETURN(NULL);
  /*
    Collect the best 'range' scan for each of the disjuncts and, while
    doing so, analyze the possibility of ROR scans. Also calculate some
    values needed by other parts of the code.
  */
4686 for (ptree= imerge->trees, cur_child= range_scans;
4687 ptree != imerge->trees_next;
4688 ptree++, cur_child++)
4689 {
4690 DBUG_EXECUTE("info", print_sel_tree(param, *ptree, &(*ptree)->keys_map,
4691 "tree in SEL_IMERGE"););
4692 if (!(*cur_child= get_key_scans_params(param, *ptree, TRUE, FALSE, read_time)))
4693 {
      /*
        One of the index scans in this index_merge is more expensive than
        the entire table read for another available option. The entire
        index_merge (and any possible ROR-union) will then be more
        expensive, too. We continue here only to update SQL_SELECT members.
      */
4700 imerge_too_expensive= TRUE;
4701 }
4702 if (imerge_too_expensive)
4703 continue;
4704
4705 imerge_cost += (*cur_child)->read_cost;
4706 all_scans_ror_able &= ((*ptree)->n_ror_scans > 0);
4707 all_scans_rors &= (*cur_child)->is_ror;
4708 if (pk_is_clustered &&
4709 param->real_keynr[(*cur_child)->key_idx] ==
4710 param->table->s->primary_key)
4711 {
4712 cpk_scan= cur_child;
4713 cpk_scan_records= (*cur_child)->records;
4714 }
4715 else
4716 non_cpk_scan_records += (*cur_child)->records;
4717 }
4718
4719 DBUG_PRINT("info", ("index_merge scans cost %g", imerge_cost));
4720 if (imerge_too_expensive || (imerge_cost > read_time) ||
4721 ((non_cpk_scan_records+cpk_scan_records >=
4722 param->table->stat_records()) &&
4723 read_time != DBL_MAX))
4724 {
4725 /*
4726 Bail out if it is obvious that both index_merge and ROR-union will be
4727 more expensive
4728 */
4729 DBUG_PRINT("info", ("Sum of index_merge scans is more expensive than "
4730 "full table scan, bailing out"));
4731 DBUG_RETURN(NULL);
4732 }
4733
4734 /*
4735 If all scans happen to be ROR, proceed to generate a ROR-union plan (it's
4736 guaranteed to be cheaper than non-ROR union), unless ROR-unions are
4737 disabled in @@optimizer_switch
4738 */
4739 if (all_scans_rors &&
4740 optimizer_flag(param->thd, OPTIMIZER_SWITCH_INDEX_MERGE_UNION))
4741 {
4742 roru_read_plans= (TABLE_READ_PLAN**)range_scans;
4743 goto skip_to_ror_scan;
4744 }
4745
4746 if (cpk_scan)
4747 {
    /*
      Add one ROWID comparison for each row retrieved on a non-CPK scan
      (this is done in QUICK_RANGE_SELECT::row_in_ranges).
    */
4752 imerge_cost += non_cpk_scan_records / TIME_FOR_COMPARE_ROWID;
4753 }
4754
4755 /* Calculate cost(rowid_to_row_scan) */
4756 imerge_cost += get_sweep_read_cost(param, non_cpk_scan_records);
4757 DBUG_PRINT("info",("index_merge cost with rowid-to-row scan: %g",
4758 imerge_cost));
4759 if (imerge_cost > read_time ||
4760 !optimizer_flag(param->thd, OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION))
4761 {
4762 goto build_ror_index_merge;
4763 }
4764
4765 /* Add Unique operations cost */
4766 unique_calc_buff_size=
4767 Unique::get_cost_calc_buff_size((ulong)non_cpk_scan_records,
4768 param->table->file->ref_length,
4769 (size_t)param->thd->variables.sortbuff_size);
4770 if (param->imerge_cost_buff_size < unique_calc_buff_size)
4771 {
4772 if (!(param->imerge_cost_buff= (uint*)alloc_root(param->