1 | /* Copyright (c) 2000, 2011, Oracle and/or its affiliates. |
2 | Copyright (c) 2008, 2017, MariaDB Corporation. |
3 | |
4 | This program is free software; you can redistribute it and/or modify |
5 | it under the terms of the GNU General Public License as published by |
6 | the Free Software Foundation; version 2 of the License. |
7 | |
8 | This program is distributed in the hope that it will be useful, |
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | GNU General Public License for more details. |
12 | |
13 | You should have received a copy of the GNU General Public License |
14 | along with this program; if not, write to the Free Software |
15 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ |
16 | |
17 | |
18 | /** |
19 | @file |
20 | |
21 | Optimising of MIN(), MAX() and COUNT(*) queries without 'group by' clause |
22 | by replacing the aggregate expression with a constant. |
23 | |
24 | Given a table with a compound key on columns (a,b,c), the following |
25 | types of queries are optimised (assuming the table handler supports |
26 | the required methods) |
27 | |
28 | @verbatim |
29 | SELECT COUNT(*) FROM t1[,t2,t3,...] |
30 | SELECT MIN(b) FROM t1 WHERE a=const |
31 | SELECT MAX(c) FROM t1 WHERE a=const AND b=const |
32 | SELECT MAX(b) FROM t1 WHERE a=const AND b<const |
33 | SELECT MIN(b) FROM t1 WHERE a=const AND b>const |
34 | SELECT MIN(b) FROM t1 WHERE a=const AND b BETWEEN const AND const |
35 | SELECT MAX(b) FROM t1 WHERE a=const AND b BETWEEN const AND const |
36 | @endverbatim |
37 | |
38 | Instead of '<' one can use '<=', '>', '>=' and '=' as well. |
39 | Instead of 'a=const' the condition 'a IS NULL' can be used. |
40 | |
41 | If all selected fields are replaced then we will also remove all |
42 | involved tables and return the answer without any join. Thus, the |
43 | following query will be replaced with a row of two constants: |
44 | @verbatim |
45 | SELECT MAX(b), MIN(d) FROM t1,t2 |
46 | WHERE a=const AND b<const AND d>const |
47 | @endverbatim |
48 | (assuming a index for column d of table t2 is defined) |
49 | */ |
50 | |
51 | #include "mariadb.h" |
52 | #include "sql_priv.h" |
53 | #include "key.h" // key_cmp_if_same |
54 | #include "sql_select.h" |
55 | |
56 | static bool find_key_for_maxmin(bool max_fl, TABLE_REF *ref, Field* field, |
57 | COND *cond, uint *range_fl, |
58 | uint *key_prefix_length); |
59 | static int reckey_in_range(bool max_fl, TABLE_REF *ref, Field* field, |
60 | COND *cond, uint range_fl, uint prefix_len); |
61 | static int maxmin_in_range(bool max_fl, Field* field, COND *cond); |
62 | |
63 | |
64 | /* |
65 | Get exact count of rows in all tables |
66 | |
67 | SYNOPSIS |
68 | get_exact_records() |
69 | tables List of tables |
70 | |
71 | NOTES |
72 | When this is called, we know all table handlers supports HA_HAS_RECORDS |
73 | or HA_STATS_RECORDS_IS_EXACT |
74 | |
75 | RETURN |
76 | ULONGLONG_MAX Error: Could not calculate number of rows |
77 | # Multiplication of number of rows in all tables |
78 | */ |
79 | |
80 | static ulonglong get_exact_record_count(List<TABLE_LIST> &tables) |
81 | { |
82 | ulonglong count= 1; |
83 | TABLE_LIST *tl; |
84 | List_iterator<TABLE_LIST> ti(tables); |
85 | while ((tl= ti++)) |
86 | { |
87 | ha_rows tmp= tl->table->file->records(); |
88 | if (tmp == HA_POS_ERROR) |
89 | return ULONGLONG_MAX; |
90 | count*= tmp; |
91 | } |
92 | return count; |
93 | } |
94 | |
95 | |
96 | /** |
97 | Use index to read MIN(field) value. |
98 | |
99 | @param table Table object |
100 | @param ref Reference to the structure where we store the key value |
101 | @item_field Field used in MIN() |
102 | @range_fl Whether range endpoint is strict less than |
103 | @prefix_len Length of common key part for the range |
104 | |
105 | @retval |
106 | 0 No errors |
107 | HA_ERR_... Otherwise |
108 | */ |
109 | |
110 | static int get_index_min_value(TABLE *table, TABLE_REF *ref, |
111 | Item_field *item_field, uint range_fl, |
112 | uint prefix_len) |
113 | { |
114 | int error; |
115 | |
116 | if (!ref->key_length) |
117 | error= table->file->ha_index_first(table->record[0]); |
118 | else |
119 | { |
120 | /* |
121 | Use index to replace MIN/MAX functions with their values |
122 | according to the following rules: |
123 | |
124 | 1) Insert the minimum non-null values where the WHERE clause still |
125 | matches, or |
126 | 2) a NULL value if there are only NULL values for key_part_k. |
127 | 3) Fail, producing a row of nulls |
128 | |
129 | Implementation: Read the smallest value using the search key. If |
130 | the interval is open, read the next value after the search |
131 | key. If read fails, and we're looking for a MIN() value for a |
132 | nullable column, test if there is an exact match for the key. |
133 | */ |
134 | if (!(range_fl & NEAR_MIN)) |
135 | /* |
136 | Closed interval: Either The MIN argument is non-nullable, or |
137 | we have a >= predicate for the MIN argument. |
138 | */ |
139 | error= table->file->ha_index_read_map(table->record[0], |
140 | ref->key_buff, |
141 | make_prev_keypart_map(ref->key_parts), |
142 | HA_READ_KEY_OR_NEXT); |
143 | else |
144 | { |
145 | /* |
146 | Open interval: There are two cases: |
147 | 1) We have only MIN() and the argument column is nullable, or |
148 | 2) there is a > predicate on it, nullability is irrelevant. |
149 | We need to scan the next bigger record first. |
150 | Open interval is not used if the search key involves the last keypart, |
151 | and it would not work. |
152 | */ |
153 | DBUG_ASSERT(prefix_len < ref->key_length); |
154 | error= table->file->ha_index_read_map(table->record[0], |
155 | ref->key_buff, |
156 | make_prev_keypart_map(ref->key_parts), |
157 | HA_READ_AFTER_KEY); |
158 | /* |
159 | If the found record is outside the group formed by the search |
160 | prefix, or there is no such record at all, check if all |
161 | records in that group have NULL in the MIN argument |
162 | column. If that is the case return that NULL. |
163 | |
164 | Check if case 1 from above holds. If it does, we should read |
165 | the skipped tuple. |
166 | */ |
167 | if (item_field->field->real_maybe_null() && |
168 | ref->key_buff[prefix_len] == 1 && |
169 | /* |
170 | Last keypart (i.e. the argument to MIN) is set to NULL by |
171 | find_key_for_maxmin only if all other keyparts are bound |
172 | to constants in a conjunction of equalities. Hence, we |
173 | can detect this by checking only if the last keypart is |
174 | NULL. |
175 | */ |
176 | (error == HA_ERR_KEY_NOT_FOUND || |
177 | key_cmp_if_same(table, ref->key_buff, ref->key, prefix_len))) |
178 | { |
179 | DBUG_ASSERT(item_field->field->real_maybe_null()); |
180 | error= table->file->ha_index_read_map(table->record[0], |
181 | ref->key_buff, |
182 | make_prev_keypart_map(ref->key_parts), |
183 | HA_READ_KEY_EXACT); |
184 | } |
185 | } |
186 | } |
187 | return error; |
188 | } |
189 | |
190 | |
191 | /** |
192 | Use index to read MAX(field) value. |
193 | |
194 | @param table Table object |
195 | @param ref Reference to the structure where we store the key value |
196 | @range_fl Whether range endpoint is strict greater than |
197 | |
198 | @retval |
199 | 0 No errors |
200 | HA_ERR_... Otherwise |
201 | */ |
202 | |
203 | static int get_index_max_value(TABLE *table, TABLE_REF *ref, uint range_fl) |
204 | { |
205 | return (ref->key_length ? |
206 | table->file->ha_index_read_map(table->record[0], ref->key_buff, |
207 | make_prev_keypart_map(ref->key_parts), |
208 | range_fl & NEAR_MAX ? |
209 | HA_READ_BEFORE_KEY : |
210 | HA_READ_PREFIX_LAST_OR_PREV) : |
211 | table->file->ha_index_last(table->record[0])); |
212 | } |
213 | |
214 | |
215 | |
216 | /** |
217 | Substitutes constants for some COUNT(), MIN() and MAX() functions. |
218 | |
219 | @param thd thread handler |
220 | @param tables list of leaves of join table tree |
221 | @param all_fields All fields to be returned |
222 | @param conds WHERE clause |
223 | |
224 | @note |
225 | This function is only called for queries with aggregate functions and no |
226 | GROUP BY part. This means that the result set shall contain a single |
227 | row only |
228 | |
229 | @retval |
230 | 0 no errors |
231 | @retval |
232 | 1 if all items were resolved |
233 | @retval |
234 | HA_ERR_KEY_NOT_FOUND on impossible conditions |
235 | @retval |
236 | HA_ERR_... if a deadlock or a lock wait timeout happens, for example |
237 | @retval |
238 | ER_... e.g. ER_SUBQUERY_NO_1_ROW |
239 | */ |
240 | |
241 | int opt_sum_query(THD *thd, |
242 | List<TABLE_LIST> &tables, List<Item> &all_fields, COND *conds) |
243 | { |
244 | List_iterator_fast<Item> it(all_fields); |
245 | List_iterator<TABLE_LIST> ti(tables); |
246 | TABLE_LIST *tl; |
247 | int const_result= 1; |
248 | bool recalc_const_item= 0; |
249 | ulonglong count= 1; |
250 | bool is_exact_count= TRUE, maybe_exact_count= TRUE; |
251 | table_map removed_tables= 0, outer_tables= 0, used_tables= 0; |
252 | table_map where_tables= 0; |
253 | Item *item; |
254 | int error= 0; |
255 | DBUG_ENTER("opt_sum_query" ); |
256 | |
257 | thd->lex->current_select->min_max_opt_list.empty(); |
258 | |
259 | if (conds) |
260 | where_tables= conds->used_tables(); |
261 | |
262 | /* |
263 | Analyze outer join dependencies, and, if possible, compute the number |
264 | of returned rows. |
265 | */ |
266 | while ((tl= ti++)) |
267 | { |
268 | TABLE_LIST *embedded; |
269 | for (embedded= tl ; embedded; embedded= embedded->embedding) |
270 | { |
271 | if (embedded->on_expr) |
272 | break; |
273 | } |
274 | if (embedded) |
275 | /* Don't replace expression on a table that is part of an outer join */ |
276 | { |
277 | outer_tables|= tl->table->map; |
278 | |
279 | /* |
280 | We can't optimise LEFT JOIN in cases where the WHERE condition |
281 | restricts the table that is used, like in: |
282 | SELECT MAX(t1.a) FROM t1 LEFT JOIN t2 join-condition |
283 | WHERE t2.field IS NULL; |
284 | */ |
285 | if (tl->table->map & where_tables) |
286 | DBUG_RETURN(0); |
287 | } |
288 | else |
289 | used_tables|= tl->table->map; |
290 | |
291 | /* |
292 | If the storage manager of 'tl' gives exact row count as part of |
293 | statistics (cheap), compute the total number of rows. If there are |
294 | no outer table dependencies, this count may be used as the real count. |
295 | Schema tables are filled after this function is invoked, so we can't |
296 | get row count |
297 | */ |
298 | if (!(tl->table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) || |
299 | tl->schema_table) |
300 | { |
301 | maybe_exact_count&= MY_TEST(!tl->schema_table && |
302 | (tl->table->file->ha_table_flags() & |
303 | HA_HAS_RECORDS)); |
304 | is_exact_count= FALSE; |
305 | count= 1; // ensure count != 0 |
306 | } |
307 | else if (tl->is_materialized_derived() || |
308 | tl->jtbm_subselect) |
309 | { |
310 | /* |
311 | Can't remove a derived table as it's number of rows is just an |
312 | estimate. |
313 | */ |
314 | DBUG_RETURN(0); |
315 | } |
316 | else |
317 | { |
318 | error= tl->table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK); |
319 | if (unlikely(error)) |
320 | { |
321 | tl->table->file->print_error(error, MYF(ME_FATALERROR)); |
322 | DBUG_RETURN(error); |
323 | } |
324 | count*= tl->table->file->stats.records; |
325 | } |
326 | } |
327 | |
328 | /* |
329 | Iterate through all items in the SELECT clause and replace |
330 | COUNT(), MIN() and MAX() with constants (if possible). |
331 | */ |
332 | |
333 | while ((item= it++)) |
334 | { |
335 | if (item->type() == Item::SUM_FUNC_ITEM) |
336 | { |
337 | Item_sum *item_sum= (((Item_sum*) item)); |
338 | switch (item_sum->sum_func()) { |
339 | case Item_sum::COUNT_FUNC: |
340 | /* |
341 | If the expr in COUNT(expr) can never be null we can change this |
342 | to the number of rows in the tables if this number is exact and |
343 | there are no outer joins. |
344 | */ |
345 | if (!conds && !((Item_sum_count*) item)->get_arg(0)->maybe_null && |
346 | !outer_tables && maybe_exact_count && |
347 | ((item->used_tables() & OUTER_REF_TABLE_BIT) == 0)) |
348 | { |
349 | if (!is_exact_count) |
350 | { |
351 | if ((count= get_exact_record_count(tables)) == ULONGLONG_MAX) |
352 | { |
353 | /* Error from handler in counting rows. Don't optimize count() */ |
354 | const_result= 0; |
355 | continue; |
356 | } |
357 | is_exact_count= 1; // count is now exact |
358 | } |
359 | ((Item_sum_count*) item)->make_const((longlong) count); |
360 | recalc_const_item= 1; |
361 | } |
362 | else |
363 | const_result= 0; |
364 | break; |
365 | case Item_sum::MIN_FUNC: |
366 | case Item_sum::MAX_FUNC: |
367 | { |
368 | int is_max= MY_TEST(item_sum->sum_func() == Item_sum::MAX_FUNC); |
369 | /* |
370 | If MIN/MAX(expr) is the first part of a key or if all previous |
371 | parts of the key is found in the COND, then we can use |
372 | indexes to find the key. |
373 | */ |
374 | Item *expr=item_sum->get_arg(0); |
375 | if (((expr->used_tables() & OUTER_REF_TABLE_BIT) == 0) && |
376 | expr->real_item()->type() == Item::FIELD_ITEM) |
377 | { |
378 | uchar key_buff[MAX_KEY_LENGTH]; |
379 | TABLE_REF ref; |
380 | uint range_fl, prefix_len; |
381 | |
382 | ref.key_buff= key_buff; |
383 | Item_field *item_field= (Item_field*) (expr->real_item()); |
384 | TABLE *table= item_field->field->table; |
385 | |
386 | /* |
387 | Look for a partial key that can be used for optimization. |
388 | If we succeed, ref.key_length will contain the length of |
389 | this key, while prefix_len will contain the length of |
390 | the beginning of this key without field used in MIN/MAX(). |
391 | Type of range for the key part for this field will be |
392 | returned in range_fl. |
393 | */ |
394 | if (table->file->inited || (outer_tables & table->map) || |
395 | !find_key_for_maxmin(is_max, &ref, item_field->field, conds, |
396 | &range_fl, &prefix_len)) |
397 | { |
398 | const_result= 0; |
399 | break; |
400 | } |
401 | longlong info_limit= 1; |
402 | table->file->info_push(INFO_KIND_FORCE_LIMIT_BEGIN, &info_limit); |
403 | if (likely(!(error= table->file->ha_index_init((uint) ref.key, 1)))) |
404 | error= (is_max ? |
405 | get_index_max_value(table, &ref, range_fl) : |
406 | get_index_min_value(table, &ref, item_field, range_fl, |
407 | prefix_len)); |
408 | |
409 | /* Verify that the read tuple indeed matches the search key */ |
410 | if (!error && |
411 | reckey_in_range(is_max, &ref, item_field->field, |
412 | conds, range_fl, prefix_len)) |
413 | error= HA_ERR_KEY_NOT_FOUND; |
414 | table->file->ha_end_keyread(); |
415 | table->file->ha_index_end(); |
416 | table->file->info_push(INFO_KIND_FORCE_LIMIT_END, NULL); |
417 | if (error) |
418 | { |
419 | if (error == HA_ERR_KEY_NOT_FOUND || error == HA_ERR_END_OF_FILE) |
420 | DBUG_RETURN(HA_ERR_KEY_NOT_FOUND); // No rows matching WHERE |
421 | /* HA_ERR_LOCK_DEADLOCK or some other error */ |
422 | table->file->print_error(error, MYF(0)); |
423 | DBUG_RETURN(error); |
424 | } |
425 | removed_tables|= table->map; |
426 | } |
427 | else if (!expr->const_item() || !is_exact_count || conds) |
428 | { |
429 | /* |
430 | The optimization is not applicable in both cases: |
431 | (a) 'expr' is a non-constant expression. Then we can't |
432 | replace 'expr' by a constant. |
433 | (b) 'expr' is a costant. According to ANSI, MIN/MAX must return |
434 | NULL if the query does not return any rows. Thus, if we are not |
435 | able to determine if the query returns any rows, we can't apply |
436 | the optimization and replace MIN/MAX with a constant. |
437 | (c) there is a WHERE clause. The WHERE conditions may result in |
438 | an empty result, but the clause cannot be taken into account here. |
439 | */ |
440 | const_result= 0; |
441 | break; |
442 | } |
443 | item_sum->set_aggregator(item_sum->has_with_distinct() ? |
444 | Aggregator::DISTINCT_AGGREGATOR : |
445 | Aggregator::SIMPLE_AGGREGATOR); |
446 | /* |
447 | If count == 0 (so is_exact_count == TRUE) and |
448 | there're no outer joins, set to NULL, |
449 | otherwise set to the constant value. |
450 | */ |
451 | if (!count && !outer_tables) |
452 | { |
453 | item_sum->aggregator_clear(); |
454 | } |
455 | else |
456 | { |
457 | item_sum->reset_and_add(); |
458 | /* |
459 | Save a reference to the item for possible rollback |
460 | of the min/max optimizations for this select |
461 | */ |
462 | thd->lex->current_select->min_max_opt_list.push_back(item_sum); |
463 | } |
464 | item_sum->make_const(); |
465 | recalc_const_item= 1; |
466 | break; |
467 | } |
468 | default: |
469 | const_result= 0; |
470 | break; |
471 | } |
472 | } |
473 | else if (const_result) |
474 | { |
475 | if (recalc_const_item) |
476 | item->update_used_tables(); |
477 | if (!item->const_item() && item->type() != Item::WINDOW_FUNC_ITEM) |
478 | const_result= 0; |
479 | } |
480 | } |
481 | |
482 | if (unlikely(thd->is_error())) |
483 | DBUG_RETURN(thd->get_stmt_da()->sql_errno()); |
484 | |
485 | /* |
486 | If we have a where clause, we can only ignore searching in the |
487 | tables if MIN/MAX optimisation replaced all used tables |
488 | We do not use replaced values in case of: |
489 | SELECT MIN(key) FROM table_1, empty_table |
490 | removed_tables is != 0 if we have used MIN() or MAX(). |
491 | */ |
492 | if (removed_tables && used_tables != removed_tables) |
493 | const_result= 0; // We didn't remove all tables |
494 | DBUG_RETURN(const_result); |
495 | } |
496 | |
497 | |
498 | /* |
499 | Check if both item1 and item2 are strings, and item1 has fewer characters |
500 | than item2. |
501 | */ |
502 | |
503 | static bool check_item1_shorter_item2(Item *item1, Item *item2) |
504 | { |
505 | if (item1->cmp_type() == STRING_RESULT && |
506 | item2->cmp_type() == STRING_RESULT) |
507 | { |
508 | int len1= item1->max_length / item1->collation.collation->mbmaxlen; |
509 | int len2= item2->max_length / item2->collation.collation->mbmaxlen; |
510 | return len1 < len2; |
511 | } |
512 | return false; /* When the check is not applicable, it means "not bigger" */ |
513 | } |
514 | |
515 | |
516 | /** |
517 | Test if the predicate compares a field with constants. |
518 | |
519 | @param func_item Predicate item |
520 | @param[out] args Here we store the field followed by constants |
521 | @param[out] inv_order Is set to 1 if the predicate is of the form |
522 | 'const op field' |
523 | |
524 | @retval |
525 | 0 func_item is a simple predicate: a field is compared with a constant |
526 | whose length does not exceed the max length of the field values |
527 | @retval |
528 | 1 Otherwise |
529 | */ |
530 | |
531 | bool simple_pred(Item_func *func_item, Item **args, bool *inv_order) |
532 | { |
533 | Item *item; |
534 | *inv_order= 0; |
535 | switch (func_item->argument_count()) { |
536 | case 0: |
537 | /* MULT_EQUAL_FUNC */ |
538 | { |
539 | Item_equal *item_equal= (Item_equal *) func_item; |
540 | if (!(args[1]= item_equal->get_const())) |
541 | return 0; |
542 | Item_equal_fields_iterator it(*item_equal); |
543 | if (!(item= it++)) |
544 | return 0; |
545 | args[0]= item->real_item(); |
546 | if (check_item1_shorter_item2(args[0], args[1])) |
547 | return 0; |
548 | if (it++) |
549 | return 0; |
550 | } |
551 | break; |
552 | case 1: |
553 | /* field IS NULL */ |
554 | item= func_item->arguments()[0]->real_item(); |
555 | if (item->type() != Item::FIELD_ITEM) |
556 | return 0; |
557 | args[0]= item; |
558 | break; |
559 | case 2: |
560 | /* 'field op const' or 'const op field' */ |
561 | item= func_item->arguments()[0]->real_item(); |
562 | if (item->type() == Item::FIELD_ITEM) |
563 | { |
564 | args[0]= item; |
565 | item= func_item->arguments()[1]->real_item(); |
566 | if (!item->const_item()) |
567 | return 0; |
568 | args[1]= item; |
569 | } |
570 | else if (item->const_item()) |
571 | { |
572 | args[1]= item; |
573 | item= func_item->arguments()[1]->real_item(); |
574 | if (item->type() != Item::FIELD_ITEM) |
575 | return 0; |
576 | args[0]= item; |
577 | *inv_order= 1; |
578 | } |
579 | else |
580 | return 0; |
581 | if (check_item1_shorter_item2(args[0], args[1])) |
582 | return 0; |
583 | break; |
584 | case 3: |
585 | /* field BETWEEN const AND const */ |
586 | item= func_item->arguments()[0]->real_item(); |
587 | if (item->type() == Item::FIELD_ITEM) |
588 | { |
589 | args[0]= item; |
590 | for (int i= 1 ; i <= 2; i++) |
591 | { |
592 | item= func_item->arguments()[i]->real_item(); |
593 | if (!item->const_item()) |
594 | return 0; |
595 | args[i]= item; |
596 | if (check_item1_shorter_item2(args[0], args[i])) |
597 | return 0; |
598 | } |
599 | } |
600 | else |
601 | return 0; |
602 | } |
603 | return 1; |
604 | } |
605 | |
606 | |
607 | /** |
608 | Check whether a condition matches a key to get {MAX|MIN}(field):. |
609 | |
610 | For the index specified by the keyinfo parameter and an index that |
611 | contains the field as its component (field_part), the function |
612 | checks whether |
613 | |
614 | - the condition cond is a conjunction, |
615 | - all of its conjuncts refer to columns of the same table, and |
616 | - each conjunct is on one of the following forms: |
617 | - f_i = const_i or const_i = f_i or f_i IS NULL, |
618 | where f_i is part of the index |
619 | - field {<|<=|>=|>|=} const |
620 | - const {<|<=|>=|>|=} field |
621 | - field BETWEEN const_1 AND const_2 |
622 | |
623 | As a side-effect, the key value to be used for looking up the MIN/MAX value |
624 | is actually stored inside the Field object. An interesting feature is that |
625 | the function will find the most restrictive endpoint by over-eager |
626 | evaluation of the @c WHERE condition. It continually stores the current |
627 | endpoint inside the Field object. For a query such as |
628 | |
629 | @code |
630 | SELECT MIN(a) FROM t1 WHERE a > 3 AND a > 5; |
631 | @endcode |
632 | |
633 | the algorithm will recurse over the conjuction, storing first a 3 in the |
634 | field. In the next recursive invocation the expression a > 5 is evaluated |
635 | as 3 > 5 (Due to the dual nature of Field objects as value carriers and |
636 | field identifiers), which will obviously fail, leading to 5 being stored in |
637 | the Field object. |
638 | |
639 | @param[in] max_fl Set to true if we are optimizing MAX(), |
640 | false means we are optimizing %MIN() |
641 | @param[in, out] ref Reference to the structure where the function |
642 | stores the key value |
643 | @param[in] keyinfo Reference to the key info |
644 | @param[in] field_part Pointer to the key part for the field |
645 | @param[in] cond WHERE condition |
646 | @param[in,out] key_part_used Map of matchings parts. The function will output |
647 | the set of key parts actually being matched in |
648 | this set, yet it relies on the caller to |
649 | initialize the value to zero. This is due |
650 | to the fact that this value is passed |
651 | recursively. |
652 | @param[in,out] range_fl Says whether endpoints use strict greater/less |
653 | than. |
654 | @param[out] prefix_len Length of common key part for the range |
655 | where MAX/MIN is searched for |
656 | |
657 | @retval |
658 | false Index can't be used. |
659 | @retval |
660 | true We can use the index to get MIN/MAX value |
661 | */ |
662 | |
663 | static bool matching_cond(bool max_fl, TABLE_REF *ref, KEY *keyinfo, |
664 | KEY_PART_INFO *field_part, COND *cond, |
665 | key_part_map *key_part_used, uint *range_fl, |
666 | uint *prefix_len) |
667 | { |
668 | DBUG_ENTER("matching_cond" ); |
669 | if (!cond) |
670 | DBUG_RETURN(TRUE); |
671 | Field *field= field_part->field; |
672 | table_map cond_used_tables= cond->used_tables(); |
673 | if (cond_used_tables & OUTER_REF_TABLE_BIT) |
674 | { |
675 | DBUG_RETURN(FALSE); |
676 | } |
677 | if (!(cond_used_tables & field->table->map) && |
678 | MY_TEST(cond_used_tables & ~PSEUDO_TABLE_BITS)) |
679 | { |
680 | /* Condition doesn't restrict the used table */ |
681 | DBUG_RETURN(!cond->const_item()); |
682 | } |
683 | else if (cond->is_expensive()) |
684 | DBUG_RETURN(FALSE); |
685 | if (cond->type() == Item::COND_ITEM) |
686 | { |
687 | if (((Item_cond*) cond)->functype() == Item_func::COND_OR_FUNC) |
688 | DBUG_RETURN(FALSE); |
689 | |
690 | /* AND */ |
691 | List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list()); |
692 | Item *item; |
693 | while ((item= li++)) |
694 | { |
695 | if (!matching_cond(max_fl, ref, keyinfo, field_part, item, |
696 | key_part_used, range_fl, prefix_len)) |
697 | DBUG_RETURN(FALSE); |
698 | } |
699 | DBUG_RETURN(TRUE); |
700 | } |
701 | |
702 | if (cond->type() != Item::FUNC_ITEM) |
703 | DBUG_RETURN(FALSE); // Not operator, can't optimize |
704 | |
705 | bool eq_type= 0; // =, <=> or IS NULL |
706 | bool is_null_safe_eq= FALSE; // The operator is NULL safe, e.g. <=> |
707 | bool noeq_type= 0; // < or > |
708 | bool less_fl= 0; // < or <= |
709 | bool is_null= 0; // IS NULL |
710 | bool between= 0; // BETWEEN ... AND ... |
711 | |
712 | switch (((Item_func*) cond)->functype()) { |
713 | case Item_func::ISNULL_FUNC: |
714 | is_null= 1; /* fall through */ |
715 | case Item_func::EQ_FUNC: |
716 | eq_type= TRUE; |
717 | break; |
718 | case Item_func::EQUAL_FUNC: |
719 | eq_type= is_null_safe_eq= TRUE; |
720 | break; |
721 | case Item_func::LT_FUNC: |
722 | noeq_type= 1; /* fall through */ |
723 | case Item_func::LE_FUNC: |
724 | less_fl= 1; |
725 | break; |
726 | case Item_func::GT_FUNC: |
727 | noeq_type= 1; /* fall through */ |
728 | case Item_func::GE_FUNC: |
729 | break; |
730 | case Item_func::BETWEEN: |
731 | if (((Item_func_between*) cond)->negated) |
732 | DBUG_RETURN(FALSE); |
733 | between= 1; |
734 | break; |
735 | case Item_func::MULT_EQUAL_FUNC: |
736 | eq_type= 1; |
737 | break; |
738 | default: |
739 | DBUG_RETURN(FALSE); // Can't optimize function |
740 | } |
741 | |
742 | Item *args[3]; |
743 | bool inv; |
744 | |
745 | /* Test if this is a comparison of a field and constant */ |
746 | if (!simple_pred((Item_func*) cond, args, &inv)) |
747 | DBUG_RETURN(FALSE); |
748 | |
749 | if (!is_null_safe_eq && !is_null && |
750 | (args[1]->is_null() || (between && args[2]->is_null()))) |
751 | DBUG_RETURN(FALSE); |
752 | |
753 | if (inv && !eq_type) |
754 | less_fl= 1-less_fl; // Convert '<' -> '>' (etc) |
755 | |
756 | /* Check if field is part of the tested partial key */ |
757 | uchar *key_ptr= ref->key_buff; |
758 | KEY_PART_INFO *part; |
759 | for (part= keyinfo->key_part; ; key_ptr+= part++->store_length) |
760 | |
761 | { |
762 | if (part > field_part) |
763 | DBUG_RETURN(FALSE); // Field is beyond the tested parts |
764 | if (part->field->eq(((Item_field*) args[0])->field)) |
765 | break; // Found a part of the key for the field |
766 | } |
767 | |
768 | bool is_field_part= part == field_part; |
769 | if (!(is_field_part || eq_type)) |
770 | DBUG_RETURN(FALSE); |
771 | |
772 | key_part_map org_key_part_used= *key_part_used; |
773 | if (eq_type || between || max_fl == less_fl) |
774 | { |
775 | uint length= (uint)(key_ptr-ref->key_buff)+part->store_length; |
776 | if (ref->key_length < length) |
777 | { |
778 | /* Ultimately ref->key_length will contain the length of the search key */ |
779 | ref->key_length= length; |
780 | ref->key_parts= (uint)(part - keyinfo->key_part) + 1; |
781 | } |
782 | if (!*prefix_len && part+1 == field_part) |
783 | *prefix_len= length; |
784 | if (is_field_part && eq_type) |
785 | *prefix_len= ref->key_length; |
786 | |
787 | *key_part_used|= (key_part_map) 1 << (part - keyinfo->key_part); |
788 | } |
789 | |
790 | if (org_key_part_used == *key_part_used && |
791 | /* |
792 | The current search key is not being extended with a new key part. This |
793 | means that the a condition is added a key part for which there was a |
794 | previous condition. We can only overwrite such key parts in some special |
795 | cases, e.g. a > 2 AND a > 1 (here range_fl must be set to something). In |
796 | all other cases the WHERE condition is always false anyway. |
797 | */ |
798 | (eq_type || *range_fl == 0)) |
799 | DBUG_RETURN(FALSE); |
800 | |
801 | if (org_key_part_used != *key_part_used || |
802 | (is_field_part && |
803 | (between || eq_type || max_fl == less_fl) && !cond->val_int())) |
804 | { |
805 | /* |
806 | It's the first predicate for this part or a predicate of the |
807 | following form that moves upper/lower bounds for max/min values: |
808 | - field BETWEEN const AND const |
809 | - field = const |
810 | - field {<|<=} const, when searching for MAX |
811 | - field {>|>=} const, when searching for MIN |
812 | */ |
813 | |
814 | if (is_null || (is_null_safe_eq && args[1]->is_null())) |
815 | { |
816 | /* |
817 | If we have a non-nullable index, we cannot use it, |
818 | since set_null will be ignored, and we will compare uninitialized data. |
819 | */ |
820 | if (!part->field->real_maybe_null()) |
821 | DBUG_RETURN(FALSE); |
822 | part->field->set_null(); |
823 | *key_ptr= (uchar) 1; |
824 | } |
825 | else |
826 | { |
827 | /* Update endpoints for MAX/MIN, see function comment. */ |
828 | Item *value= args[between && max_fl ? 2 : 1]; |
829 | value->save_in_field_no_warnings(part->field, 1); |
830 | if (part->null_bit) |
831 | *key_ptr++= (uchar) MY_TEST(part->field->is_null()); |
832 | part->field->get_key_image(key_ptr, part->length, Field::itRAW); |
833 | } |
834 | if (is_field_part) |
835 | { |
836 | if (between || eq_type) |
837 | *range_fl&= ~(NO_MAX_RANGE | NO_MIN_RANGE); |
838 | else |
839 | { |
840 | *range_fl&= ~(max_fl ? NO_MAX_RANGE : NO_MIN_RANGE); |
841 | if (noeq_type) |
842 | *range_fl|= (max_fl ? NEAR_MAX : NEAR_MIN); |
843 | else |
844 | *range_fl&= ~(max_fl ? NEAR_MAX : NEAR_MIN); |
845 | } |
846 | } |
847 | } |
848 | else if (eq_type) |
849 | { |
850 | if ((!is_null && !cond->val_int()) || |
851 | (is_null && !MY_TEST(part->field->is_null()))) |
852 | DBUG_RETURN(FALSE); // Impossible test |
853 | } |
854 | else if (is_field_part) |
855 | *range_fl&= ~(max_fl ? NO_MIN_RANGE : NO_MAX_RANGE); |
856 | DBUG_RETURN(TRUE); |
857 | } |
858 | |
859 | |
860 | /** |
861 | Check whether we can get value for {max|min}(field) by using a key. |
862 | |
863 | If where-condition is not a conjunction of 0 or more conjuct the |
864 | function returns false, otherwise it checks whether there is an |
865 | index including field as its k-th component/part such that: |
866 | |
867 | -# for each previous component f_i there is one and only one conjunct |
868 | of the form: f_i= const_i or const_i= f_i or f_i is null |
869 | -# references to field occur only in conjucts of the form: |
870 | field {<|<=|>=|>|=} const or const {<|<=|>=|>|=} field or |
871 | field BETWEEN const1 AND const2 |
872 | -# all references to the columns from the same table as column field |
873 | occur only in conjucts mentioned above. |
874 | -# each of k first components the index is not partial, i.e. is not |
875 | defined on a fixed length proper prefix of the field. |
876 | |
877 | If such an index exists the function through the ref parameter |
878 | returns the key value to find max/min for the field using the index, |
879 | the length of first (k-1) components of the key and flags saying |
880 | how to apply the key for the search max/min value. |
881 | (if we have a condition field = const, prefix_len contains the length |
882 | of the whole search key) |
883 | |
884 | @param[in] max_fl 0 for MIN(field) / 1 for MAX(field) |
885 | @param[in,out] ref Reference to the structure we store the key value |
886 | @param[in] field Field used inside MIN() / MAX() |
887 | @param[in] cond WHERE condition |
888 | @param[out] range_fl Bit flags for how to search if key is ok |
889 | @param[out] prefix_len Length of prefix for the search range |
890 | |
891 | @note |
892 | This function may set field->table->key_read to true, |
893 | which must be reset after index is used! |
894 | (This can only happen when function returns 1) |
895 | |
896 | @retval |
897 | 0 Index can not be used to optimize MIN(field)/MAX(field) |
898 | @retval |
899 | 1 Can use key to optimize MIN()/MAX(). |
900 | In this case ref, range_fl and prefix_len are updated |
901 | */ |
902 | |
903 | static bool find_key_for_maxmin(bool max_fl, TABLE_REF *ref, |
904 | Field* field, COND *cond, |
905 | uint *range_fl, uint *prefix_len) |
906 | { |
907 | if (!(field->flags & PART_KEY_FLAG)) |
908 | return FALSE; // Not key field |
909 | |
910 | DBUG_ENTER("find_key_for_maxmin" ); |
911 | |
912 | TABLE *table= field->table; |
913 | uint idx= 0; |
914 | |
915 | KEY *keyinfo,*keyinfo_end; |
916 | for (keyinfo= table->key_info, keyinfo_end= keyinfo+table->s->keys ; |
917 | keyinfo != keyinfo_end; |
918 | keyinfo++,idx++) |
919 | { |
920 | KEY_PART_INFO *part,*part_end; |
921 | key_part_map key_part_to_use= 0; |
922 | /* |
923 | Perform a check if index is not disabled by ALTER TABLE |
924 | or IGNORE INDEX. |
925 | */ |
926 | if (!table->keys_in_use_for_query.is_set(idx)) |
927 | continue; |
928 | uint jdx= 0; |
929 | *prefix_len= 0; |
930 | part_end= keyinfo->key_part+table->actual_n_key_parts(keyinfo); |
931 | for (part= keyinfo->key_part ; |
932 | part != part_end ; |
933 | part++, jdx++, key_part_to_use= (key_part_to_use << 1) | 1) |
934 | { |
935 | if (!(table->file->index_flags(idx, jdx, 0) & HA_READ_ORDER)) |
936 | DBUG_RETURN(FALSE); |
937 | |
938 | /* Check whether the index component is partial */ |
939 | Field *part_field= table->field[part->fieldnr-1]; |
940 | if ((part_field->flags & BLOB_FLAG) || |
941 | part->length < part_field->key_length()) |
942 | break; |
943 | |
944 | if (field->eq(part->field)) |
945 | { |
946 | ref->key= idx; |
947 | ref->key_length= 0; |
948 | ref->key_parts= 0; |
949 | key_part_map key_part_used= 0; |
950 | *range_fl= NO_MIN_RANGE | NO_MAX_RANGE; |
951 | if (matching_cond(max_fl, ref, keyinfo, part, cond, |
952 | &key_part_used, range_fl, prefix_len) && |
953 | !(key_part_to_use & ~key_part_used)) |
954 | { |
955 | if (!max_fl && key_part_used == key_part_to_use && part->null_bit) |
956 | { |
957 | /* |
958 | The query is on this form: |
959 | |
960 | SELECT MIN(key_part_k) |
961 | FROM t1 |
962 | WHERE key_part_1 = const and ... and key_part_k-1 = const |
963 | |
964 | If key_part_k is nullable, we want to find the first matching row |
965 | where key_part_k is not null. The key buffer is now {const, ..., |
966 | NULL}. This will be passed to the handler along with a flag |
967 | indicating open interval. If a tuple is read that does not match |
968 | these search criteria, an attempt will be made to read an exact |
969 | match for the key buffer. |
970 | */ |
971 | /* Set the first byte of key_part_k to 1, that means NULL */ |
972 | ref->key_buff[ref->key_length]= 1; |
973 | ref->key_length+= part->store_length; |
974 | ref->key_parts++; |
975 | DBUG_ASSERT(ref->key_parts == jdx+1); |
976 | *range_fl&= ~NO_MIN_RANGE; |
977 | *range_fl|= NEAR_MIN; // Open interval |
978 | } |
979 | /* |
980 | The following test is false when the key in the key tree is |
981 | converted (for example to upper case) |
982 | */ |
983 | if (field->part_of_key.is_set(idx)) |
984 | table->file->ha_start_keyread(idx); |
985 | DBUG_RETURN(TRUE); |
986 | } |
987 | } |
988 | } |
989 | } |
990 | DBUG_RETURN(FALSE); |
991 | } |
992 | |
993 | |
994 | /** |
995 | Check whether found key is in range specified by conditions. |
996 | |
997 | @param[in] max_fl 0 for MIN(field) / 1 for MAX(field) |
998 | @param[in] ref Reference to the key value and info |
999 | @param[in] field Field used the MIN/MAX expression |
1000 | @param[in] cond WHERE condition |
1001 | @param[in] range_fl Says whether there is a condition to to be checked |
1002 | @param[in] prefix_len Length of the constant part of the key |
1003 | |
1004 | @retval |
1005 | 0 ok |
1006 | @retval |
1007 | 1 WHERE was not true for the found row |
1008 | */ |
1009 | |
1010 | static int reckey_in_range(bool max_fl, TABLE_REF *ref, Field* field, |
1011 | COND *cond, uint range_fl, uint prefix_len) |
1012 | { |
1013 | if (key_cmp_if_same(field->table, ref->key_buff, ref->key, prefix_len)) |
1014 | return 1; |
1015 | if (!cond || (range_fl & (max_fl ? NO_MIN_RANGE : NO_MAX_RANGE))) |
1016 | return 0; |
1017 | return maxmin_in_range(max_fl, field, cond); |
1018 | } |
1019 | |
1020 | |
1021 | /** |
1022 | Check whether {MAX|MIN}(field) is in range specified by conditions. |
1023 | |
1024 | @param[in] max_fl 0 for MIN(field) / 1 for MAX(field) |
1025 | @param[in] field Field used the MIN/MAX expression |
1026 | @param[in] cond WHERE condition |
1027 | |
1028 | @retval |
1029 | 0 ok |
1030 | @retval |
1031 | 1 WHERE was not true for the found row |
1032 | */ |
1033 | |
1034 | static int maxmin_in_range(bool max_fl, Field* field, COND *cond) |
1035 | { |
1036 | /* If AND/OR condition */ |
1037 | if (cond->type() == Item::COND_ITEM) |
1038 | { |
1039 | List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list()); |
1040 | Item *item; |
1041 | while ((item= li++)) |
1042 | { |
1043 | if (maxmin_in_range(max_fl, field, item)) |
1044 | return 1; |
1045 | } |
1046 | return 0; |
1047 | } |
1048 | |
1049 | if (cond->used_tables() != field->table->map) |
1050 | return 0; |
1051 | bool less_fl= 0; |
1052 | switch (((Item_func*) cond)->functype()) { |
1053 | case Item_func::BETWEEN: |
1054 | return cond->val_int() == 0; // Return 1 if WHERE is false |
1055 | case Item_func::LT_FUNC: |
1056 | case Item_func::LE_FUNC: |
1057 | less_fl= 1; |
1058 | /* fall through */ |
1059 | case Item_func::GT_FUNC: |
1060 | case Item_func::GE_FUNC: |
1061 | { |
1062 | Item *item= ((Item_func*) cond)->arguments()[1]; |
1063 | /* In case of 'const op item' we have to swap the operator */ |
1064 | if (!item->const_item()) |
1065 | less_fl= 1-less_fl; |
1066 | /* |
1067 | We only have to check the expression if we are using an expression like |
1068 | SELECT MAX(b) FROM t1 WHERE a=const AND b>const |
1069 | not for |
1070 | SELECT MAX(b) FROM t1 WHERE a=const AND b<const |
1071 | */ |
1072 | if (max_fl != less_fl) |
1073 | return cond->val_int() == 0; // Return 1 if WHERE is false |
1074 | return 0; |
1075 | } |
1076 | default: |
1077 | break; // Ignore |
1078 | } |
1079 | return 0; |
1080 | } |
1081 | |
1082 | |