1/* Copyright (C) 2010, 2011 Monty Program Ab
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; version 2 of the License.
6
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 GNU General Public License for more details.
11
12 You should have received a copy of the GNU General Public License
13 along with this program; if not, write to the Free Software
14 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
15
16#include "mariadb.h"
17#include "sql_parse.h"
18#include <my_bit.h>
19#include "sql_select.h"
20#include "key.h"
21
22/****************************************************************************
23 * Default MRR implementation (MRR to non-MRR converter)
24 ***************************************************************************/
25
26/**
27 Get cost and other information about MRR scan over a known list of ranges
28
29 Calculate estimated cost and other information about an MRR scan for given
30 sequence of ranges.
31
32 @param keyno Index number
33 @param seq Range sequence to be traversed
34 @param seq_init_param First parameter for seq->init()
35 @param n_ranges_arg Number of ranges in the sequence, or 0 if the caller
36 can't efficiently determine it
37 @param bufsz INOUT IN: Size of the buffer available for use
38 OUT: Size of the buffer that is expected to be actually
39 used, or 0 if buffer is not needed.
40 @param flags INOUT A combination of HA_MRR_* flags
41 @param cost OUT Estimated cost of MRR access
42
43 @note
44 This method (or an overriding one in a derived class) must check for
45 thd->killed and return HA_POS_ERROR if it is not zero. This is required
46 for a user to be able to interrupt the calculation by killing the
47 connection/query.
48
49 @retval
50 HA_POS_ERROR Error or the engine is unable to perform the requested
51 scan. Values of OUT parameters are undefined.
52 @retval
53 other OK, *cost contains cost of the scan, *bufsz and *flags
54 contain scan parameters.
55*/
56
57ha_rows
58handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
59 void *seq_init_param, uint n_ranges_arg,
60 uint *bufsz, uint *flags, Cost_estimate *cost)
61{
62 KEY_MULTI_RANGE range;
63 range_seq_t seq_it;
64 ha_rows rows, total_rows= 0;
65 uint n_ranges=0;
66 THD *thd= table->in_use;
67
68 /* Default MRR implementation doesn't need buffer */
69 *bufsz= 0;
70
71 seq_it= seq->init(seq_init_param, n_ranges, *flags);
72 while (!seq->next(seq_it, &range))
73 {
74 if (unlikely(thd->killed != 0))
75 return HA_POS_ERROR;
76
77 n_ranges++;
78 key_range *min_endp, *max_endp;
79 if (range.range_flag & GEOM_FLAG)
80 {
81 /* In this case tmp_min_flag contains the handler-read-function */
82 range.start_key.flag= (ha_rkey_function) (range.range_flag ^ GEOM_FLAG);
83 min_endp= &range.start_key;
84 max_endp= NULL;
85 }
86 else
87 {
88 min_endp= range.start_key.length? &range.start_key : NULL;
89 max_endp= range.end_key.length? &range.end_key : NULL;
90 }
91 if ((range.range_flag & UNIQUE_RANGE) && !(range.range_flag & NULL_RANGE))
92 rows= 1; /* there can be at most one row */
93 else
94 {
95 if (HA_POS_ERROR == (rows= this->records_in_range(keyno, min_endp,
96 max_endp)))
97 {
98 /* Can't scan one range => can't do MRR scan at all */
99 total_rows= HA_POS_ERROR;
100 break;
101 }
102 }
103 total_rows += rows;
104 }
105
106 if (total_rows != HA_POS_ERROR)
107 {
108 /* The following calculation is the same as in multi_range_read_info(): */
109 *flags |= HA_MRR_USE_DEFAULT_IMPL;
110 cost->reset();
111 cost->avg_io_cost= 1; /* assume random seeks */
112 if ((*flags & HA_MRR_INDEX_ONLY) && total_rows > 2)
113 cost->io_count= keyread_time(keyno, n_ranges, (uint)total_rows);
114 else
115 cost->io_count= read_time(keyno, n_ranges, total_rows);
116 cost->cpu_cost= (double) total_rows / TIME_FOR_COMPARE + 0.01;
117 }
118 return total_rows;
119}
120
121
122/**
123 Get cost and other information about MRR scan over some sequence of ranges
124
125 Calculate estimated cost and other information about an MRR scan for some
126 sequence of ranges.
127
128 The ranges themselves will be known only at execution phase. When this
129 function is called we only know number of ranges and a (rough) E(#records)
130 within those ranges.
131
132 Currently this function is only called for "n-keypart singlepoint" ranges,
133 i.e. each range is "keypart1=someconst1 AND ... AND keypartN=someconstN"
134
135 The flags parameter is a combination of those flags: HA_MRR_SORTED,
136 HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION, HA_MRR_LIMITS.
137
138 @param keyno Index number
139 @param n_ranges Estimated number of ranges (i.e. intervals) in the
140 range sequence.
141 @param n_rows Estimated total number of records contained within all
142 of the ranges
143 @param bufsz INOUT IN: Size of the buffer available for use
144 OUT: Size of the buffer that will be actually used, or
145 0 if buffer is not needed.
146 @param flags INOUT A combination of HA_MRR_* flags
147 @param cost OUT Estimated cost of MRR access
148
149 @retval
150 0 OK, *cost contains cost of the scan, *bufsz and *flags contain scan
151 parameters.
152 @retval
153 other Error or can't perform the requested scan
154*/
155
156ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows,
157 uint key_parts, uint *bufsz,
158 uint *flags, Cost_estimate *cost)
159{
160 /*
161 Currently we expect this function to be called only in preparation of scan
162 with HA_MRR_SINGLE_POINT property.
163 */
164 DBUG_ASSERT(*flags | HA_MRR_SINGLE_POINT);
165
166 *bufsz= 0; /* Default implementation doesn't need a buffer */
167 *flags |= HA_MRR_USE_DEFAULT_IMPL;
168
169 cost->reset();
170 cost->avg_io_cost= 1; /* assume random seeks */
171
172 /* Produce the same cost as non-MRR code does */
173 if (*flags & HA_MRR_INDEX_ONLY)
174 cost->io_count= keyread_time(keyno, n_ranges, n_rows);
175 else
176 cost->io_count= read_time(keyno, n_ranges, n_rows);
177 return 0;
178}
179
180
181/**
182 Initialize the MRR scan
183
184 Initialize the MRR scan. This function may do heavyweight scan
185 initialization like row prefetching/sorting/etc (NOTE: but better not do
186 it here as we may not need it, e.g. if we never satisfy WHERE clause on
187 previous tables. For many implementations it would be natural to do such
188 initializations in the first multi_read_range_next() call)
189
190 mode is a combination of the following flags: HA_MRR_SORTED,
191 HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION
192
193 @param seq Range sequence to be traversed
194 @param seq_init_param First parameter for seq->init()
195 @param n_ranges Number of ranges in the sequence
196 @param mode Flags, see the description section for the details
197 @param buf INOUT: memory buffer to be used
198
199 @note
200 One must have called index_init() before calling this function. Several
201 multi_range_read_init() calls may be made in course of one query.
202
203 Buffer memory management is done according to the following scenario:
204 The caller allocates the buffer and provides it to the callee by filling
205 the members of HANDLER_BUFFER structure.
206 The callee consumes all or some fraction of the provided buffer space, and
207 sets the HANDLER_BUFFER members accordingly.
208 The callee may use the buffer memory until the next multi_range_read_init()
209 call is made, all records have been read, or until index_end() call is
210 made, whichever comes first.
211
212 @retval 0 OK
213 @retval 1 Error
214*/
215
216int
217handler::multi_range_read_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
218 uint n_ranges, uint mode, HANDLER_BUFFER *buf)
219{
220 DBUG_ENTER("handler::multi_range_read_init");
221 mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode);
222 mrr_funcs= *seq_funcs;
223 mrr_is_output_sorted= MY_TEST(mode & HA_MRR_SORTED);
224 mrr_have_range= FALSE;
225 DBUG_RETURN(0);
226}
227
228/**
229 Get next record in MRR scan
230
231 Default MRR implementation: read the next record
232
233 @param range_info OUT Undefined if HA_MRR_NO_ASSOCIATION flag is in effect
234 Otherwise, the opaque value associated with the range
235 that contains the returned record.
236
237 @retval 0 OK
238 @retval other Error code
239*/
240
241int handler::multi_range_read_next(range_id_t *range_info)
242{
243 int result= HA_ERR_END_OF_FILE;
244 bool range_res;
245 DBUG_ENTER("handler::multi_range_read_next");
246
247 if (!mrr_have_range)
248 {
249 mrr_have_range= TRUE;
250 goto start;
251 }
252
253 do
254 {
255 /* Save a call if there can be only one row in range. */
256 if (mrr_cur_range.range_flag != (UNIQUE_RANGE | EQ_RANGE))
257 {
258 result= read_range_next();
259 /* On success or non-EOF errors jump to the end. */
260 if (result != HA_ERR_END_OF_FILE)
261 break;
262 }
263 else
264 {
265 if (ha_was_semi_consistent_read())
266 {
267 /*
268 The following assignment is redundant, but for extra safety and to
269 remove the compiler warning:
270 */
271 range_res= FALSE;
272 goto scan_it_again;
273 }
274 /*
275 We need to set this for the last range only, but checking this
276 condition is more expensive than just setting the result code.
277 */
278 result= HA_ERR_END_OF_FILE;
279 }
280
281start:
282 /* Try the next range(s) until one matches a record. */
283 while (!(range_res= mrr_funcs.next(mrr_iter, &mrr_cur_range)))
284 {
285scan_it_again:
286 result= read_range_first(mrr_cur_range.start_key.keypart_map ?
287 &mrr_cur_range.start_key : 0,
288 mrr_cur_range.end_key.keypart_map ?
289 &mrr_cur_range.end_key : 0,
290 MY_TEST(mrr_cur_range.range_flag & EQ_RANGE),
291 mrr_is_output_sorted);
292 if (result != HA_ERR_END_OF_FILE)
293 break;
294 }
295 }
296 while ((result == HA_ERR_END_OF_FILE) && !range_res);
297
298 *range_info= mrr_cur_range.ptr;
299 DBUG_PRINT("exit",("handler::multi_range_read_next result %d", result));
300 DBUG_RETURN(result);
301}
302
303/****************************************************************************
304 * Mrr_*_reader classes (building blocks for DS-MRR)
305 ***************************************************************************/
306
307int Mrr_simple_index_reader::init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
308 void *seq_init_param, uint n_ranges,
309 uint mode, Key_parameters *key_par_arg,
310 Lifo_buffer *key_buffer_arg,
311 Buffer_manager *buf_manager_arg)
312{
313 HANDLER_BUFFER no_buffer = {NULL, NULL, NULL};
314 file= h_arg;
315 return file->handler::multi_range_read_init(seq_funcs, seq_init_param,
316 n_ranges, mode, &no_buffer);
317}
318
319
320int Mrr_simple_index_reader::get_next(range_id_t *range_info)
321{
322 int res;
323 while (!(res= file->handler::multi_range_read_next(range_info)))
324 {
325 KEY_MULTI_RANGE *curr_range= &file->handler::mrr_cur_range;
326 if (!file->mrr_funcs.skip_index_tuple ||
327 !file->mrr_funcs.skip_index_tuple(file->mrr_iter, curr_range->ptr))
328 break;
329 }
330 if (res && res != HA_ERR_END_OF_FILE && res != HA_ERR_KEY_NOT_FOUND)
331 file->print_error(res, MYF(0)); // Fatal error
332 return res;
333}
334
335
336/**
337 @brief Get next index record
338
339 @param range_info OUT identifier of range that the returned record belongs to
340
341 @note
342 We actually iterate over nested sequences:
343 - an ordered sequence of groups of identical keys
344 - each key group has key value, which has multiple matching records
345 - thus, each record matches all members of the key group
346
347 @retval 0 OK, next record was successfully read
348 @retval HA_ERR_END_OF_FILE End of records
349 @retval Other Some other error; Error is printed
350*/
351
352int Mrr_ordered_index_reader::get_next(range_id_t *range_info)
353{
354 int res;
355 DBUG_ENTER("Mrr_ordered_index_reader::get_next");
356
357 for(;;)
358 {
359 if (!scanning_key_val_iter)
360 {
361 while ((res= kv_it.init(this)))
362 {
363 if ((res != HA_ERR_KEY_NOT_FOUND && res != HA_ERR_END_OF_FILE))
364 DBUG_RETURN(res); /* Some fatal error */
365
366 if (key_buffer->is_empty())
367 {
368 DBUG_RETURN(HA_ERR_END_OF_FILE);
369 }
370 }
371 scanning_key_val_iter= TRUE;
372 }
373
374 if ((res= kv_it.get_next(range_info)))
375 {
376 scanning_key_val_iter= FALSE;
377 if ((res != HA_ERR_KEY_NOT_FOUND && res != HA_ERR_END_OF_FILE))
378 DBUG_RETURN(res);
379 kv_it.move_to_next_key_value();
380 continue;
381 }
382 if (!skip_index_tuple(*range_info) &&
383 !skip_record(*range_info, NULL))
384 {
385 break;
386 }
387 /* Go get another (record, range_id) combination */
388 } /* while */
389
390 DBUG_RETURN(0);
391}
392
393
394/*
395 Supply index reader with the O(1)space it needs for scan interrupt/restore
396 operation
397*/
398
399bool Mrr_ordered_index_reader::set_interruption_temp_buffer(uint rowid_length,
400 uint key_len,
401 uint saved_pk_len,
402 uchar **space_start,
403 uchar *space_end)
404{
405 if (space_end - *space_start <= (ptrdiff_t)(rowid_length + key_len + saved_pk_len))
406 return TRUE;
407 support_scan_interruptions= TRUE;
408
409 saved_rowid= *space_start;
410 *space_start += rowid_length;
411
412 if (saved_pk_len)
413 {
414 saved_primary_key= *space_start;
415 *space_start += saved_pk_len;
416 }
417 else
418 saved_primary_key= NULL;
419
420 saved_key_tuple= *space_start;
421 *space_start += key_len;
422
423 have_saved_rowid= FALSE;
424 read_was_interrupted= FALSE;
425 return FALSE;
426}
427
428void Mrr_ordered_index_reader::set_no_interruption_temp_buffer()
429{
430 support_scan_interruptions= FALSE;
431 saved_key_tuple= saved_rowid= saved_primary_key= NULL; /* safety */
432 have_saved_rowid= FALSE;
433 read_was_interrupted= FALSE;
434}
435
436void Mrr_ordered_index_reader::interrupt_read()
437{
438 DBUG_ASSERT(support_scan_interruptions);
439 TABLE *table= file->get_table();
440 KEY *used_index= &table->key_info[file->active_index];
441 /* Save the current key value */
442 key_copy(saved_key_tuple, table->record[0],
443 used_index, used_index->key_length);
444
445 if (saved_primary_key)
446 {
447 key_copy(saved_primary_key, table->record[0],
448 &table->key_info[table->s->primary_key],
449 table->key_info[table->s->primary_key].key_length);
450 }
451 read_was_interrupted= TRUE;
452
453 /* Save the last rowid */
454 memcpy(saved_rowid, file->ref, file->ref_length);
455 have_saved_rowid= TRUE;
456}
457
458void Mrr_ordered_index_reader::position()
459{
460 if (have_saved_rowid)
461 memcpy(file->ref, saved_rowid, file->ref_length);
462 else
463 Mrr_index_reader::position();
464}
465
466void Mrr_ordered_index_reader::resume_read()
467{
468 TABLE *table= file->get_table();
469
470 if (!read_was_interrupted)
471 return;
472
473 KEY *used_index= &table->key_info[file->active_index];
474 key_restore(table->record[0], saved_key_tuple,
475 used_index, used_index->key_length);
476 if (saved_primary_key)
477 {
478 key_restore(table->record[0], saved_primary_key,
479 &table->key_info[table->s->primary_key],
480 table->key_info[table->s->primary_key].key_length);
481 }
482}
483
484
485/**
486 Fill the buffer with (lookup_tuple, range_id) pairs and sort
487
488 @return
489 0 OK, the buffer is non-empty and sorted
490 HA_ERR_END_OF_FILE Source exhausted, the buffer is empty.
491*/
492
493int Mrr_ordered_index_reader::refill_buffer(bool initial)
494{
495 KEY_MULTI_RANGE cur_range;
496 DBUG_ENTER("Mrr_ordered_index_reader::refill_buffer");
497
498 DBUG_ASSERT(key_buffer->is_empty());
499
500 if (source_exhausted)
501 DBUG_RETURN(HA_ERR_END_OF_FILE);
502
503 buf_manager->reset_buffer_sizes(buf_manager->arg);
504 key_buffer->reset();
505 key_buffer->setup_writing(keypar.key_size_in_keybuf,
506 is_mrr_assoc? sizeof(range_id_t) : 0);
507
508 while (key_buffer->can_write() &&
509 !(source_exhausted= mrr_funcs.next(mrr_iter, &cur_range)))
510 {
511 DBUG_ASSERT(cur_range.range_flag & EQ_RANGE);
512
513 /* Put key, or {key, range_id} pair into the buffer */
514 key_buffer->write_ptr1= keypar.use_key_pointers ?
515 (uchar*)&cur_range.start_key.key :
516 (uchar*)cur_range.start_key.key;
517 key_buffer->write_ptr2= (uchar*)&cur_range.ptr;
518 key_buffer->write();
519 }
520
521 /* Force get_next() to start with kv_it.init() call: */
522 scanning_key_val_iter= FALSE;
523
524 if (source_exhausted && key_buffer->is_empty())
525 DBUG_RETURN(HA_ERR_END_OF_FILE);
526
527 if (!initial)
528 {
529 /* This is a non-initial buffer fill and we've got a non-empty buffer */
530 THD *thd= current_thd;
531 status_var_increment(thd->status_var.ha_mrr_key_refills_count);
532 }
533
534 key_buffer->sort((key_buffer->type() == Lifo_buffer::FORWARD)?
535 (qsort2_cmp)Mrr_ordered_index_reader::compare_keys_reverse :
536 (qsort2_cmp)Mrr_ordered_index_reader::compare_keys,
537 this);
538 DBUG_RETURN(0);
539}
540
541
542int Mrr_ordered_index_reader::init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
543 void *seq_init_param, uint n_ranges,
544 uint mode, Key_parameters *key_par_arg,
545 Lifo_buffer *key_buffer_arg,
546 Buffer_manager *buf_manager_arg)
547{
548 file= h_arg;
549 key_buffer= key_buffer_arg;
550 buf_manager= buf_manager_arg;
551 keypar= *key_par_arg;
552
553 KEY *key_info= &file->get_table()->key_info[file->active_index];
554 keypar.index_ranges_unique= MY_TEST(key_info->flags & HA_NOSAME &&
555 key_info->user_defined_key_parts ==
556 my_count_bits(keypar.key_tuple_map));
557
558 mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode);
559 is_mrr_assoc= !MY_TEST(mode & HA_MRR_NO_ASSOCIATION);
560 mrr_funcs= *seq_funcs;
561 source_exhausted= FALSE;
562 read_was_interrupted= false;
563 have_saved_rowid= FALSE;
564 return 0;
565}
566
567
568static int rowid_cmp_reverse(void *file, uchar *a, uchar *b)
569{
570 return - ((handler*)file)->cmp_ref(a, b);
571}
572
573
574int Mrr_ordered_rndpos_reader::init(handler *h_arg,
575 Mrr_index_reader *index_reader_arg,
576 uint mode,
577 Lifo_buffer *buf)
578{
579 file= h_arg;
580 index_reader= index_reader_arg;
581 rowid_buffer= buf;
582 is_mrr_assoc= !MY_TEST(mode & HA_MRR_NO_ASSOCIATION);
583 index_reader_exhausted= FALSE;
584 index_reader_needs_refill= TRUE;
585 return 0;
586}
587
588
589/**
590 DS-MRR: Fill and sort the rowid buffer
591
592 Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into
593 buffer. When the buffer is full or scan is completed, sort the buffer by
594 rowid and return.
595
596 When this function returns, either rowid buffer is not empty, or the source
597 of lookup keys (i.e. ranges) is exhaused.
598
599 @retval 0 OK, the next portion of rowids is in the buffer,
600 properly ordered
601 @retval other Error
602*/
603
604int Mrr_ordered_rndpos_reader::refill_buffer(bool initial)
605{
606 int res;
607 bool first_call= initial;
608 DBUG_ENTER("Mrr_ordered_rndpos_reader::refill_buffer");
609
610 if (index_reader_exhausted)
611 DBUG_RETURN(HA_ERR_END_OF_FILE);
612
613 while (initial || index_reader_needs_refill ||
614 (res= refill_from_index_reader()) == HA_ERR_END_OF_FILE)
615 {
616 if ((res= index_reader->refill_buffer(initial)))
617 {
618 if (res == HA_ERR_END_OF_FILE)
619 index_reader_exhausted= TRUE;
620 break;
621 }
622 initial= FALSE;
623 index_reader_needs_refill= FALSE;
624 }
625
626 if (!first_call && !index_reader_exhausted)
627 {
628 /* Ok, this was a successful buffer refill operation */
629 THD *thd= current_thd;
630 status_var_increment(thd->status_var.ha_mrr_rowid_refills_count);
631 }
632
633 DBUG_RETURN(res);
634}
635
636
637void Mrr_index_reader::position()
638{
639 file->position(file->get_table()->record[0]);
640}
641
642
643/*
644 @brief Try to refill the rowid buffer without calling
645 index_reader->refill_buffer().
646*/
647
648int Mrr_ordered_rndpos_reader::refill_from_index_reader()
649{
650 range_id_t range_info;
651 int res;
652 DBUG_ENTER("Mrr_ordered_rndpos_reader::refill_from_index_reader");
653
654 DBUG_ASSERT(rowid_buffer->is_empty());
655 index_rowid= index_reader->get_rowid_ptr();
656 rowid_buffer->reset();
657 rowid_buffer->setup_writing(file->ref_length,
658 is_mrr_assoc? sizeof(range_id_t) : 0);
659
660 last_identical_rowid= NULL;
661
662 index_reader->resume_read();
663 while (rowid_buffer->can_write())
664 {
665 res= index_reader->get_next(&range_info);
666
667 if (res)
668 {
669 if (res != HA_ERR_END_OF_FILE)
670 DBUG_RETURN(res);
671 index_reader_needs_refill=TRUE;
672 break;
673 }
674
675 index_reader->position();
676
677 /* Put rowid, or {rowid, range_id} pair into the buffer */
678 rowid_buffer->write_ptr1= index_rowid;
679 rowid_buffer->write_ptr2= (uchar*)&range_info;
680 rowid_buffer->write();
681 }
682
683 /*
684 When index_reader_needs_refill=TRUE, this means we've got all of index
685 tuples for lookups keys that index_reader had. We are not in the middle
686 of an index read, so there is no need to call interrupt_read.
687
688 Actually, we must not call interrupt_read(), because it could be that we
689 haven't read a single row (because all index lookups returned
690 HA_ERR_KEY_NOT_FOUND). In this case, interrupt_read() will cause [harmless]
691 valgrind warnings when trying to save garbage from table->record[0].
692 */
693 if (!index_reader_needs_refill)
694 index_reader->interrupt_read();
695 /* Sort the buffer contents by rowid */
696 rowid_buffer->sort((qsort2_cmp)rowid_cmp_reverse, (void*)file);
697
698 rowid_buffer->setup_reading(file->ref_length,
699 is_mrr_assoc ? sizeof(range_id_t) : 0);
700 DBUG_RETURN(rowid_buffer->is_empty()? HA_ERR_END_OF_FILE : 0);
701}
702
703
704/*
705 Get the next {record, range_id} using ordered array of rowid+range_id pairs
706
707 @note
708 Since we have sorted rowids, we try not to make multiple rnd_pos() calls
709 with the same rowid value.
710*/
711
712int Mrr_ordered_rndpos_reader::get_next(range_id_t *range_info)
713{
714 int res;
715
716 /*
717 First, check if rowid buffer has elements with the same rowid value as
718 the previous.
719 */
720 while (last_identical_rowid)
721 {
722 /*
723 Current record (the one we've returned in previous call) was obtained
724 from a rowid that matched multiple range_ids. Return this record again,
725 with next matching range_id.
726 */
727 (void)rowid_buffer->read();
728
729 if (rowid_buffer->read_ptr1 == last_identical_rowid)
730 last_identical_rowid= NULL; /* reached the last of identical rowids */
731
732 if (!is_mrr_assoc)
733 return 0;
734
735 memcpy(range_info, rowid_buffer->read_ptr2, sizeof(range_id_t));
736 if (!index_reader->skip_record(*range_info, rowid_buffer->read_ptr1))
737 return 0;
738 }
739
740 /*
741 Ok, last_identical_rowid==NULL, it's time to read next different rowid
742 value and get record for it.
743 */
744 for(;;)
745 {
746 /* Return eof if there are no rowids in the buffer after re-fill attempt */
747 if (rowid_buffer->read())
748 return HA_ERR_END_OF_FILE;
749
750 if (is_mrr_assoc)
751 {
752 memcpy(range_info, rowid_buffer->read_ptr2, sizeof(range_id_t));
753 if (index_reader->skip_record(*range_info, rowid_buffer->read_ptr1))
754 continue;
755 }
756
757 res= file->ha_rnd_pos(file->get_table()->record[0],
758 rowid_buffer->read_ptr1);
759
760 if (res)
761 return res; /* Some fatal error */
762
763 break; /* Got another record */
764 }
765
766 /*
767 Check if subsequent buffer elements have the same rowid value as this
768 one. If yes, remember this fact so that we don't make any more rnd_pos()
769 calls with this value.
770
771 Note: this implies that SQL layer doesn't touch table->record[0]
772 between calls.
773 */
774 Lifo_buffer_iterator it;
775 it.init(rowid_buffer);
776 while (!it.read())
777 {
778 if (file->cmp_ref(it.read_ptr1, rowid_buffer->read_ptr1))
779 break;
780 last_identical_rowid= it.read_ptr1;
781 }
782 return 0;
783}
784
785
786/****************************************************************************
787 * Top-level DS-MRR implementation functions (the ones called by storage engine)
788 ***************************************************************************/
789
790/**
791 DS-MRR: Initialize and start MRR scan
792
793 Initialize and start the MRR scan. Depending on the mode parameter, this
794 may use default or DS-MRR implementation.
795
796 @param h_arg Table handler to be used
797 @param key Index to be used
798 @param seq_funcs Interval sequence enumeration functions
799 @param seq_init_param Interval sequence enumeration parameter
800 @param n_ranges Number of ranges in the sequence.
801 @param mode HA_MRR_* modes to use
802 @param buf INOUT Buffer to use
803
804 @retval 0 Ok, Scan started.
805 @retval other Error
806*/
807
808int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
809 void *seq_init_param, uint n_ranges, uint mode,
810 HANDLER_BUFFER *buf)
811{
812 THD *thd= h_arg->get_table()->in_use;
813 int res;
814 Key_parameters keypar;
815 uint UNINIT_VAR(key_buff_elem_size); /* set/used when do_sort_keys==TRUE */
816 handler *h_idx;
817 Mrr_ordered_rndpos_reader *disk_strategy= NULL;
818 bool do_sort_keys= FALSE;
819 DBUG_ENTER("DsMrr_impl::dsmrr_init");
820 /*
821 index_merge may invoke a scan on an object for which dsmrr_info[_const]
822 has not been called, so set the owner handler here as well.
823 */
824 primary_file= h_arg;
825 is_mrr_assoc= !MY_TEST(mode & HA_MRR_NO_ASSOCIATION);
826
827 strategy_exhausted= FALSE;
828
829 /* By default, have do-nothing buffer manager */
830 buf_manager.arg= this;
831 buf_manager.reset_buffer_sizes= do_nothing;
832 buf_manager.redistribute_buffer_space= do_nothing;
833
834 if (mode & (HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED))
835 goto use_default_impl;
836
837 /*
838 Determine whether we'll need to do key sorting and/or rnd_pos() scan
839 */
840 index_strategy= NULL;
841 if ((mode & HA_MRR_SINGLE_POINT) &&
842 optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS))
843 {
844 do_sort_keys= TRUE;
845 index_strategy= &reader_factory.ordered_index_reader;
846 }
847 else
848 index_strategy= &reader_factory.simple_index_reader;
849
850 strategy= index_strategy;
851 /*
852 We don't need a rowid-to-rndpos step if
853 - We're doing a scan on clustered primary key
854 - [In the future] We're doing an index_only read
855 */
856 DBUG_ASSERT(primary_file->inited == handler::INDEX ||
857 (primary_file->inited == handler::RND &&
858 secondary_file &&
859 secondary_file->inited == handler::INDEX));
860
861 h_idx= (primary_file->inited == handler::INDEX)? primary_file: secondary_file;
862 keyno= h_idx->active_index;
863
864 if (!(keyno == table->s->primary_key && h_idx->primary_key_is_clustered()))
865 {
866 strategy= disk_strategy= &reader_factory.ordered_rndpos_reader;
867 }
868
869 full_buf= buf->buffer;
870 full_buf_end= buf->buffer_end;
871
872 if (do_sort_keys)
873 {
874 /* Pre-calculate some parameters of key sorting */
875 keypar.use_key_pointers= MY_TEST(mode & HA_MRR_MATERIALIZED_KEYS);
876 seq_funcs->get_key_info(seq_init_param, &keypar.key_tuple_length,
877 &keypar.key_tuple_map);
878 keypar.key_size_in_keybuf= keypar.use_key_pointers?
879 sizeof(char*) : keypar.key_tuple_length;
880 key_buff_elem_size= keypar.key_size_in_keybuf + (int)is_mrr_assoc * sizeof(void*);
881
882 /* Ordered index reader needs some space to store an index tuple */
883 if (strategy != index_strategy)
884 {
885 uint saved_pk_length=0;
886 if (h_idx->primary_key_is_clustered())
887 {
888 uint pk= h_idx->get_table()->s->primary_key;
889 if (pk != MAX_KEY)
890 saved_pk_length= h_idx->get_table()->key_info[pk].key_length;
891 }
892
893 KEY *used_index= &h_idx->get_table()->key_info[h_idx->active_index];
894 if (reader_factory.ordered_index_reader.
895 set_interruption_temp_buffer(primary_file->ref_length,
896 used_index->key_length,
897 saved_pk_length,
898 &full_buf, full_buf_end))
899 goto use_default_impl;
900 }
901 else
902 reader_factory.ordered_index_reader.set_no_interruption_temp_buffer();
903 }
904
905 if (strategy == index_strategy)
906 {
907 /*
908 Index strategy alone handles the record retrieval. Give all buffer space
909 to it. Key buffer should have forward orientation so we can return the
910 end of it.
911 */
912 key_buffer= &forward_key_buf;
913 key_buffer->set_buffer_space(full_buf, full_buf_end);
914
915 /* Safety: specify that rowid buffer has zero size: */
916 rowid_buffer.set_buffer_space(full_buf_end, full_buf_end);
917
918 if (do_sort_keys && !key_buffer->have_space_for(key_buff_elem_size))
919 goto use_default_impl;
920
921 if ((res= index_strategy->init(primary_file, seq_funcs, seq_init_param, n_ranges,
922 mode, &keypar, key_buffer, &buf_manager)))
923 goto error;
924 }
925 else
926 {
927 /* We'll have both index and rndpos strategies working together */
928 if (do_sort_keys)
929 {
930 /* Both strategies will need buffer space, share the buffer */
931 if (setup_buffer_sharing(keypar.key_size_in_keybuf, keypar.key_tuple_map))
932 goto use_default_impl;
933
934 buf_manager.reset_buffer_sizes= reset_buffer_sizes;
935 buf_manager.redistribute_buffer_space= redistribute_buffer_space;
936 }
937 else
938 {
939 /* index strategy doesn't need buffer, give all space to rowids*/
940 rowid_buffer.set_buffer_space(full_buf, full_buf_end);
941 if (!rowid_buffer.have_space_for(primary_file->ref_length +
942 (int)is_mrr_assoc * sizeof(range_id_t)))
943 goto use_default_impl;
944 }
945
946 if ((res= setup_two_handlers()))
947 goto error;
948
949 if ((res= index_strategy->init(secondary_file, seq_funcs, seq_init_param,
950 n_ranges, mode, &keypar, key_buffer,
951 &buf_manager)) ||
952 (res= disk_strategy->init(primary_file, index_strategy, mode,
953 &rowid_buffer)))
954 {
955 goto error;
956 }
957 }
958
959 /*
960 At this point, we're sure that we're running a native MRR scan (i.e. we
961 didnt fall back to default implementation for some reason).
962 */
963 status_var_increment(thd->status_var.ha_mrr_init_count);
964
965 res= strategy->refill_buffer(TRUE);
966 if (res)
967 {
968 if (res != HA_ERR_END_OF_FILE)
969 goto error;
970 strategy_exhausted= TRUE;
971 }
972
973 /*
974 If we have scanned through all intervals in *seq, then adjust *buf to
975 indicate that the remaining buffer space will not be used.
976 */
977// if (dsmrr_eof)
978// buf->end_of_used_area= rowid_buffer.end_of_space();
979
980
981 DBUG_RETURN(0);
982error:
983 close_second_handler();
984 /* Safety, not really needed but: */
985 strategy= NULL;
986 DBUG_RETURN(res);
987
988use_default_impl:
989 if (primary_file->inited != handler::INDEX)
990 {
991 /* We can get here when
992 - we've previously successfully done a DS-MRR scan (and so have
993 secondary_file!= NULL, secondary_file->inited= INDEX,
994 primary_file->inited=RND)
995 - for this invocation, we haven't got enough buffer space, and so we
996 have to use the default MRR implementation.
997
998 note: primary_file->ha_index_end() will call dsmrr_close() which will
999 close/destroy the secondary_file, this is intentional.
1000 (Yes this is slow, but one can't expect performance with join buffer
1001 so small that it can accomodate one rowid and one index tuple)
1002 */
1003 if ((res= primary_file->ha_rnd_end()) ||
1004 (res= primary_file->ha_index_init(keyno, MY_TEST(mode & HA_MRR_SORTED))))
1005 {
1006 DBUG_RETURN(res);
1007 }
1008 }
1009 /* Call correct init function and assign to top level object */
1010 Mrr_simple_index_reader *s= &reader_factory.simple_index_reader;
1011 res= s->init(primary_file, seq_funcs, seq_init_param, n_ranges, mode, NULL,
1012 NULL, NULL);
1013 strategy= s;
1014 DBUG_RETURN(res);
1015}
1016
1017
1018/*
1019 Whatever the current state is, make it so that we have two handler objects:
1020 - primary_file - initialized for rnd_pos() scan
1021 - secondary_file - initialized for scanning the index specified in
1022 this->keyno
1023 RETURN
1024 0 OK
1025 HA_XXX Error code
1026*/
1027
1028int DsMrr_impl::setup_two_handlers()
1029{
1030 int res;
1031 THD *thd= primary_file->get_table()->in_use;
1032 DBUG_ENTER("DsMrr_impl::setup_two_handlers");
1033 if (!secondary_file)
1034 {
1035 handler *new_h2;
1036 Item *pushed_cond= NULL;
1037 DBUG_ASSERT(primary_file->inited == handler::INDEX);
1038 /* Create a separate handler object to do rnd_pos() calls. */
1039 /*
1040 ::clone() takes up a lot of stack, especially on 64 bit platforms.
1041 The constant 5 is an empiric result.
1042 */
1043 if (check_stack_overrun(thd, 5*STACK_MIN_SIZE, (uchar*) &new_h2))
1044 DBUG_RETURN(1);
1045
1046 /* Create a separate handler object to do rnd_pos() calls. */
1047 if (!(new_h2= primary_file->clone(primary_file->get_table()->s->
1048 normalized_path.str,
1049 thd->mem_root)) ||
1050 new_h2->ha_external_lock(thd, F_RDLCK))
1051 {
1052 delete new_h2;
1053 DBUG_RETURN(1);
1054 }
1055
1056 if (keyno == primary_file->pushed_idx_cond_keyno)
1057 pushed_cond= primary_file->pushed_idx_cond;
1058
1059 Mrr_reader *save_strategy= strategy;
1060 strategy= NULL;
1061 /*
1062 Caution: this call will invoke this->dsmrr_close(). Do not put the
1063 created secondary table handler new_h2 into this->secondary_file or it
1064 will delete it. Also, save the picked strategy
1065 */
1066 res= primary_file->ha_index_end();
1067
1068 strategy= save_strategy;
1069 secondary_file= new_h2;
1070
1071 if (res || (res= (primary_file->ha_rnd_init(FALSE))))
1072 goto error;
1073
1074 table->prepare_for_position();
1075 secondary_file->extra(HA_EXTRA_KEYREAD);
1076 secondary_file->mrr_iter= primary_file->mrr_iter;
1077
1078 if ((res= secondary_file->ha_index_init(keyno, FALSE)))
1079 goto error;
1080
1081 if (pushed_cond)
1082 secondary_file->idx_cond_push(keyno, pushed_cond);
1083 }
1084 else
1085 {
1086 DBUG_ASSERT(secondary_file && secondary_file->inited==handler::INDEX);
1087 /*
1088 We get here when the access alternates betwen MRR scan(s) and non-MRR
1089 scans.
1090
1091 Calling primary_file->index_end() will invoke dsmrr_close() for this object,
1092 which will delete secondary_file. We need to keep it, so put it away and dont
1093 let it be deleted:
1094 */
1095 if (primary_file->inited == handler::INDEX)
1096 {
1097 handler *save_h2= secondary_file;
1098 Mrr_reader *save_strategy= strategy;
1099 secondary_file= NULL;
1100 strategy= NULL;
1101 res= primary_file->ha_index_end();
1102 secondary_file= save_h2;
1103 strategy= save_strategy;
1104 if (res)
1105 goto error;
1106 }
1107 if ((primary_file->inited != handler::RND) &&
1108 (res= primary_file->ha_rnd_init(FALSE)))
1109 goto error;
1110 }
1111 DBUG_RETURN(0);
1112
1113error:
1114 DBUG_RETURN(res);
1115}
1116
1117
1118void DsMrr_impl::close_second_handler()
1119{
1120 if (secondary_file)
1121 {
1122 secondary_file->extra(HA_EXTRA_NO_KEYREAD);
1123 secondary_file->ha_index_or_rnd_end();
1124 secondary_file->ha_external_lock(current_thd, F_UNLCK);
1125 secondary_file->ha_close();
1126 delete secondary_file;
1127 secondary_file= NULL;
1128 }
1129}
1130
1131
1132void DsMrr_impl::dsmrr_close()
1133{
1134 DBUG_ENTER("DsMrr_impl::dsmrr_close");
1135 close_second_handler();
1136 strategy= NULL;
1137 DBUG_VOID_RETURN;
1138}
1139
1140
1141/*
1142 my_qsort2-compatible static member function to compare key tuples
1143*/
1144
1145int Mrr_ordered_index_reader::compare_keys(void* arg, uchar* key1_arg,
1146 uchar* key2_arg)
1147{
1148 Mrr_ordered_index_reader *reader= (Mrr_ordered_index_reader*)arg;
1149 TABLE *table= reader->file->get_table();
1150 KEY_PART_INFO *part= table->key_info[reader->file->active_index].key_part;
1151 uchar *key1, *key2;
1152
1153 if (reader->keypar.use_key_pointers)
1154 {
1155 /* the buffer stores pointers to keys, get to the keys */
1156 memcpy(&key1, key1_arg, sizeof(char*));
1157 memcpy(&key2, key2_arg, sizeof(char*));
1158 }
1159 else
1160 {
1161 key1= key1_arg;
1162 key2= key2_arg;
1163 }
1164
1165 return key_tuple_cmp(part, key1, key2, reader->keypar.key_tuple_length);
1166}
1167
1168
1169int Mrr_ordered_index_reader::compare_keys_reverse(void* arg, uchar* key1,
1170 uchar* key2)
1171{
1172 return -compare_keys(arg, key1, key2);
1173}
1174
1175
1176/**
1177 Set the buffer space to be shared between rowid and key buffer
1178
1179 @return FALSE ok
1180 @return TRUE There is so little buffer space that we won't be able to use
1181 the strategy.
1182 This happens when we don't have enough space for one rowid
1183 element and one key element so this is mainly targeted at
1184 testing.
1185*/
1186
1187bool DsMrr_impl::setup_buffer_sharing(uint key_size_in_keybuf,
1188 key_part_map key_tuple_map)
1189{
1190 long key_buff_elem_size= key_size_in_keybuf +
1191 (int)is_mrr_assoc * sizeof(range_id_t);
1192
1193 KEY *key_info= &primary_file->get_table()->key_info[keyno];
1194 /*
1195 Ok if we got here we need to allocate one part of the buffer
1196 for keys and another part for rowids.
1197 */
1198 ulonglong rowid_buf_elem_size= primary_file->ref_length +
1199 (int)is_mrr_assoc * sizeof(range_id_t);
1200
1201 /*
1202 Use rec_per_key statistics as a basis to find out how many rowids
1203 we'll get for each key value.
1204 TODO: what should be the default value to use when there is no
1205 statistics?
1206 */
1207 uint parts= my_count_bits(key_tuple_map);
1208 ha_rows rpc;
1209 ulonglong rowids_size= rowid_buf_elem_size;
1210 if ((rpc= (ha_rows) key_info->actual_rec_per_key(parts - 1)))
1211 rowids_size= rowid_buf_elem_size * rpc;
1212
1213 double fraction_for_rowids=
1214 (ulonglong2double(rowids_size) /
1215 (ulonglong2double(rowids_size) + key_buff_elem_size));
1216
1217 ptrdiff_t bytes_for_rowids=
1218 (ptrdiff_t)floor(0.5 + fraction_for_rowids * (full_buf_end - full_buf));
1219
1220 ptrdiff_t bytes_for_keys= (full_buf_end - full_buf) - bytes_for_rowids;
1221
1222 if (bytes_for_keys < key_buff_elem_size + 1 ||
1223 bytes_for_rowids < (ptrdiff_t)rowid_buf_elem_size + 1)
1224 return TRUE; /* Failed to provide minimum space for one of the buffers */
1225
1226 rowid_buffer_end= full_buf + bytes_for_rowids;
1227 rowid_buffer.set_buffer_space(full_buf, rowid_buffer_end);
1228 key_buffer= &backward_key_buf;
1229 key_buffer->set_buffer_space(rowid_buffer_end, full_buf_end);
1230
1231 /* The above code guarantees that the buffers are big enough */
1232 DBUG_ASSERT(key_buffer->have_space_for(key_buff_elem_size) &&
1233 rowid_buffer.have_space_for((size_t)rowid_buf_elem_size));
1234
1235 return FALSE;
1236}
1237
1238
1239void DsMrr_impl::do_nothing(void *dsmrr_arg)
1240{
1241 /* Do nothing */
1242}
1243
1244
1245void DsMrr_impl::reset_buffer_sizes(void *dsmrr_arg)
1246{
1247 DsMrr_impl *dsmrr= (DsMrr_impl*)dsmrr_arg;
1248 dsmrr->rowid_buffer.set_buffer_space(dsmrr->full_buf,
1249 dsmrr->rowid_buffer_end);
1250 dsmrr->key_buffer->set_buffer_space(dsmrr->rowid_buffer_end,
1251 dsmrr->full_buf_end);
1252}
1253
1254
1255/*
1256 Take unused space from the key buffer and give it to the rowid buffer
1257*/
1258
1259void DsMrr_impl::redistribute_buffer_space(void *dsmrr_arg)
1260{
1261 DsMrr_impl *dsmrr= (DsMrr_impl*)dsmrr_arg;
1262 uchar *unused_start, *unused_end;
1263 dsmrr->key_buffer->remove_unused_space(&unused_start, &unused_end);
1264 dsmrr->rowid_buffer.grow(unused_start, unused_end);
1265}
1266
1267
1268/*
1269 @brief Initialize the iterator
1270
1271 @note
1272 Initialize the iterator to produce matches for the key of the first element
1273 in owner_arg->key_buffer
1274
1275 @retval 0 OK
1276 @retval HA_ERR_END_OF_FILE Either the owner->key_buffer is empty or
1277 no matches for the key we've tried (check
1278 key_buffer->is_empty() to tell these apart)
1279 @retval other code Fatal error
1280*/
1281
1282int Key_value_records_iterator::init(Mrr_ordered_index_reader *owner_arg)
1283{
1284 int res;
1285 owner= owner_arg;
1286
1287 identical_key_it.init(owner->key_buffer);
1288 owner->key_buffer->setup_reading(owner->keypar.key_size_in_keybuf,
1289 owner->is_mrr_assoc ? sizeof(void*) : 0);
1290
1291 if (identical_key_it.read())
1292 return HA_ERR_END_OF_FILE;
1293
1294 uchar *key_in_buf= last_identical_key_ptr= identical_key_it.read_ptr1;
1295
1296 uchar *index_tuple= key_in_buf;
1297 if (owner->keypar.use_key_pointers)
1298 memcpy(&index_tuple, key_in_buf, sizeof(char*));
1299
1300 /* Check out how many more identical keys are following */
1301 while (!identical_key_it.read())
1302 {
1303 if (Mrr_ordered_index_reader::compare_keys(owner, key_in_buf,
1304 identical_key_it.read_ptr1))
1305 break;
1306 last_identical_key_ptr= identical_key_it.read_ptr1;
1307 }
1308 identical_key_it.init(owner->key_buffer);
1309 res= owner->file->ha_index_read_map(owner->file->get_table()->record[0],
1310 index_tuple,
1311 owner->keypar.key_tuple_map,
1312 HA_READ_KEY_EXACT);
1313
1314 if (res)
1315 {
1316 /* Failed to find any matching records */
1317 move_to_next_key_value();
1318 return res;
1319 }
1320 owner->have_saved_rowid= FALSE;
1321 get_next_row= FALSE;
1322 return 0;
1323}
1324
1325
1326int Key_value_records_iterator::get_next(range_id_t *range_info)
1327{
1328 int res;
1329
1330 if (get_next_row)
1331 {
1332 if (owner->keypar.index_ranges_unique)
1333 {
1334 /* We're using a full unique key, no point to call index_next_same */
1335 return HA_ERR_END_OF_FILE;
1336 }
1337
1338 handler *h= owner->file;
1339 uchar *lookup_key;
1340 if (owner->keypar.use_key_pointers)
1341 memcpy(&lookup_key, identical_key_it.read_ptr1, sizeof(void*));
1342 else
1343 lookup_key= identical_key_it.read_ptr1;
1344
1345 if ((res= h->ha_index_next_same(h->get_table()->record[0],
1346 lookup_key,
1347 owner->keypar.key_tuple_length)))
1348 {
1349 /* It's either HA_ERR_END_OF_FILE or some other error */
1350 return res;
1351 }
1352 identical_key_it.init(owner->key_buffer);
1353 owner->have_saved_rowid= FALSE;
1354 get_next_row= FALSE;
1355 }
1356
1357 identical_key_it.read(); /* This gets us next range_id */
1358 memcpy(range_info, identical_key_it.read_ptr2, sizeof(range_id_t));
1359
1360 if (!last_identical_key_ptr ||
1361 (identical_key_it.read_ptr1 == last_identical_key_ptr))
1362 {
1363 /*
1364 We've reached the last of the identical keys that current record is a
1365 match for. Set get_next_row=TRUE so that we read the next index record
1366 on the next call to this function.
1367 */
1368 get_next_row= TRUE;
1369 }
1370 return 0;
1371}
1372
1373
1374void Key_value_records_iterator::move_to_next_key_value()
1375{
1376 while (!owner->key_buffer->read() &&
1377 (owner->key_buffer->read_ptr1 != last_identical_key_ptr)) {}
1378}
1379
1380
1381/**
1382 DS-MRR implementation: multi_range_read_next() function.
1383
1384 Calling convention is like multi_range_read_next() has.
1385*/
1386
1387int DsMrr_impl::dsmrr_next(range_id_t *range_info)
1388{
1389 int res;
1390 if (strategy_exhausted)
1391 return HA_ERR_END_OF_FILE;
1392
1393 while ((res= strategy->get_next(range_info)) == HA_ERR_END_OF_FILE)
1394 {
1395 if ((res= strategy->refill_buffer(FALSE)))
1396 break; /* EOF or error */
1397 }
1398 return res;
1399}
1400
1401
1402/**
1403 DS-MRR implementation: multi_range_read_info() function
1404*/
1405ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows,
1406 uint key_parts,
1407 uint *bufsz, uint *flags, Cost_estimate *cost)
1408{
1409 ha_rows res __attribute__((unused));
1410 uint def_flags= *flags;
1411 uint def_bufsz= *bufsz;
1412
1413 /* Get cost/flags/mem_usage of default MRR implementation */
1414 res= primary_file->handler::multi_range_read_info(keyno, n_ranges, rows,
1415 key_parts, &def_bufsz,
1416 &def_flags, cost);
1417 DBUG_ASSERT(!res);
1418
1419 if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
1420 choose_mrr_impl(keyno, rows, flags, bufsz, cost))
1421 {
1422 /* Default implementation is choosen */
1423 DBUG_PRINT("info", ("Default MRR implementation choosen"));
1424 *flags= def_flags;
1425 *bufsz= def_bufsz;
1426 }
1427 else
1428 {
1429 /* *flags and *bufsz were set by choose_mrr_impl */
1430 DBUG_PRINT("info", ("DS-MRR implementation choosen"));
1431 }
1432 return 0;
1433}
1434
1435
1436/**
1437 DS-MRR Implementation: multi_range_read_info_const() function
1438*/
1439
1440ha_rows DsMrr_impl::dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
1441 void *seq_init_param, uint n_ranges,
1442 uint *bufsz, uint *flags, Cost_estimate *cost)
1443{
1444 ha_rows rows;
1445 uint def_flags= *flags;
1446 uint def_bufsz= *bufsz;
1447 /* Get cost/flags/mem_usage of default MRR implementation */
1448 rows= primary_file->handler::multi_range_read_info_const(keyno, seq,
1449 seq_init_param,
1450 n_ranges,
1451 &def_bufsz,
1452 &def_flags, cost);
1453 if (rows == HA_POS_ERROR)
1454 {
1455 /* Default implementation can't perform MRR scan => we can't either */
1456 return rows;
1457 }
1458
1459 /*
1460 If HA_MRR_USE_DEFAULT_IMPL has been passed to us, that is an order to
1461 use the default MRR implementation (we need it for UPDATE/DELETE).
1462 Otherwise, make a choice based on cost and @@optimizer_switch settings
1463 */
1464 if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
1465 choose_mrr_impl(keyno, rows, flags, bufsz, cost))
1466 {
1467 DBUG_PRINT("info", ("Default MRR implementation choosen"));
1468 *flags= def_flags;
1469 *bufsz= def_bufsz;
1470 }
1471 else
1472 {
1473 /* *flags and *bufsz were set by choose_mrr_impl */
1474 DBUG_PRINT("info", ("DS-MRR implementation choosen"));
1475 }
1476 return rows;
1477}
1478
1479
1480/**
1481 Check if key has partially-covered columns
1482
1483 We can't use DS-MRR to perform range scans when the ranges are over
1484 partially-covered keys, because we'll not have full key part values
1485 (we'll have their prefixes from the index) and will not be able to check
1486 if we've reached the end the range.
1487
1488 @param keyno Key to check
1489
1490 @todo
1491 Allow use of DS-MRR in cases where the index has partially-covered
1492 components but they are not used for scanning.
1493
1494 @retval TRUE Yes
1495 @retval FALSE No
1496*/
1497
1498bool key_uses_partial_cols(TABLE_SHARE *share, uint keyno)
1499{
1500 KEY_PART_INFO *kp= share->key_info[keyno].key_part;
1501 KEY_PART_INFO *kp_end= kp + share->key_info[keyno].user_defined_key_parts;
1502 for (; kp != kp_end; kp++)
1503 {
1504 if (!kp->field->part_of_key.is_set(keyno))
1505 return TRUE;
1506 }
1507 return FALSE;
1508}
1509
1510
1511/*
1512 Check if key/flags allow DS-MRR/CPK strategy to be used
1513
1514 @param thd
1515 @param keyno Index that will be used
1516 @param mrr_flags
1517
1518 @retval TRUE DS-MRR/CPK should be used
1519 @retval FALSE Otherwise
1520*/
1521
1522bool DsMrr_impl::check_cpk_scan(THD *thd, TABLE_SHARE *share, uint keyno,
1523 uint mrr_flags)
1524{
1525 return MY_TEST((mrr_flags & HA_MRR_SINGLE_POINT) &&
1526 keyno == share->primary_key &&
1527 primary_file->primary_key_is_clustered() &&
1528 optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS));
1529}
1530
1531
1532/*
1533 DS-MRR Internals: Choose between Default MRR implementation and DS-MRR
1534
1535 Make the choice between using Default MRR implementation and DS-MRR.
1536 This function contains common functionality factored out of dsmrr_info()
1537 and dsmrr_info_const(). The function assumes that the default MRR
1538 implementation's applicability requirements are satisfied.
1539
1540 @param keyno Index number
1541 @param rows E(full rows to be retrieved)
1542 @param flags IN MRR flags provided by the MRR user
1543 OUT If DS-MRR is choosen, flags of DS-MRR implementation
1544 else the value is not modified
1545 @param bufsz IN If DS-MRR is choosen, buffer use of DS-MRR implementation
1546 else the value is not modified
1547 @param cost IN Cost of default MRR implementation
1548 OUT If DS-MRR is choosen, cost of DS-MRR scan
1549 else the value is not modified
1550
1551 @retval TRUE Default MRR implementation should be used
1552 @retval FALSE DS-MRR implementation should be used
1553*/
1554
1555
1556bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags,
1557 uint *bufsz, Cost_estimate *cost)
1558{
1559 Cost_estimate dsmrr_cost;
1560 bool res;
1561 THD *thd= primary_file->get_table()->in_use;
1562 TABLE_SHARE *share= primary_file->get_table_share();
1563
1564 bool doing_cpk_scan= check_cpk_scan(thd, share, keyno, *flags);
1565 bool using_cpk= MY_TEST(keyno == share->primary_key &&
1566 primary_file->primary_key_is_clustered());
1567 *flags &= ~HA_MRR_IMPLEMENTATION_FLAGS;
1568 if (!optimizer_flag(thd, OPTIMIZER_SWITCH_MRR) ||
1569 *flags & HA_MRR_INDEX_ONLY ||
1570 (using_cpk && !doing_cpk_scan) || key_uses_partial_cols(share, keyno))
1571 {
1572 /* Use the default implementation */
1573 *flags |= HA_MRR_USE_DEFAULT_IMPL;
1574 *flags &= ~HA_MRR_IMPLEMENTATION_FLAGS;
1575 return TRUE;
1576 }
1577
1578 uint add_len= share->key_info[keyno].key_length + primary_file->ref_length;
1579 *bufsz -= add_len;
1580 if (get_disk_sweep_mrr_cost(keyno, rows, *flags, bufsz, &dsmrr_cost))
1581 return TRUE;
1582 *bufsz += add_len;
1583
1584 bool force_dsmrr;
1585 /*
1586 If mrr_cost_based flag is not set, then set cost of DS-MRR to be minimum of
1587 DS-MRR and Default implementations cost. This allows one to force use of
1588 DS-MRR whenever it is applicable without affecting other cost-based
1589 choices.
1590 */
1591 if ((force_dsmrr= !optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_COST_BASED)) &&
1592 dsmrr_cost.total_cost() > cost->total_cost())
1593 dsmrr_cost= *cost;
1594
1595 if (force_dsmrr || dsmrr_cost.total_cost() <= cost->total_cost())
1596 {
1597 *flags &= ~HA_MRR_USE_DEFAULT_IMPL; /* Use the DS-MRR implementation */
1598 *flags &= ~HA_MRR_SORTED; /* We will return unordered output */
1599 *cost= dsmrr_cost;
1600 res= FALSE;
1601
1602
1603 if ((using_cpk && doing_cpk_scan) ||
1604 (optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS) &&
1605 *flags & HA_MRR_SINGLE_POINT))
1606 {
1607 *flags |= DSMRR_IMPL_SORT_KEYS;
1608 }
1609
1610 if (!(using_cpk && doing_cpk_scan) &&
1611 !(*flags & HA_MRR_INDEX_ONLY))
1612 {
1613 *flags |= DSMRR_IMPL_SORT_ROWIDS;
1614 }
1615 /*
1616 if ((*flags & HA_MRR_SINGLE_POINT) &&
1617 optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS))
1618 *flags |= HA_MRR_MATERIALIZED_KEYS;
1619 */
1620 }
1621 else
1622 {
1623 /* Use the default MRR implementation */
1624 res= TRUE;
1625 }
1626 return res;
1627}
1628
1629/*
1630 Take the flags we've returned previously and print one of
1631 - Key-ordered scan
1632 - Rowid-ordered scan
1633 - Key-ordered Rowid-ordered scan
1634*/
1635
1636int DsMrr_impl::dsmrr_explain_info(uint mrr_mode, char *str, size_t size)
1637{
1638 const char *key_ordered= "Key-ordered scan";
1639 const char *rowid_ordered= "Rowid-ordered scan";
1640 const char *both_ordered= "Key-ordered Rowid-ordered scan";
1641 const char *used_str="";
1642 const uint BOTH_FLAGS= (DSMRR_IMPL_SORT_KEYS | DSMRR_IMPL_SORT_ROWIDS);
1643
1644 if (!(mrr_mode & HA_MRR_USE_DEFAULT_IMPL))
1645 {
1646 if ((mrr_mode & BOTH_FLAGS) == BOTH_FLAGS)
1647 used_str= both_ordered;
1648 else if (mrr_mode & DSMRR_IMPL_SORT_KEYS)
1649 used_str= key_ordered;
1650 else if (mrr_mode & DSMRR_IMPL_SORT_ROWIDS)
1651 used_str= rowid_ordered;
1652
1653 size_t used_str_len= strlen(used_str);
1654 size_t copy_len= MY_MIN(used_str_len, size);
1655 memcpy(str, used_str, copy_len);
1656 return (int)copy_len;
1657 }
1658 return 0;
1659}
1660
1661
1662static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, Cost_estimate *cost);
1663
1664
1665/**
1666 Get cost of DS-MRR scan
1667
1668 @param keynr Index to be used
1669 @param rows E(Number of rows to be scanned)
1670 @param flags Scan parameters (HA_MRR_* flags)
1671 @param buffer_size INOUT Buffer size
1672 @param cost OUT The cost
1673
1674 @retval FALSE OK
1675 @retval TRUE Error, DS-MRR cannot be used (the buffer is too small
1676 for even 1 rowid)
1677*/
1678
1679bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
1680 uint *buffer_size, Cost_estimate *cost)
1681{
1682 ulong max_buff_entries, elem_size;
1683 ha_rows rows_in_full_step;
1684 ha_rows rows_in_last_step;
1685 uint n_full_steps;
1686 double index_read_cost;
1687
1688 elem_size= primary_file->ref_length +
1689 sizeof(void*) * (!MY_TEST(flags & HA_MRR_NO_ASSOCIATION));
1690 max_buff_entries = *buffer_size / elem_size;
1691
1692 if (!max_buff_entries)
1693 return TRUE; /* Buffer has not enough space for even 1 rowid */
1694
1695 /* Number of iterations we'll make with full buffer */
1696 n_full_steps= (uint)floor(rows2double(rows) / max_buff_entries);
1697
1698 /*
1699 Get numbers of rows we'll be processing in
1700 - non-last sweep, with full buffer
1701 - last iteration, with non-full buffer
1702 */
1703 rows_in_full_step= max_buff_entries;
1704 rows_in_last_step= rows % max_buff_entries;
1705
1706 /* Adjust buffer size if we expect to use only part of the buffer */
1707 if (n_full_steps)
1708 {
1709 get_sort_and_sweep_cost(table, rows_in_full_step, cost);
1710 cost->multiply(n_full_steps);
1711 }
1712 else
1713 {
1714 cost->reset();
1715 *buffer_size= (uint)MY_MAX(*buffer_size,
1716 (size_t)(1.2*rows_in_last_step) * elem_size +
1717 primary_file->ref_length + table->key_info[keynr].key_length);
1718 }
1719
1720 Cost_estimate last_step_cost;
1721 get_sort_and_sweep_cost(table, rows_in_last_step, &last_step_cost);
1722 cost->add(&last_step_cost);
1723
1724 if (n_full_steps != 0)
1725 cost->mem_cost= *buffer_size;
1726 else
1727 cost->mem_cost= (double)rows_in_last_step * elem_size;
1728
1729 /* Total cost of all index accesses */
1730 index_read_cost= primary_file->keyread_time(keynr, 1, rows);
1731 cost->add_io(index_read_cost, 1 /* Random seeks */);
1732 return FALSE;
1733}
1734
1735
1736/*
1737 Get cost of one sort-and-sweep step
1738
1739 It consists of two parts:
1740 - sort an array of #nrows ROWIDs using qsort
1741 - read #nrows records from table in a sweep.
1742
1743 @param table Table being accessed
1744 @param nrows Number of rows to be sorted and retrieved
1745 @param cost OUT The cost of scan
1746*/
1747
1748static
1749void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, Cost_estimate *cost)
1750{
1751 if (nrows)
1752 {
1753 get_sweep_read_cost(table, nrows, FALSE, cost);
1754 /* Add cost of qsort call: n * log2(n) * cost(rowid_comparison) */
1755 double cmp_op= rows2double(nrows) * (1.0 / TIME_FOR_COMPARE_ROWID);
1756 if (cmp_op < 3)
1757 cmp_op= 3;
1758 cost->cpu_cost += cmp_op * log2(cmp_op);
1759 }
1760 else
1761 cost->reset();
1762}
1763
1764
1765/**
1766 Get cost of reading nrows table records in a "disk sweep"
1767
1768 A disk sweep read is a sequence of handler->rnd_pos(rowid) calls that made
1769 for an ordered sequence of rowids.
1770
1771 We assume hard disk IO. The read is performed as follows:
1772
1773 1. The disk head is moved to the needed cylinder
1774 2. The controller waits for the plate to rotate
1775 3. The data is transferred
1776
1777 Time to do #3 is insignificant compared to #2+#1.
1778
1779 Time to move the disk head is proportional to head travel distance.
1780
1781 Time to wait for the plate to rotate depends on whether the disk head
1782 was moved or not.
1783
1784 If disk head wasn't moved, the wait time is proportional to distance
1785 between the previous block and the block we're reading.
1786
1787 If the head was moved, we don't know how much we'll need to wait for the
1788 plate to rotate. We assume the wait time to be a variate with a mean of
1789 0.5 of full rotation time.
1790
1791 Our cost units are "random disk seeks". The cost of random disk seek is
1792 actually not a constant, it depends one range of cylinders we're going
1793 to access. We make it constant by introducing a fuzzy concept of "typical
1794 datafile length" (it's fuzzy as it's hard to tell whether it should
1795 include index file, temp.tables etc). Then random seek cost is:
1796
1797 1 = half_rotation_cost + move_cost * 1/3 * typical_data_file_length
1798
1799 We define half_rotation_cost as DISK_SEEK_BASE_COST=0.9.
1800
1801 @param table Table to be accessed
1802 @param nrows Number of rows to retrieve
1803 @param interrupted TRUE <=> Assume that the disk sweep will be
1804 interrupted by other disk IO. FALSE - otherwise.
1805 @param cost OUT The cost.
1806*/
1807
1808void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted,
1809 Cost_estimate *cost)
1810{
1811 DBUG_ENTER("get_sweep_read_cost");
1812
1813 cost->reset();
1814 if (table->file->primary_key_is_clustered())
1815 {
1816 cost->io_count= table->file->read_time(table->s->primary_key,
1817 (uint) nrows, nrows);
1818 }
1819 else
1820 {
1821 double n_blocks=
1822 ceil(ulonglong2double(table->file->stats.data_file_length) / IO_SIZE);
1823 double busy_blocks=
1824 n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(nrows)));
1825 if (busy_blocks < 1.0)
1826 busy_blocks= 1.0;
1827
1828 DBUG_PRINT("info",("sweep: nblocks=%g, busy_blocks=%g", n_blocks,
1829 busy_blocks));
1830 cost->io_count= busy_blocks;
1831
1832 if (!interrupted)
1833 {
1834 /* Assume reading is done in one 'sweep' */
1835 cost->avg_io_cost= (DISK_SEEK_BASE_COST +
1836 DISK_SEEK_PROP_COST*n_blocks/busy_blocks);
1837 }
1838 }
1839 DBUG_PRINT("info",("returning cost=%g", cost->total_cost()));
1840 DBUG_VOID_RETURN;
1841}
1842
1843
1844/* **************************************************************************
1845 * DS-MRR implementation ends
1846 ***************************************************************************/
1847
1848
1849