1 | /* Copyright (C) 2010, 2011 Monty Program Ab |
2 | |
3 | This program is free software; you can redistribute it and/or modify |
4 | it under the terms of the GNU General Public License as published by |
5 | the Free Software Foundation; version 2 of the License. |
6 | |
7 | This program is distributed in the hope that it will be useful, |
8 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
10 | GNU General Public License for more details. |
11 | |
12 | You should have received a copy of the GNU General Public License |
13 | along with this program; if not, write to the Free Software |
14 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ |
15 | |
16 | #include "mariadb.h" |
17 | #include "sql_parse.h" |
18 | #include <my_bit.h> |
19 | #include "sql_select.h" |
20 | #include "key.h" |
21 | |
22 | /**************************************************************************** |
23 | * Default MRR implementation (MRR to non-MRR converter) |
24 | ***************************************************************************/ |
25 | |
26 | /** |
27 | Get cost and other information about MRR scan over a known list of ranges |
28 | |
29 | Calculate estimated cost and other information about an MRR scan for given |
30 | sequence of ranges. |
31 | |
32 | @param keyno Index number |
33 | @param seq Range sequence to be traversed |
34 | @param seq_init_param First parameter for seq->init() |
35 | @param n_ranges_arg Number of ranges in the sequence, or 0 if the caller |
36 | can't efficiently determine it |
37 | @param bufsz INOUT IN: Size of the buffer available for use |
38 | OUT: Size of the buffer that is expected to be actually |
39 | used, or 0 if buffer is not needed. |
40 | @param flags INOUT A combination of HA_MRR_* flags |
41 | @param cost OUT Estimated cost of MRR access |
42 | |
43 | @note |
44 | This method (or an overriding one in a derived class) must check for |
45 | thd->killed and return HA_POS_ERROR if it is not zero. This is required |
46 | for a user to be able to interrupt the calculation by killing the |
47 | connection/query. |
48 | |
49 | @retval |
50 | HA_POS_ERROR Error or the engine is unable to perform the requested |
51 | scan. Values of OUT parameters are undefined. |
52 | @retval |
53 | other OK, *cost contains cost of the scan, *bufsz and *flags |
54 | contain scan parameters. |
55 | */ |
56 | |
57 | ha_rows |
58 | handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, |
59 | void *seq_init_param, uint n_ranges_arg, |
60 | uint *bufsz, uint *flags, Cost_estimate *cost) |
61 | { |
62 | KEY_MULTI_RANGE range; |
63 | range_seq_t seq_it; |
64 | ha_rows rows, total_rows= 0; |
65 | uint n_ranges=0; |
66 | THD *thd= table->in_use; |
67 | |
68 | /* Default MRR implementation doesn't need buffer */ |
69 | *bufsz= 0; |
70 | |
71 | seq_it= seq->init(seq_init_param, n_ranges, *flags); |
72 | while (!seq->next(seq_it, &range)) |
73 | { |
74 | if (unlikely(thd->killed != 0)) |
75 | return HA_POS_ERROR; |
76 | |
77 | n_ranges++; |
78 | key_range *min_endp, *max_endp; |
79 | if (range.range_flag & GEOM_FLAG) |
80 | { |
81 | /* In this case tmp_min_flag contains the handler-read-function */ |
82 | range.start_key.flag= (ha_rkey_function) (range.range_flag ^ GEOM_FLAG); |
83 | min_endp= &range.start_key; |
84 | max_endp= NULL; |
85 | } |
86 | else |
87 | { |
88 | min_endp= range.start_key.length? &range.start_key : NULL; |
89 | max_endp= range.end_key.length? &range.end_key : NULL; |
90 | } |
91 | if ((range.range_flag & UNIQUE_RANGE) && !(range.range_flag & NULL_RANGE)) |
92 | rows= 1; /* there can be at most one row */ |
93 | else |
94 | { |
95 | if (HA_POS_ERROR == (rows= this->records_in_range(keyno, min_endp, |
96 | max_endp))) |
97 | { |
98 | /* Can't scan one range => can't do MRR scan at all */ |
99 | total_rows= HA_POS_ERROR; |
100 | break; |
101 | } |
102 | } |
103 | total_rows += rows; |
104 | } |
105 | |
106 | if (total_rows != HA_POS_ERROR) |
107 | { |
108 | /* The following calculation is the same as in multi_range_read_info(): */ |
109 | *flags |= HA_MRR_USE_DEFAULT_IMPL; |
110 | cost->reset(); |
111 | cost->avg_io_cost= 1; /* assume random seeks */ |
112 | if ((*flags & HA_MRR_INDEX_ONLY) && total_rows > 2) |
113 | cost->io_count= keyread_time(keyno, n_ranges, (uint)total_rows); |
114 | else |
115 | cost->io_count= read_time(keyno, n_ranges, total_rows); |
116 | cost->cpu_cost= (double) total_rows / TIME_FOR_COMPARE + 0.01; |
117 | } |
118 | return total_rows; |
119 | } |
120 | |
121 | |
122 | /** |
123 | Get cost and other information about MRR scan over some sequence of ranges |
124 | |
125 | Calculate estimated cost and other information about an MRR scan for some |
126 | sequence of ranges. |
127 | |
128 | The ranges themselves will be known only at execution phase. When this |
129 | function is called we only know number of ranges and a (rough) E(#records) |
130 | within those ranges. |
131 | |
132 | Currently this function is only called for "n-keypart singlepoint" ranges, |
133 | i.e. each range is "keypart1=someconst1 AND ... AND keypartN=someconstN" |
134 | |
135 | The flags parameter is a combination of those flags: HA_MRR_SORTED, |
136 | HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION, HA_MRR_LIMITS. |
137 | |
138 | @param keyno Index number |
139 | @param n_ranges Estimated number of ranges (i.e. intervals) in the |
140 | range sequence. |
141 | @param n_rows Estimated total number of records contained within all |
142 | of the ranges |
143 | @param bufsz INOUT IN: Size of the buffer available for use |
144 | OUT: Size of the buffer that will be actually used, or |
145 | 0 if buffer is not needed. |
146 | @param flags INOUT A combination of HA_MRR_* flags |
147 | @param cost OUT Estimated cost of MRR access |
148 | |
149 | @retval |
150 | 0 OK, *cost contains cost of the scan, *bufsz and *flags contain scan |
151 | parameters. |
152 | @retval |
153 | other Error or can't perform the requested scan |
154 | */ |
155 | |
156 | ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows, |
157 | uint key_parts, uint *bufsz, |
158 | uint *flags, Cost_estimate *cost) |
159 | { |
160 | /* |
161 | Currently we expect this function to be called only in preparation of scan |
162 | with HA_MRR_SINGLE_POINT property. |
163 | */ |
164 | DBUG_ASSERT(*flags | HA_MRR_SINGLE_POINT); |
165 | |
166 | *bufsz= 0; /* Default implementation doesn't need a buffer */ |
167 | *flags |= HA_MRR_USE_DEFAULT_IMPL; |
168 | |
169 | cost->reset(); |
170 | cost->avg_io_cost= 1; /* assume random seeks */ |
171 | |
172 | /* Produce the same cost as non-MRR code does */ |
173 | if (*flags & HA_MRR_INDEX_ONLY) |
174 | cost->io_count= keyread_time(keyno, n_ranges, n_rows); |
175 | else |
176 | cost->io_count= read_time(keyno, n_ranges, n_rows); |
177 | return 0; |
178 | } |
179 | |
180 | |
181 | /** |
182 | Initialize the MRR scan |
183 | |
184 | Initialize the MRR scan. This function may do heavyweight scan |
185 | initialization like row prefetching/sorting/etc (NOTE: but better not do |
186 | it here as we may not need it, e.g. if we never satisfy WHERE clause on |
187 | previous tables. For many implementations it would be natural to do such |
188 | initializations in the first multi_read_range_next() call) |
189 | |
190 | mode is a combination of the following flags: HA_MRR_SORTED, |
191 | HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION |
192 | |
193 | @param seq Range sequence to be traversed |
194 | @param seq_init_param First parameter for seq->init() |
195 | @param n_ranges Number of ranges in the sequence |
196 | @param mode Flags, see the description section for the details |
197 | @param buf INOUT: memory buffer to be used |
198 | |
199 | @note |
200 | One must have called index_init() before calling this function. Several |
201 | multi_range_read_init() calls may be made in course of one query. |
202 | |
203 | Buffer memory management is done according to the following scenario: |
204 | The caller allocates the buffer and provides it to the callee by filling |
205 | the members of HANDLER_BUFFER structure. |
206 | The callee consumes all or some fraction of the provided buffer space, and |
207 | sets the HANDLER_BUFFER members accordingly. |
208 | The callee may use the buffer memory until the next multi_range_read_init() |
209 | call is made, all records have been read, or until index_end() call is |
210 | made, whichever comes first. |
211 | |
212 | @retval 0 OK |
213 | @retval 1 Error |
214 | */ |
215 | |
216 | int |
217 | handler::multi_range_read_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param, |
218 | uint n_ranges, uint mode, HANDLER_BUFFER *buf) |
219 | { |
220 | DBUG_ENTER("handler::multi_range_read_init" ); |
221 | mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode); |
222 | mrr_funcs= *seq_funcs; |
223 | mrr_is_output_sorted= MY_TEST(mode & HA_MRR_SORTED); |
224 | mrr_have_range= FALSE; |
225 | DBUG_RETURN(0); |
226 | } |
227 | |
/**
  Get next record in MRR scan

  Default MRR implementation: read the next record. The current range is
  continued with read_range_next(); when it is exhausted, following ranges
  are taken from the sequence and started with read_range_first() until one
  of them yields a record, an error occurs, or the sequence ends.

  @param range_info  OUT  Undefined if HA_MRR_NO_ASSOCIATION flag is in effect
                          Otherwise, the opaque value associated with the range
                          that contains the returned record.
                          Note that it is assigned from mrr_cur_range.ptr on
                          every call, whatever the result code is.

  @retval 0                   OK
  @retval HA_ERR_END_OF_FILE  No more records in any of the ranges
  @retval other               Error code
*/

int handler::multi_range_read_next(range_id_t *range_info)
{
  int result= HA_ERR_END_OF_FILE;
  bool range_res;
  DBUG_ENTER("handler::multi_range_read_next" );

  /*
    First call after multi_range_read_init() (which clears mrr_have_range):
    there is no current range to continue, so go fetch the first one.
  */
  if (!mrr_have_range)
  {
    mrr_have_range= TRUE;
    goto start;
  }

  do
  {
    /* Save a call if there can be only one row in range. */
    if (mrr_cur_range.range_flag != (UNIQUE_RANGE | EQ_RANGE))
    {
      result= read_range_next();
      /* On success or non-EOF errors jump to the end. */
      if (result != HA_ERR_END_OF_FILE)
        break;
    }
    else
    {
      /*
        When the handler reports a semi-consistent read, the current range is
        started again with read_range_first() rather than being skipped.
      */
      if (ha_was_semi_consistent_read())
      {
        /*
          The following assignment is redundant, but for extra safety and to
          remove the compiler warning:
        */
        range_res= FALSE;
        goto scan_it_again;
      }
      /*
        We need to set this for the last range only, but checking this
        condition is more expensive than just setting the result code.
      */
      result= HA_ERR_END_OF_FILE;
    }

start:
    /*
      Try the next range(s) until one matches a record.
      mrr_funcs.next() returns non-zero once the sequence is exhausted; in
      that case result still holds HA_ERR_END_OF_FILE.
    */
    while (!(range_res= mrr_funcs.next(mrr_iter, &mrr_cur_range)))
    {
scan_it_again:
      /* An endpoint with an empty keypart_map is passed as "no endpoint" */
      result= read_range_first(mrr_cur_range.start_key.keypart_map ?
                               &mrr_cur_range.start_key : 0,
                               mrr_cur_range.end_key.keypart_map ?
                               &mrr_cur_range.end_key : 0,
                               MY_TEST(mrr_cur_range.range_flag & EQ_RANGE),
                               mrr_is_output_sorted);
      if (result != HA_ERR_END_OF_FILE)
        break;
    }
  }
  /* Go on only while ranges remain and the last read hit end-of-range */
  while ((result == HA_ERR_END_OF_FILE) && !range_res);

  *range_info= mrr_cur_range.ptr;
  DBUG_PRINT("exit" ,("handler::multi_range_read_next result %d" , result));
  DBUG_RETURN(result);
}
302 | |
303 | /**************************************************************************** |
304 | * Mrr_*_reader classes (building blocks for DS-MRR) |
305 | ***************************************************************************/ |
306 | |
307 | int Mrr_simple_index_reader::init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, |
308 | void *seq_init_param, uint n_ranges, |
309 | uint mode, Key_parameters *key_par_arg, |
310 | Lifo_buffer *key_buffer_arg, |
311 | Buffer_manager *buf_manager_arg) |
312 | { |
313 | HANDLER_BUFFER no_buffer = {NULL, NULL, NULL}; |
314 | file= h_arg; |
315 | return file->handler::multi_range_read_init(seq_funcs, seq_init_param, |
316 | n_ranges, mode, &no_buffer); |
317 | } |
318 | |
319 | |
320 | int Mrr_simple_index_reader::get_next(range_id_t *range_info) |
321 | { |
322 | int res; |
323 | while (!(res= file->handler::multi_range_read_next(range_info))) |
324 | { |
325 | KEY_MULTI_RANGE *curr_range= &file->handler::mrr_cur_range; |
326 | if (!file->mrr_funcs.skip_index_tuple || |
327 | !file->mrr_funcs.skip_index_tuple(file->mrr_iter, curr_range->ptr)) |
328 | break; |
329 | } |
330 | if (res && res != HA_ERR_END_OF_FILE && res != HA_ERR_KEY_NOT_FOUND) |
331 | file->print_error(res, MYF(0)); // Fatal error |
332 | return res; |
333 | } |
334 | |
335 | |
336 | /** |
337 | @brief Get next index record |
338 | |
339 | @param range_info OUT identifier of range that the returned record belongs to |
340 | |
341 | @note |
342 | We actually iterate over nested sequences: |
343 | - an ordered sequence of groups of identical keys |
344 | - each key group has key value, which has multiple matching records |
345 | - thus, each record matches all members of the key group |
346 | |
347 | @retval 0 OK, next record was successfully read |
348 | @retval HA_ERR_END_OF_FILE End of records |
349 | @retval Other Some other error; Error is printed |
350 | */ |
351 | |
352 | int Mrr_ordered_index_reader::get_next(range_id_t *range_info) |
353 | { |
354 | int res; |
355 | DBUG_ENTER("Mrr_ordered_index_reader::get_next" ); |
356 | |
357 | for(;;) |
358 | { |
359 | if (!scanning_key_val_iter) |
360 | { |
361 | while ((res= kv_it.init(this))) |
362 | { |
363 | if ((res != HA_ERR_KEY_NOT_FOUND && res != HA_ERR_END_OF_FILE)) |
364 | DBUG_RETURN(res); /* Some fatal error */ |
365 | |
366 | if (key_buffer->is_empty()) |
367 | { |
368 | DBUG_RETURN(HA_ERR_END_OF_FILE); |
369 | } |
370 | } |
371 | scanning_key_val_iter= TRUE; |
372 | } |
373 | |
374 | if ((res= kv_it.get_next(range_info))) |
375 | { |
376 | scanning_key_val_iter= FALSE; |
377 | if ((res != HA_ERR_KEY_NOT_FOUND && res != HA_ERR_END_OF_FILE)) |
378 | DBUG_RETURN(res); |
379 | kv_it.move_to_next_key_value(); |
380 | continue; |
381 | } |
382 | if (!skip_index_tuple(*range_info) && |
383 | !skip_record(*range_info, NULL)) |
384 | { |
385 | break; |
386 | } |
387 | /* Go get another (record, range_id) combination */ |
388 | } /* while */ |
389 | |
390 | DBUG_RETURN(0); |
391 | } |
392 | |
393 | |
394 | /* |
395 | Supply index reader with the O(1)space it needs for scan interrupt/restore |
396 | operation |
397 | */ |
398 | |
399 | bool Mrr_ordered_index_reader::set_interruption_temp_buffer(uint rowid_length, |
400 | uint key_len, |
401 | uint saved_pk_len, |
402 | uchar **space_start, |
403 | uchar *space_end) |
404 | { |
405 | if (space_end - *space_start <= (ptrdiff_t)(rowid_length + key_len + saved_pk_len)) |
406 | return TRUE; |
407 | support_scan_interruptions= TRUE; |
408 | |
409 | saved_rowid= *space_start; |
410 | *space_start += rowid_length; |
411 | |
412 | if (saved_pk_len) |
413 | { |
414 | saved_primary_key= *space_start; |
415 | *space_start += saved_pk_len; |
416 | } |
417 | else |
418 | saved_primary_key= NULL; |
419 | |
420 | saved_key_tuple= *space_start; |
421 | *space_start += key_len; |
422 | |
423 | have_saved_rowid= FALSE; |
424 | read_was_interrupted= FALSE; |
425 | return FALSE; |
426 | } |
427 | |
428 | void Mrr_ordered_index_reader::set_no_interruption_temp_buffer() |
429 | { |
430 | support_scan_interruptions= FALSE; |
431 | saved_key_tuple= saved_rowid= saved_primary_key= NULL; /* safety */ |
432 | have_saved_rowid= FALSE; |
433 | read_was_interrupted= FALSE; |
434 | } |
435 | |
436 | void Mrr_ordered_index_reader::interrupt_read() |
437 | { |
438 | DBUG_ASSERT(support_scan_interruptions); |
439 | TABLE *table= file->get_table(); |
440 | KEY *used_index= &table->key_info[file->active_index]; |
441 | /* Save the current key value */ |
442 | key_copy(saved_key_tuple, table->record[0], |
443 | used_index, used_index->key_length); |
444 | |
445 | if (saved_primary_key) |
446 | { |
447 | key_copy(saved_primary_key, table->record[0], |
448 | &table->key_info[table->s->primary_key], |
449 | table->key_info[table->s->primary_key].key_length); |
450 | } |
451 | read_was_interrupted= TRUE; |
452 | |
453 | /* Save the last rowid */ |
454 | memcpy(saved_rowid, file->ref, file->ref_length); |
455 | have_saved_rowid= TRUE; |
456 | } |
457 | |
458 | void Mrr_ordered_index_reader::position() |
459 | { |
460 | if (have_saved_rowid) |
461 | memcpy(file->ref, saved_rowid, file->ref_length); |
462 | else |
463 | Mrr_index_reader::position(); |
464 | } |
465 | |
466 | void Mrr_ordered_index_reader::resume_read() |
467 | { |
468 | TABLE *table= file->get_table(); |
469 | |
470 | if (!read_was_interrupted) |
471 | return; |
472 | |
473 | KEY *used_index= &table->key_info[file->active_index]; |
474 | key_restore(table->record[0], saved_key_tuple, |
475 | used_index, used_index->key_length); |
476 | if (saved_primary_key) |
477 | { |
478 | key_restore(table->record[0], saved_primary_key, |
479 | &table->key_info[table->s->primary_key], |
480 | table->key_info[table->s->primary_key].key_length); |
481 | } |
482 | } |
483 | |
484 | |
485 | /** |
486 | Fill the buffer with (lookup_tuple, range_id) pairs and sort |
487 | |
488 | @return |
489 | 0 OK, the buffer is non-empty and sorted |
490 | HA_ERR_END_OF_FILE Source exhausted, the buffer is empty. |
491 | */ |
492 | |
493 | int Mrr_ordered_index_reader::refill_buffer(bool initial) |
494 | { |
495 | KEY_MULTI_RANGE cur_range; |
496 | DBUG_ENTER("Mrr_ordered_index_reader::refill_buffer" ); |
497 | |
498 | DBUG_ASSERT(key_buffer->is_empty()); |
499 | |
500 | if (source_exhausted) |
501 | DBUG_RETURN(HA_ERR_END_OF_FILE); |
502 | |
503 | buf_manager->reset_buffer_sizes(buf_manager->arg); |
504 | key_buffer->reset(); |
505 | key_buffer->setup_writing(keypar.key_size_in_keybuf, |
506 | is_mrr_assoc? sizeof(range_id_t) : 0); |
507 | |
508 | while (key_buffer->can_write() && |
509 | !(source_exhausted= mrr_funcs.next(mrr_iter, &cur_range))) |
510 | { |
511 | DBUG_ASSERT(cur_range.range_flag & EQ_RANGE); |
512 | |
513 | /* Put key, or {key, range_id} pair into the buffer */ |
514 | key_buffer->write_ptr1= keypar.use_key_pointers ? |
515 | (uchar*)&cur_range.start_key.key : |
516 | (uchar*)cur_range.start_key.key; |
517 | key_buffer->write_ptr2= (uchar*)&cur_range.ptr; |
518 | key_buffer->write(); |
519 | } |
520 | |
521 | /* Force get_next() to start with kv_it.init() call: */ |
522 | scanning_key_val_iter= FALSE; |
523 | |
524 | if (source_exhausted && key_buffer->is_empty()) |
525 | DBUG_RETURN(HA_ERR_END_OF_FILE); |
526 | |
527 | if (!initial) |
528 | { |
529 | /* This is a non-initial buffer fill and we've got a non-empty buffer */ |
530 | THD *thd= current_thd; |
531 | status_var_increment(thd->status_var.ha_mrr_key_refills_count); |
532 | } |
533 | |
534 | key_buffer->sort((key_buffer->type() == Lifo_buffer::FORWARD)? |
535 | (qsort2_cmp)Mrr_ordered_index_reader::compare_keys_reverse : |
536 | (qsort2_cmp)Mrr_ordered_index_reader::compare_keys, |
537 | this); |
538 | DBUG_RETURN(0); |
539 | } |
540 | |
541 | |
542 | int Mrr_ordered_index_reader::init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, |
543 | void *seq_init_param, uint n_ranges, |
544 | uint mode, Key_parameters *key_par_arg, |
545 | Lifo_buffer *key_buffer_arg, |
546 | Buffer_manager *buf_manager_arg) |
547 | { |
548 | file= h_arg; |
549 | key_buffer= key_buffer_arg; |
550 | buf_manager= buf_manager_arg; |
551 | keypar= *key_par_arg; |
552 | |
553 | KEY *key_info= &file->get_table()->key_info[file->active_index]; |
554 | keypar.index_ranges_unique= MY_TEST(key_info->flags & HA_NOSAME && |
555 | key_info->user_defined_key_parts == |
556 | my_count_bits(keypar.key_tuple_map)); |
557 | |
558 | mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode); |
559 | is_mrr_assoc= !MY_TEST(mode & HA_MRR_NO_ASSOCIATION); |
560 | mrr_funcs= *seq_funcs; |
561 | source_exhausted= FALSE; |
562 | read_was_interrupted= false; |
563 | have_saved_rowid= FALSE; |
564 | return 0; |
565 | } |
566 | |
567 | |
568 | static int rowid_cmp_reverse(void *file, uchar *a, uchar *b) |
569 | { |
570 | return - ((handler*)file)->cmp_ref(a, b); |
571 | } |
572 | |
573 | |
574 | int Mrr_ordered_rndpos_reader::init(handler *h_arg, |
575 | Mrr_index_reader *index_reader_arg, |
576 | uint mode, |
577 | Lifo_buffer *buf) |
578 | { |
579 | file= h_arg; |
580 | index_reader= index_reader_arg; |
581 | rowid_buffer= buf; |
582 | is_mrr_assoc= !MY_TEST(mode & HA_MRR_NO_ASSOCIATION); |
583 | index_reader_exhausted= FALSE; |
584 | index_reader_needs_refill= TRUE; |
585 | return 0; |
586 | } |
587 | |
588 | |
589 | /** |
590 | DS-MRR: Fill and sort the rowid buffer |
591 | |
592 | Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into |
593 | buffer. When the buffer is full or scan is completed, sort the buffer by |
594 | rowid and return. |
595 | |
596 | When this function returns, either rowid buffer is not empty, or the source |
597 | of lookup keys (i.e. ranges) is exhaused. |
598 | |
599 | @retval 0 OK, the next portion of rowids is in the buffer, |
600 | properly ordered |
601 | @retval other Error |
602 | */ |
603 | |
604 | int Mrr_ordered_rndpos_reader::refill_buffer(bool initial) |
605 | { |
606 | int res; |
607 | bool first_call= initial; |
608 | DBUG_ENTER("Mrr_ordered_rndpos_reader::refill_buffer" ); |
609 | |
610 | if (index_reader_exhausted) |
611 | DBUG_RETURN(HA_ERR_END_OF_FILE); |
612 | |
613 | while (initial || index_reader_needs_refill || |
614 | (res= refill_from_index_reader()) == HA_ERR_END_OF_FILE) |
615 | { |
616 | if ((res= index_reader->refill_buffer(initial))) |
617 | { |
618 | if (res == HA_ERR_END_OF_FILE) |
619 | index_reader_exhausted= TRUE; |
620 | break; |
621 | } |
622 | initial= FALSE; |
623 | index_reader_needs_refill= FALSE; |
624 | } |
625 | |
626 | if (!first_call && !index_reader_exhausted) |
627 | { |
628 | /* Ok, this was a successful buffer refill operation */ |
629 | THD *thd= current_thd; |
630 | status_var_increment(thd->status_var.ha_mrr_rowid_refills_count); |
631 | } |
632 | |
633 | DBUG_RETURN(res); |
634 | } |
635 | |
636 | |
637 | void Mrr_index_reader::position() |
638 | { |
639 | file->position(file->get_table()->record[0]); |
640 | } |
641 | |
642 | |
643 | /* |
644 | @brief Try to refill the rowid buffer without calling |
645 | index_reader->refill_buffer(). |
646 | */ |
647 | |
648 | int Mrr_ordered_rndpos_reader::refill_from_index_reader() |
649 | { |
650 | range_id_t range_info; |
651 | int res; |
652 | DBUG_ENTER("Mrr_ordered_rndpos_reader::refill_from_index_reader" ); |
653 | |
654 | DBUG_ASSERT(rowid_buffer->is_empty()); |
655 | index_rowid= index_reader->get_rowid_ptr(); |
656 | rowid_buffer->reset(); |
657 | rowid_buffer->setup_writing(file->ref_length, |
658 | is_mrr_assoc? sizeof(range_id_t) : 0); |
659 | |
660 | last_identical_rowid= NULL; |
661 | |
662 | index_reader->resume_read(); |
663 | while (rowid_buffer->can_write()) |
664 | { |
665 | res= index_reader->get_next(&range_info); |
666 | |
667 | if (res) |
668 | { |
669 | if (res != HA_ERR_END_OF_FILE) |
670 | DBUG_RETURN(res); |
671 | index_reader_needs_refill=TRUE; |
672 | break; |
673 | } |
674 | |
675 | index_reader->position(); |
676 | |
677 | /* Put rowid, or {rowid, range_id} pair into the buffer */ |
678 | rowid_buffer->write_ptr1= index_rowid; |
679 | rowid_buffer->write_ptr2= (uchar*)&range_info; |
680 | rowid_buffer->write(); |
681 | } |
682 | |
683 | /* |
684 | When index_reader_needs_refill=TRUE, this means we've got all of index |
685 | tuples for lookups keys that index_reader had. We are not in the middle |
686 | of an index read, so there is no need to call interrupt_read. |
687 | |
688 | Actually, we must not call interrupt_read(), because it could be that we |
689 | haven't read a single row (because all index lookups returned |
690 | HA_ERR_KEY_NOT_FOUND). In this case, interrupt_read() will cause [harmless] |
691 | valgrind warnings when trying to save garbage from table->record[0]. |
692 | */ |
693 | if (!index_reader_needs_refill) |
694 | index_reader->interrupt_read(); |
695 | /* Sort the buffer contents by rowid */ |
696 | rowid_buffer->sort((qsort2_cmp)rowid_cmp_reverse, (void*)file); |
697 | |
698 | rowid_buffer->setup_reading(file->ref_length, |
699 | is_mrr_assoc ? sizeof(range_id_t) : 0); |
700 | DBUG_RETURN(rowid_buffer->is_empty()? HA_ERR_END_OF_FILE : 0); |
701 | } |
702 | |
703 | |
704 | /* |
705 | Get the next {record, range_id} using ordered array of rowid+range_id pairs |
706 | |
707 | @note |
708 | Since we have sorted rowids, we try not to make multiple rnd_pos() calls |
709 | with the same rowid value. |
710 | */ |
711 | |
712 | int Mrr_ordered_rndpos_reader::get_next(range_id_t *range_info) |
713 | { |
714 | int res; |
715 | |
716 | /* |
717 | First, check if rowid buffer has elements with the same rowid value as |
718 | the previous. |
719 | */ |
720 | while (last_identical_rowid) |
721 | { |
722 | /* |
723 | Current record (the one we've returned in previous call) was obtained |
724 | from a rowid that matched multiple range_ids. Return this record again, |
725 | with next matching range_id. |
726 | */ |
727 | (void)rowid_buffer->read(); |
728 | |
729 | if (rowid_buffer->read_ptr1 == last_identical_rowid) |
730 | last_identical_rowid= NULL; /* reached the last of identical rowids */ |
731 | |
732 | if (!is_mrr_assoc) |
733 | return 0; |
734 | |
735 | memcpy(range_info, rowid_buffer->read_ptr2, sizeof(range_id_t)); |
736 | if (!index_reader->skip_record(*range_info, rowid_buffer->read_ptr1)) |
737 | return 0; |
738 | } |
739 | |
740 | /* |
741 | Ok, last_identical_rowid==NULL, it's time to read next different rowid |
742 | value and get record for it. |
743 | */ |
744 | for(;;) |
745 | { |
746 | /* Return eof if there are no rowids in the buffer after re-fill attempt */ |
747 | if (rowid_buffer->read()) |
748 | return HA_ERR_END_OF_FILE; |
749 | |
750 | if (is_mrr_assoc) |
751 | { |
752 | memcpy(range_info, rowid_buffer->read_ptr2, sizeof(range_id_t)); |
753 | if (index_reader->skip_record(*range_info, rowid_buffer->read_ptr1)) |
754 | continue; |
755 | } |
756 | |
757 | res= file->ha_rnd_pos(file->get_table()->record[0], |
758 | rowid_buffer->read_ptr1); |
759 | |
760 | if (res) |
761 | return res; /* Some fatal error */ |
762 | |
763 | break; /* Got another record */ |
764 | } |
765 | |
766 | /* |
767 | Check if subsequent buffer elements have the same rowid value as this |
768 | one. If yes, remember this fact so that we don't make any more rnd_pos() |
769 | calls with this value. |
770 | |
771 | Note: this implies that SQL layer doesn't touch table->record[0] |
772 | between calls. |
773 | */ |
774 | Lifo_buffer_iterator it; |
775 | it.init(rowid_buffer); |
776 | while (!it.read()) |
777 | { |
778 | if (file->cmp_ref(it.read_ptr1, rowid_buffer->read_ptr1)) |
779 | break; |
780 | last_identical_rowid= it.read_ptr1; |
781 | } |
782 | return 0; |
783 | } |
784 | |
785 | |
786 | /**************************************************************************** |
787 | * Top-level DS-MRR implementation functions (the ones called by storage engine) |
788 | ***************************************************************************/ |
789 | |
790 | /** |
791 | DS-MRR: Initialize and start MRR scan |
792 | |
793 | Initialize and start the MRR scan. Depending on the mode parameter, this |
794 | may use default or DS-MRR implementation. |
795 | |
796 | @param h_arg Table handler to be used |
797 | @param key Index to be used |
798 | @param seq_funcs Interval sequence enumeration functions |
799 | @param seq_init_param Interval sequence enumeration parameter |
800 | @param n_ranges Number of ranges in the sequence. |
801 | @param mode HA_MRR_* modes to use |
802 | @param buf INOUT Buffer to use |
803 | |
804 | @retval 0 Ok, Scan started. |
805 | @retval other Error |
806 | */ |
807 | |
808 | int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, |
809 | void *seq_init_param, uint n_ranges, uint mode, |
810 | HANDLER_BUFFER *buf) |
811 | { |
812 | THD *thd= h_arg->get_table()->in_use; |
813 | int res; |
814 | Key_parameters keypar; |
815 | uint UNINIT_VAR(key_buff_elem_size); /* set/used when do_sort_keys==TRUE */ |
816 | handler *h_idx; |
817 | Mrr_ordered_rndpos_reader *disk_strategy= NULL; |
818 | bool do_sort_keys= FALSE; |
819 | DBUG_ENTER("DsMrr_impl::dsmrr_init" ); |
820 | /* |
821 | index_merge may invoke a scan on an object for which dsmrr_info[_const] |
822 | has not been called, so set the owner handler here as well. |
823 | */ |
824 | primary_file= h_arg; |
825 | is_mrr_assoc= !MY_TEST(mode & HA_MRR_NO_ASSOCIATION); |
826 | |
827 | strategy_exhausted= FALSE; |
828 | |
829 | /* By default, have do-nothing buffer manager */ |
830 | buf_manager.arg= this; |
831 | buf_manager.reset_buffer_sizes= do_nothing; |
832 | buf_manager.redistribute_buffer_space= do_nothing; |
833 | |
834 | if (mode & (HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED)) |
835 | goto use_default_impl; |
836 | |
837 | /* |
838 | Determine whether we'll need to do key sorting and/or rnd_pos() scan |
839 | */ |
840 | index_strategy= NULL; |
841 | if ((mode & HA_MRR_SINGLE_POINT) && |
842 | optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS)) |
843 | { |
844 | do_sort_keys= TRUE; |
845 | index_strategy= &reader_factory.ordered_index_reader; |
846 | } |
847 | else |
848 | index_strategy= &reader_factory.simple_index_reader; |
849 | |
850 | strategy= index_strategy; |
851 | /* |
852 | We don't need a rowid-to-rndpos step if |
853 | - We're doing a scan on clustered primary key |
854 | - [In the future] We're doing an index_only read |
855 | */ |
856 | DBUG_ASSERT(primary_file->inited == handler::INDEX || |
857 | (primary_file->inited == handler::RND && |
858 | secondary_file && |
859 | secondary_file->inited == handler::INDEX)); |
860 | |
861 | h_idx= (primary_file->inited == handler::INDEX)? primary_file: secondary_file; |
862 | keyno= h_idx->active_index; |
863 | |
864 | if (!(keyno == table->s->primary_key && h_idx->primary_key_is_clustered())) |
865 | { |
866 | strategy= disk_strategy= &reader_factory.ordered_rndpos_reader; |
867 | } |
868 | |
869 | full_buf= buf->buffer; |
870 | full_buf_end= buf->buffer_end; |
871 | |
872 | if (do_sort_keys) |
873 | { |
874 | /* Pre-calculate some parameters of key sorting */ |
875 | keypar.use_key_pointers= MY_TEST(mode & HA_MRR_MATERIALIZED_KEYS); |
876 | seq_funcs->get_key_info(seq_init_param, &keypar.key_tuple_length, |
877 | &keypar.key_tuple_map); |
878 | keypar.key_size_in_keybuf= keypar.use_key_pointers? |
879 | sizeof(char*) : keypar.key_tuple_length; |
880 | key_buff_elem_size= keypar.key_size_in_keybuf + (int)is_mrr_assoc * sizeof(void*); |
881 | |
882 | /* Ordered index reader needs some space to store an index tuple */ |
883 | if (strategy != index_strategy) |
884 | { |
885 | uint saved_pk_length=0; |
886 | if (h_idx->primary_key_is_clustered()) |
887 | { |
888 | uint pk= h_idx->get_table()->s->primary_key; |
889 | if (pk != MAX_KEY) |
890 | saved_pk_length= h_idx->get_table()->key_info[pk].key_length; |
891 | } |
892 | |
893 | KEY *used_index= &h_idx->get_table()->key_info[h_idx->active_index]; |
894 | if (reader_factory.ordered_index_reader. |
895 | set_interruption_temp_buffer(primary_file->ref_length, |
896 | used_index->key_length, |
897 | saved_pk_length, |
898 | &full_buf, full_buf_end)) |
899 | goto use_default_impl; |
900 | } |
901 | else |
902 | reader_factory.ordered_index_reader.set_no_interruption_temp_buffer(); |
903 | } |
904 | |
905 | if (strategy == index_strategy) |
906 | { |
907 | /* |
908 | Index strategy alone handles the record retrieval. Give all buffer space |
909 | to it. Key buffer should have forward orientation so we can return the |
910 | end of it. |
911 | */ |
912 | key_buffer= &forward_key_buf; |
913 | key_buffer->set_buffer_space(full_buf, full_buf_end); |
914 | |
915 | /* Safety: specify that rowid buffer has zero size: */ |
916 | rowid_buffer.set_buffer_space(full_buf_end, full_buf_end); |
917 | |
918 | if (do_sort_keys && !key_buffer->have_space_for(key_buff_elem_size)) |
919 | goto use_default_impl; |
920 | |
921 | if ((res= index_strategy->init(primary_file, seq_funcs, seq_init_param, n_ranges, |
922 | mode, &keypar, key_buffer, &buf_manager))) |
923 | goto error; |
924 | } |
925 | else |
926 | { |
927 | /* We'll have both index and rndpos strategies working together */ |
928 | if (do_sort_keys) |
929 | { |
930 | /* Both strategies will need buffer space, share the buffer */ |
931 | if (setup_buffer_sharing(keypar.key_size_in_keybuf, keypar.key_tuple_map)) |
932 | goto use_default_impl; |
933 | |
934 | buf_manager.reset_buffer_sizes= reset_buffer_sizes; |
935 | buf_manager.redistribute_buffer_space= redistribute_buffer_space; |
936 | } |
937 | else |
938 | { |
939 | /* index strategy doesn't need buffer, give all space to rowids*/ |
940 | rowid_buffer.set_buffer_space(full_buf, full_buf_end); |
941 | if (!rowid_buffer.have_space_for(primary_file->ref_length + |
942 | (int)is_mrr_assoc * sizeof(range_id_t))) |
943 | goto use_default_impl; |
944 | } |
945 | |
946 | if ((res= setup_two_handlers())) |
947 | goto error; |
948 | |
949 | if ((res= index_strategy->init(secondary_file, seq_funcs, seq_init_param, |
950 | n_ranges, mode, &keypar, key_buffer, |
951 | &buf_manager)) || |
952 | (res= disk_strategy->init(primary_file, index_strategy, mode, |
953 | &rowid_buffer))) |
954 | { |
955 | goto error; |
956 | } |
957 | } |
958 | |
959 | /* |
960 | At this point, we're sure that we're running a native MRR scan (i.e. we |
961 | didnt fall back to default implementation for some reason). |
962 | */ |
963 | status_var_increment(thd->status_var.ha_mrr_init_count); |
964 | |
965 | res= strategy->refill_buffer(TRUE); |
966 | if (res) |
967 | { |
968 | if (res != HA_ERR_END_OF_FILE) |
969 | goto error; |
970 | strategy_exhausted= TRUE; |
971 | } |
972 | |
973 | /* |
974 | If we have scanned through all intervals in *seq, then adjust *buf to |
975 | indicate that the remaining buffer space will not be used. |
976 | */ |
977 | // if (dsmrr_eof) |
978 | // buf->end_of_used_area= rowid_buffer.end_of_space(); |
979 | |
980 | |
981 | DBUG_RETURN(0); |
982 | error: |
983 | close_second_handler(); |
984 | /* Safety, not really needed but: */ |
985 | strategy= NULL; |
986 | DBUG_RETURN(res); |
987 | |
988 | use_default_impl: |
989 | if (primary_file->inited != handler::INDEX) |
990 | { |
991 | /* We can get here when |
992 | - we've previously successfully done a DS-MRR scan (and so have |
993 | secondary_file!= NULL, secondary_file->inited= INDEX, |
994 | primary_file->inited=RND) |
995 | - for this invocation, we haven't got enough buffer space, and so we |
996 | have to use the default MRR implementation. |
997 | |
998 | note: primary_file->ha_index_end() will call dsmrr_close() which will |
999 | close/destroy the secondary_file, this is intentional. |
1000 | (Yes this is slow, but one can't expect performance with join buffer |
1001 | so small that it can accomodate one rowid and one index tuple) |
1002 | */ |
1003 | if ((res= primary_file->ha_rnd_end()) || |
1004 | (res= primary_file->ha_index_init(keyno, MY_TEST(mode & HA_MRR_SORTED)))) |
1005 | { |
1006 | DBUG_RETURN(res); |
1007 | } |
1008 | } |
1009 | /* Call correct init function and assign to top level object */ |
1010 | Mrr_simple_index_reader *s= &reader_factory.simple_index_reader; |
1011 | res= s->init(primary_file, seq_funcs, seq_init_param, n_ranges, mode, NULL, |
1012 | NULL, NULL); |
1013 | strategy= s; |
1014 | DBUG_RETURN(res); |
1015 | } |
1016 | |
1017 | |
1018 | /* |
1019 | Whatever the current state is, make it so that we have two handler objects: |
1020 | - primary_file - initialized for rnd_pos() scan |
1021 | - secondary_file - initialized for scanning the index specified in |
1022 | this->keyno |
1023 | RETURN |
1024 | 0 OK |
1025 | HA_XXX Error code |
1026 | */ |
1027 | |
1028 | int DsMrr_impl::setup_two_handlers() |
1029 | { |
1030 | int res; |
1031 | THD *thd= primary_file->get_table()->in_use; |
1032 | DBUG_ENTER("DsMrr_impl::setup_two_handlers" ); |
1033 | if (!secondary_file) |
1034 | { |
1035 | handler *new_h2; |
1036 | Item *pushed_cond= NULL; |
1037 | DBUG_ASSERT(primary_file->inited == handler::INDEX); |
1038 | /* Create a separate handler object to do rnd_pos() calls. */ |
1039 | /* |
1040 | ::clone() takes up a lot of stack, especially on 64 bit platforms. |
1041 | The constant 5 is an empiric result. |
1042 | */ |
1043 | if (check_stack_overrun(thd, 5*STACK_MIN_SIZE, (uchar*) &new_h2)) |
1044 | DBUG_RETURN(1); |
1045 | |
1046 | /* Create a separate handler object to do rnd_pos() calls. */ |
1047 | if (!(new_h2= primary_file->clone(primary_file->get_table()->s-> |
1048 | normalized_path.str, |
1049 | thd->mem_root)) || |
1050 | new_h2->ha_external_lock(thd, F_RDLCK)) |
1051 | { |
1052 | delete new_h2; |
1053 | DBUG_RETURN(1); |
1054 | } |
1055 | |
1056 | if (keyno == primary_file->pushed_idx_cond_keyno) |
1057 | pushed_cond= primary_file->pushed_idx_cond; |
1058 | |
1059 | Mrr_reader *save_strategy= strategy; |
1060 | strategy= NULL; |
1061 | /* |
1062 | Caution: this call will invoke this->dsmrr_close(). Do not put the |
1063 | created secondary table handler new_h2 into this->secondary_file or it |
1064 | will delete it. Also, save the picked strategy |
1065 | */ |
1066 | res= primary_file->ha_index_end(); |
1067 | |
1068 | strategy= save_strategy; |
1069 | secondary_file= new_h2; |
1070 | |
1071 | if (res || (res= (primary_file->ha_rnd_init(FALSE)))) |
1072 | goto error; |
1073 | |
1074 | table->prepare_for_position(); |
1075 | secondary_file->extra(HA_EXTRA_KEYREAD); |
1076 | secondary_file->mrr_iter= primary_file->mrr_iter; |
1077 | |
1078 | if ((res= secondary_file->ha_index_init(keyno, FALSE))) |
1079 | goto error; |
1080 | |
1081 | if (pushed_cond) |
1082 | secondary_file->idx_cond_push(keyno, pushed_cond); |
1083 | } |
1084 | else |
1085 | { |
1086 | DBUG_ASSERT(secondary_file && secondary_file->inited==handler::INDEX); |
1087 | /* |
1088 | We get here when the access alternates betwen MRR scan(s) and non-MRR |
1089 | scans. |
1090 | |
1091 | Calling primary_file->index_end() will invoke dsmrr_close() for this object, |
1092 | which will delete secondary_file. We need to keep it, so put it away and dont |
1093 | let it be deleted: |
1094 | */ |
1095 | if (primary_file->inited == handler::INDEX) |
1096 | { |
1097 | handler *save_h2= secondary_file; |
1098 | Mrr_reader *save_strategy= strategy; |
1099 | secondary_file= NULL; |
1100 | strategy= NULL; |
1101 | res= primary_file->ha_index_end(); |
1102 | secondary_file= save_h2; |
1103 | strategy= save_strategy; |
1104 | if (res) |
1105 | goto error; |
1106 | } |
1107 | if ((primary_file->inited != handler::RND) && |
1108 | (res= primary_file->ha_rnd_init(FALSE))) |
1109 | goto error; |
1110 | } |
1111 | DBUG_RETURN(0); |
1112 | |
1113 | error: |
1114 | DBUG_RETURN(res); |
1115 | } |
1116 | |
1117 | |
1118 | void DsMrr_impl::close_second_handler() |
1119 | { |
1120 | if (secondary_file) |
1121 | { |
1122 | secondary_file->extra(HA_EXTRA_NO_KEYREAD); |
1123 | secondary_file->ha_index_or_rnd_end(); |
1124 | secondary_file->ha_external_lock(current_thd, F_UNLCK); |
1125 | secondary_file->ha_close(); |
1126 | delete secondary_file; |
1127 | secondary_file= NULL; |
1128 | } |
1129 | } |
1130 | |
1131 | |
1132 | void DsMrr_impl::dsmrr_close() |
1133 | { |
1134 | DBUG_ENTER("DsMrr_impl::dsmrr_close" ); |
1135 | close_second_handler(); |
1136 | strategy= NULL; |
1137 | DBUG_VOID_RETURN; |
1138 | } |
1139 | |
1140 | |
1141 | /* |
1142 | my_qsort2-compatible static member function to compare key tuples |
1143 | */ |
1144 | |
1145 | int Mrr_ordered_index_reader::compare_keys(void* arg, uchar* key1_arg, |
1146 | uchar* key2_arg) |
1147 | { |
1148 | Mrr_ordered_index_reader *reader= (Mrr_ordered_index_reader*)arg; |
1149 | TABLE *table= reader->file->get_table(); |
1150 | KEY_PART_INFO *part= table->key_info[reader->file->active_index].key_part; |
1151 | uchar *key1, *key2; |
1152 | |
1153 | if (reader->keypar.use_key_pointers) |
1154 | { |
1155 | /* the buffer stores pointers to keys, get to the keys */ |
1156 | memcpy(&key1, key1_arg, sizeof(char*)); |
1157 | memcpy(&key2, key2_arg, sizeof(char*)); |
1158 | } |
1159 | else |
1160 | { |
1161 | key1= key1_arg; |
1162 | key2= key2_arg; |
1163 | } |
1164 | |
1165 | return key_tuple_cmp(part, key1, key2, reader->keypar.key_tuple_length); |
1166 | } |
1167 | |
1168 | |
1169 | int Mrr_ordered_index_reader::compare_keys_reverse(void* arg, uchar* key1, |
1170 | uchar* key2) |
1171 | { |
1172 | return -compare_keys(arg, key1, key2); |
1173 | } |
1174 | |
1175 | |
1176 | /** |
1177 | Set the buffer space to be shared between rowid and key buffer |
1178 | |
1179 | @return FALSE ok |
1180 | @return TRUE There is so little buffer space that we won't be able to use |
1181 | the strategy. |
1182 | This happens when we don't have enough space for one rowid |
1183 | element and one key element so this is mainly targeted at |
1184 | testing. |
1185 | */ |
1186 | |
1187 | bool DsMrr_impl::setup_buffer_sharing(uint key_size_in_keybuf, |
1188 | key_part_map key_tuple_map) |
1189 | { |
1190 | long key_buff_elem_size= key_size_in_keybuf + |
1191 | (int)is_mrr_assoc * sizeof(range_id_t); |
1192 | |
1193 | KEY *key_info= &primary_file->get_table()->key_info[keyno]; |
1194 | /* |
1195 | Ok if we got here we need to allocate one part of the buffer |
1196 | for keys and another part for rowids. |
1197 | */ |
1198 | ulonglong rowid_buf_elem_size= primary_file->ref_length + |
1199 | (int)is_mrr_assoc * sizeof(range_id_t); |
1200 | |
1201 | /* |
1202 | Use rec_per_key statistics as a basis to find out how many rowids |
1203 | we'll get for each key value. |
1204 | TODO: what should be the default value to use when there is no |
1205 | statistics? |
1206 | */ |
1207 | uint parts= my_count_bits(key_tuple_map); |
1208 | ha_rows rpc; |
1209 | ulonglong rowids_size= rowid_buf_elem_size; |
1210 | if ((rpc= (ha_rows) key_info->actual_rec_per_key(parts - 1))) |
1211 | rowids_size= rowid_buf_elem_size * rpc; |
1212 | |
1213 | double fraction_for_rowids= |
1214 | (ulonglong2double(rowids_size) / |
1215 | (ulonglong2double(rowids_size) + key_buff_elem_size)); |
1216 | |
1217 | ptrdiff_t bytes_for_rowids= |
1218 | (ptrdiff_t)floor(0.5 + fraction_for_rowids * (full_buf_end - full_buf)); |
1219 | |
1220 | ptrdiff_t bytes_for_keys= (full_buf_end - full_buf) - bytes_for_rowids; |
1221 | |
1222 | if (bytes_for_keys < key_buff_elem_size + 1 || |
1223 | bytes_for_rowids < (ptrdiff_t)rowid_buf_elem_size + 1) |
1224 | return TRUE; /* Failed to provide minimum space for one of the buffers */ |
1225 | |
1226 | rowid_buffer_end= full_buf + bytes_for_rowids; |
1227 | rowid_buffer.set_buffer_space(full_buf, rowid_buffer_end); |
1228 | key_buffer= &backward_key_buf; |
1229 | key_buffer->set_buffer_space(rowid_buffer_end, full_buf_end); |
1230 | |
1231 | /* The above code guarantees that the buffers are big enough */ |
1232 | DBUG_ASSERT(key_buffer->have_space_for(key_buff_elem_size) && |
1233 | rowid_buffer.have_space_for((size_t)rowid_buf_elem_size)); |
1234 | |
1235 | return FALSE; |
1236 | } |
1237 | |
1238 | |
1239 | void DsMrr_impl::do_nothing(void *dsmrr_arg) |
1240 | { |
1241 | /* Do nothing */ |
1242 | } |
1243 | |
1244 | |
1245 | void DsMrr_impl::reset_buffer_sizes(void *dsmrr_arg) |
1246 | { |
1247 | DsMrr_impl *dsmrr= (DsMrr_impl*)dsmrr_arg; |
1248 | dsmrr->rowid_buffer.set_buffer_space(dsmrr->full_buf, |
1249 | dsmrr->rowid_buffer_end); |
1250 | dsmrr->key_buffer->set_buffer_space(dsmrr->rowid_buffer_end, |
1251 | dsmrr->full_buf_end); |
1252 | } |
1253 | |
1254 | |
1255 | /* |
1256 | Take unused space from the key buffer and give it to the rowid buffer |
1257 | */ |
1258 | |
1259 | void DsMrr_impl::redistribute_buffer_space(void *dsmrr_arg) |
1260 | { |
1261 | DsMrr_impl *dsmrr= (DsMrr_impl*)dsmrr_arg; |
1262 | uchar *unused_start, *unused_end; |
1263 | dsmrr->key_buffer->remove_unused_space(&unused_start, &unused_end); |
1264 | dsmrr->rowid_buffer.grow(unused_start, unused_end); |
1265 | } |
1266 | |
1267 | |
1268 | /* |
1269 | @brief Initialize the iterator |
1270 | |
1271 | @note |
1272 | Initialize the iterator to produce matches for the key of the first element |
1273 | in owner_arg->key_buffer |
1274 | |
1275 | @retval 0 OK |
1276 | @retval HA_ERR_END_OF_FILE Either the owner->key_buffer is empty or |
1277 | no matches for the key we've tried (check |
1278 | key_buffer->is_empty() to tell these apart) |
1279 | @retval other code Fatal error |
1280 | */ |
1281 | |
1282 | int Key_value_records_iterator::init(Mrr_ordered_index_reader *owner_arg) |
1283 | { |
1284 | int res; |
1285 | owner= owner_arg; |
1286 | |
1287 | identical_key_it.init(owner->key_buffer); |
1288 | owner->key_buffer->setup_reading(owner->keypar.key_size_in_keybuf, |
1289 | owner->is_mrr_assoc ? sizeof(void*) : 0); |
1290 | |
1291 | if (identical_key_it.read()) |
1292 | return HA_ERR_END_OF_FILE; |
1293 | |
1294 | uchar *key_in_buf= last_identical_key_ptr= identical_key_it.read_ptr1; |
1295 | |
1296 | uchar *index_tuple= key_in_buf; |
1297 | if (owner->keypar.use_key_pointers) |
1298 | memcpy(&index_tuple, key_in_buf, sizeof(char*)); |
1299 | |
1300 | /* Check out how many more identical keys are following */ |
1301 | while (!identical_key_it.read()) |
1302 | { |
1303 | if (Mrr_ordered_index_reader::compare_keys(owner, key_in_buf, |
1304 | identical_key_it.read_ptr1)) |
1305 | break; |
1306 | last_identical_key_ptr= identical_key_it.read_ptr1; |
1307 | } |
1308 | identical_key_it.init(owner->key_buffer); |
1309 | res= owner->file->ha_index_read_map(owner->file->get_table()->record[0], |
1310 | index_tuple, |
1311 | owner->keypar.key_tuple_map, |
1312 | HA_READ_KEY_EXACT); |
1313 | |
1314 | if (res) |
1315 | { |
1316 | /* Failed to find any matching records */ |
1317 | move_to_next_key_value(); |
1318 | return res; |
1319 | } |
1320 | owner->have_saved_rowid= FALSE; |
1321 | get_next_row= FALSE; |
1322 | return 0; |
1323 | } |
1324 | |
1325 | |
1326 | int Key_value_records_iterator::get_next(range_id_t *range_info) |
1327 | { |
1328 | int res; |
1329 | |
1330 | if (get_next_row) |
1331 | { |
1332 | if (owner->keypar.index_ranges_unique) |
1333 | { |
1334 | /* We're using a full unique key, no point to call index_next_same */ |
1335 | return HA_ERR_END_OF_FILE; |
1336 | } |
1337 | |
1338 | handler *h= owner->file; |
1339 | uchar *lookup_key; |
1340 | if (owner->keypar.use_key_pointers) |
1341 | memcpy(&lookup_key, identical_key_it.read_ptr1, sizeof(void*)); |
1342 | else |
1343 | lookup_key= identical_key_it.read_ptr1; |
1344 | |
1345 | if ((res= h->ha_index_next_same(h->get_table()->record[0], |
1346 | lookup_key, |
1347 | owner->keypar.key_tuple_length))) |
1348 | { |
1349 | /* It's either HA_ERR_END_OF_FILE or some other error */ |
1350 | return res; |
1351 | } |
1352 | identical_key_it.init(owner->key_buffer); |
1353 | owner->have_saved_rowid= FALSE; |
1354 | get_next_row= FALSE; |
1355 | } |
1356 | |
1357 | identical_key_it.read(); /* This gets us next range_id */ |
1358 | memcpy(range_info, identical_key_it.read_ptr2, sizeof(range_id_t)); |
1359 | |
1360 | if (!last_identical_key_ptr || |
1361 | (identical_key_it.read_ptr1 == last_identical_key_ptr)) |
1362 | { |
1363 | /* |
1364 | We've reached the last of the identical keys that current record is a |
1365 | match for. Set get_next_row=TRUE so that we read the next index record |
1366 | on the next call to this function. |
1367 | */ |
1368 | get_next_row= TRUE; |
1369 | } |
1370 | return 0; |
1371 | } |
1372 | |
1373 | |
1374 | void Key_value_records_iterator::move_to_next_key_value() |
1375 | { |
1376 | while (!owner->key_buffer->read() && |
1377 | (owner->key_buffer->read_ptr1 != last_identical_key_ptr)) {} |
1378 | } |
1379 | |
1380 | |
1381 | /** |
1382 | DS-MRR implementation: multi_range_read_next() function. |
1383 | |
1384 | Calling convention is like multi_range_read_next() has. |
1385 | */ |
1386 | |
1387 | int DsMrr_impl::dsmrr_next(range_id_t *range_info) |
1388 | { |
1389 | int res; |
1390 | if (strategy_exhausted) |
1391 | return HA_ERR_END_OF_FILE; |
1392 | |
1393 | while ((res= strategy->get_next(range_info)) == HA_ERR_END_OF_FILE) |
1394 | { |
1395 | if ((res= strategy->refill_buffer(FALSE))) |
1396 | break; /* EOF or error */ |
1397 | } |
1398 | return res; |
1399 | } |
1400 | |
1401 | |
1402 | /** |
1403 | DS-MRR implementation: multi_range_read_info() function |
1404 | */ |
1405 | ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows, |
1406 | uint key_parts, |
1407 | uint *bufsz, uint *flags, Cost_estimate *cost) |
1408 | { |
1409 | ha_rows res __attribute__((unused)); |
1410 | uint def_flags= *flags; |
1411 | uint def_bufsz= *bufsz; |
1412 | |
1413 | /* Get cost/flags/mem_usage of default MRR implementation */ |
1414 | res= primary_file->handler::multi_range_read_info(keyno, n_ranges, rows, |
1415 | key_parts, &def_bufsz, |
1416 | &def_flags, cost); |
1417 | DBUG_ASSERT(!res); |
1418 | |
1419 | if ((*flags & HA_MRR_USE_DEFAULT_IMPL) || |
1420 | choose_mrr_impl(keyno, rows, flags, bufsz, cost)) |
1421 | { |
1422 | /* Default implementation is choosen */ |
1423 | DBUG_PRINT("info" , ("Default MRR implementation choosen" )); |
1424 | *flags= def_flags; |
1425 | *bufsz= def_bufsz; |
1426 | } |
1427 | else |
1428 | { |
1429 | /* *flags and *bufsz were set by choose_mrr_impl */ |
1430 | DBUG_PRINT("info" , ("DS-MRR implementation choosen" )); |
1431 | } |
1432 | return 0; |
1433 | } |
1434 | |
1435 | |
1436 | /** |
1437 | DS-MRR Implementation: multi_range_read_info_const() function |
1438 | */ |
1439 | |
1440 | ha_rows DsMrr_impl::dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq, |
1441 | void *seq_init_param, uint n_ranges, |
1442 | uint *bufsz, uint *flags, Cost_estimate *cost) |
1443 | { |
1444 | ha_rows rows; |
1445 | uint def_flags= *flags; |
1446 | uint def_bufsz= *bufsz; |
1447 | /* Get cost/flags/mem_usage of default MRR implementation */ |
1448 | rows= primary_file->handler::multi_range_read_info_const(keyno, seq, |
1449 | seq_init_param, |
1450 | n_ranges, |
1451 | &def_bufsz, |
1452 | &def_flags, cost); |
1453 | if (rows == HA_POS_ERROR) |
1454 | { |
1455 | /* Default implementation can't perform MRR scan => we can't either */ |
1456 | return rows; |
1457 | } |
1458 | |
1459 | /* |
1460 | If HA_MRR_USE_DEFAULT_IMPL has been passed to us, that is an order to |
1461 | use the default MRR implementation (we need it for UPDATE/DELETE). |
1462 | Otherwise, make a choice based on cost and @@optimizer_switch settings |
1463 | */ |
1464 | if ((*flags & HA_MRR_USE_DEFAULT_IMPL) || |
1465 | choose_mrr_impl(keyno, rows, flags, bufsz, cost)) |
1466 | { |
1467 | DBUG_PRINT("info" , ("Default MRR implementation choosen" )); |
1468 | *flags= def_flags; |
1469 | *bufsz= def_bufsz; |
1470 | } |
1471 | else |
1472 | { |
1473 | /* *flags and *bufsz were set by choose_mrr_impl */ |
1474 | DBUG_PRINT("info" , ("DS-MRR implementation choosen" )); |
1475 | } |
1476 | return rows; |
1477 | } |
1478 | |
1479 | |
1480 | /** |
1481 | Check if key has partially-covered columns |
1482 | |
1483 | We can't use DS-MRR to perform range scans when the ranges are over |
1484 | partially-covered keys, because we'll not have full key part values |
1485 | (we'll have their prefixes from the index) and will not be able to check |
1486 | if we've reached the end the range. |
1487 | |
1488 | @param keyno Key to check |
1489 | |
1490 | @todo |
1491 | Allow use of DS-MRR in cases where the index has partially-covered |
1492 | components but they are not used for scanning. |
1493 | |
1494 | @retval TRUE Yes |
1495 | @retval FALSE No |
1496 | */ |
1497 | |
1498 | bool key_uses_partial_cols(TABLE_SHARE *share, uint keyno) |
1499 | { |
1500 | KEY_PART_INFO *kp= share->key_info[keyno].key_part; |
1501 | KEY_PART_INFO *kp_end= kp + share->key_info[keyno].user_defined_key_parts; |
1502 | for (; kp != kp_end; kp++) |
1503 | { |
1504 | if (!kp->field->part_of_key.is_set(keyno)) |
1505 | return TRUE; |
1506 | } |
1507 | return FALSE; |
1508 | } |
1509 | |
1510 | |
1511 | /* |
1512 | Check if key/flags allow DS-MRR/CPK strategy to be used |
1513 | |
1514 | @param thd |
1515 | @param keyno Index that will be used |
1516 | @param mrr_flags |
1517 | |
1518 | @retval TRUE DS-MRR/CPK should be used |
1519 | @retval FALSE Otherwise |
1520 | */ |
1521 | |
1522 | bool DsMrr_impl::check_cpk_scan(THD *thd, TABLE_SHARE *share, uint keyno, |
1523 | uint mrr_flags) |
1524 | { |
1525 | return MY_TEST((mrr_flags & HA_MRR_SINGLE_POINT) && |
1526 | keyno == share->primary_key && |
1527 | primary_file->primary_key_is_clustered() && |
1528 | optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS)); |
1529 | } |
1530 | |
1531 | |
1532 | /* |
1533 | DS-MRR Internals: Choose between Default MRR implementation and DS-MRR |
1534 | |
1535 | Make the choice between using Default MRR implementation and DS-MRR. |
1536 | This function contains common functionality factored out of dsmrr_info() |
1537 | and dsmrr_info_const(). The function assumes that the default MRR |
1538 | implementation's applicability requirements are satisfied. |
1539 | |
1540 | @param keyno Index number |
1541 | @param rows E(full rows to be retrieved) |
1542 | @param flags IN MRR flags provided by the MRR user |
1543 | OUT If DS-MRR is choosen, flags of DS-MRR implementation |
1544 | else the value is not modified |
1545 | @param bufsz IN If DS-MRR is choosen, buffer use of DS-MRR implementation |
1546 | else the value is not modified |
1547 | @param cost IN Cost of default MRR implementation |
1548 | OUT If DS-MRR is choosen, cost of DS-MRR scan |
1549 | else the value is not modified |
1550 | |
1551 | @retval TRUE Default MRR implementation should be used |
1552 | @retval FALSE DS-MRR implementation should be used |
1553 | */ |
1554 | |
1555 | |
1556 | bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, |
1557 | uint *bufsz, Cost_estimate *cost) |
1558 | { |
1559 | Cost_estimate dsmrr_cost; |
1560 | bool res; |
1561 | THD *thd= primary_file->get_table()->in_use; |
1562 | TABLE_SHARE *share= primary_file->get_table_share(); |
1563 | |
1564 | bool doing_cpk_scan= check_cpk_scan(thd, share, keyno, *flags); |
1565 | bool using_cpk= MY_TEST(keyno == share->primary_key && |
1566 | primary_file->primary_key_is_clustered()); |
1567 | *flags &= ~HA_MRR_IMPLEMENTATION_FLAGS; |
1568 | if (!optimizer_flag(thd, OPTIMIZER_SWITCH_MRR) || |
1569 | *flags & HA_MRR_INDEX_ONLY || |
1570 | (using_cpk && !doing_cpk_scan) || key_uses_partial_cols(share, keyno)) |
1571 | { |
1572 | /* Use the default implementation */ |
1573 | *flags |= HA_MRR_USE_DEFAULT_IMPL; |
1574 | *flags &= ~HA_MRR_IMPLEMENTATION_FLAGS; |
1575 | return TRUE; |
1576 | } |
1577 | |
1578 | uint add_len= share->key_info[keyno].key_length + primary_file->ref_length; |
1579 | *bufsz -= add_len; |
1580 | if (get_disk_sweep_mrr_cost(keyno, rows, *flags, bufsz, &dsmrr_cost)) |
1581 | return TRUE; |
1582 | *bufsz += add_len; |
1583 | |
1584 | bool force_dsmrr; |
1585 | /* |
1586 | If mrr_cost_based flag is not set, then set cost of DS-MRR to be minimum of |
1587 | DS-MRR and Default implementations cost. This allows one to force use of |
1588 | DS-MRR whenever it is applicable without affecting other cost-based |
1589 | choices. |
1590 | */ |
1591 | if ((force_dsmrr= !optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_COST_BASED)) && |
1592 | dsmrr_cost.total_cost() > cost->total_cost()) |
1593 | dsmrr_cost= *cost; |
1594 | |
1595 | if (force_dsmrr || dsmrr_cost.total_cost() <= cost->total_cost()) |
1596 | { |
1597 | *flags &= ~HA_MRR_USE_DEFAULT_IMPL; /* Use the DS-MRR implementation */ |
1598 | *flags &= ~HA_MRR_SORTED; /* We will return unordered output */ |
1599 | *cost= dsmrr_cost; |
1600 | res= FALSE; |
1601 | |
1602 | |
1603 | if ((using_cpk && doing_cpk_scan) || |
1604 | (optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS) && |
1605 | *flags & HA_MRR_SINGLE_POINT)) |
1606 | { |
1607 | *flags |= DSMRR_IMPL_SORT_KEYS; |
1608 | } |
1609 | |
1610 | if (!(using_cpk && doing_cpk_scan) && |
1611 | !(*flags & HA_MRR_INDEX_ONLY)) |
1612 | { |
1613 | *flags |= DSMRR_IMPL_SORT_ROWIDS; |
1614 | } |
1615 | /* |
1616 | if ((*flags & HA_MRR_SINGLE_POINT) && |
1617 | optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS)) |
1618 | *flags |= HA_MRR_MATERIALIZED_KEYS; |
1619 | */ |
1620 | } |
1621 | else |
1622 | { |
1623 | /* Use the default MRR implementation */ |
1624 | res= TRUE; |
1625 | } |
1626 | return res; |
1627 | } |
1628 | |
1629 | /* |
1630 | Take the flags we've returned previously and print one of |
1631 | - Key-ordered scan |
1632 | - Rowid-ordered scan |
1633 | - Key-ordered Rowid-ordered scan |
1634 | */ |
1635 | |
1636 | int DsMrr_impl::dsmrr_explain_info(uint mrr_mode, char *str, size_t size) |
1637 | { |
1638 | const char *key_ordered= "Key-ordered scan" ; |
1639 | const char *rowid_ordered= "Rowid-ordered scan" ; |
1640 | const char *both_ordered= "Key-ordered Rowid-ordered scan" ; |
1641 | const char *used_str="" ; |
1642 | const uint BOTH_FLAGS= (DSMRR_IMPL_SORT_KEYS | DSMRR_IMPL_SORT_ROWIDS); |
1643 | |
1644 | if (!(mrr_mode & HA_MRR_USE_DEFAULT_IMPL)) |
1645 | { |
1646 | if ((mrr_mode & BOTH_FLAGS) == BOTH_FLAGS) |
1647 | used_str= both_ordered; |
1648 | else if (mrr_mode & DSMRR_IMPL_SORT_KEYS) |
1649 | used_str= key_ordered; |
1650 | else if (mrr_mode & DSMRR_IMPL_SORT_ROWIDS) |
1651 | used_str= rowid_ordered; |
1652 | |
1653 | size_t used_str_len= strlen(used_str); |
1654 | size_t copy_len= MY_MIN(used_str_len, size); |
1655 | memcpy(str, used_str, copy_len); |
1656 | return (int)copy_len; |
1657 | } |
1658 | return 0; |
1659 | } |
1660 | |
1661 | |
1662 | static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, Cost_estimate *cost); |
1663 | |
1664 | |
1665 | /** |
1666 | Get cost of DS-MRR scan |
1667 | |
1668 | @param keynr Index to be used |
1669 | @param rows E(Number of rows to be scanned) |
1670 | @param flags Scan parameters (HA_MRR_* flags) |
1671 | @param buffer_size INOUT Buffer size |
1672 | @param cost OUT The cost |
1673 | |
1674 | @retval FALSE OK |
1675 | @retval TRUE Error, DS-MRR cannot be used (the buffer is too small |
1676 | for even 1 rowid) |
1677 | */ |
1678 | |
1679 | bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags, |
1680 | uint *buffer_size, Cost_estimate *cost) |
1681 | { |
1682 | ulong max_buff_entries, elem_size; |
1683 | ha_rows rows_in_full_step; |
1684 | ha_rows rows_in_last_step; |
1685 | uint n_full_steps; |
1686 | double index_read_cost; |
1687 | |
1688 | elem_size= primary_file->ref_length + |
1689 | sizeof(void*) * (!MY_TEST(flags & HA_MRR_NO_ASSOCIATION)); |
1690 | max_buff_entries = *buffer_size / elem_size; |
1691 | |
1692 | if (!max_buff_entries) |
1693 | return TRUE; /* Buffer has not enough space for even 1 rowid */ |
1694 | |
1695 | /* Number of iterations we'll make with full buffer */ |
1696 | n_full_steps= (uint)floor(rows2double(rows) / max_buff_entries); |
1697 | |
1698 | /* |
1699 | Get numbers of rows we'll be processing in |
1700 | - non-last sweep, with full buffer |
1701 | - last iteration, with non-full buffer |
1702 | */ |
1703 | rows_in_full_step= max_buff_entries; |
1704 | rows_in_last_step= rows % max_buff_entries; |
1705 | |
1706 | /* Adjust buffer size if we expect to use only part of the buffer */ |
1707 | if (n_full_steps) |
1708 | { |
1709 | get_sort_and_sweep_cost(table, rows_in_full_step, cost); |
1710 | cost->multiply(n_full_steps); |
1711 | } |
1712 | else |
1713 | { |
1714 | cost->reset(); |
1715 | *buffer_size= (uint)MY_MAX(*buffer_size, |
1716 | (size_t)(1.2*rows_in_last_step) * elem_size + |
1717 | primary_file->ref_length + table->key_info[keynr].key_length); |
1718 | } |
1719 | |
1720 | Cost_estimate last_step_cost; |
1721 | get_sort_and_sweep_cost(table, rows_in_last_step, &last_step_cost); |
1722 | cost->add(&last_step_cost); |
1723 | |
1724 | if (n_full_steps != 0) |
1725 | cost->mem_cost= *buffer_size; |
1726 | else |
1727 | cost->mem_cost= (double)rows_in_last_step * elem_size; |
1728 | |
1729 | /* Total cost of all index accesses */ |
1730 | index_read_cost= primary_file->keyread_time(keynr, 1, rows); |
1731 | cost->add_io(index_read_cost, 1 /* Random seeks */); |
1732 | return FALSE; |
1733 | } |
1734 | |
1735 | |
1736 | /* |
1737 | Get cost of one sort-and-sweep step |
1738 | |
1739 | It consists of two parts: |
1740 | - sort an array of #nrows ROWIDs using qsort |
1741 | - read #nrows records from table in a sweep. |
1742 | |
1743 | @param table Table being accessed |
1744 | @param nrows Number of rows to be sorted and retrieved |
1745 | @param cost OUT The cost of scan |
1746 | */ |
1747 | |
1748 | static |
1749 | void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, Cost_estimate *cost) |
1750 | { |
1751 | if (nrows) |
1752 | { |
1753 | get_sweep_read_cost(table, nrows, FALSE, cost); |
1754 | /* Add cost of qsort call: n * log2(n) * cost(rowid_comparison) */ |
1755 | double cmp_op= rows2double(nrows) * (1.0 / TIME_FOR_COMPARE_ROWID); |
1756 | if (cmp_op < 3) |
1757 | cmp_op= 3; |
1758 | cost->cpu_cost += cmp_op * log2(cmp_op); |
1759 | } |
1760 | else |
1761 | cost->reset(); |
1762 | } |
1763 | |
1764 | |
1765 | /** |
1766 | Get cost of reading nrows table records in a "disk sweep" |
1767 | |
1768 | A disk sweep read is a sequence of handler->rnd_pos(rowid) calls that made |
1769 | for an ordered sequence of rowids. |
1770 | |
1771 | We assume hard disk IO. The read is performed as follows: |
1772 | |
1773 | 1. The disk head is moved to the needed cylinder |
1774 | 2. The controller waits for the plate to rotate |
1775 | 3. The data is transferred |
1776 | |
1777 | Time to do #3 is insignificant compared to #2+#1. |
1778 | |
1779 | Time to move the disk head is proportional to head travel distance. |
1780 | |
1781 | Time to wait for the plate to rotate depends on whether the disk head |
1782 | was moved or not. |
1783 | |
1784 | If disk head wasn't moved, the wait time is proportional to distance |
1785 | between the previous block and the block we're reading. |
1786 | |
1787 | If the head was moved, we don't know how much we'll need to wait for the |
1788 | plate to rotate. We assume the wait time to be a variate with a mean of |
1789 | 0.5 of full rotation time. |
1790 | |
1791 | Our cost units are "random disk seeks". The cost of random disk seek is |
1792 | actually not a constant, it depends one range of cylinders we're going |
1793 | to access. We make it constant by introducing a fuzzy concept of "typical |
1794 | datafile length" (it's fuzzy as it's hard to tell whether it should |
1795 | include index file, temp.tables etc). Then random seek cost is: |
1796 | |
1797 | 1 = half_rotation_cost + move_cost * 1/3 * typical_data_file_length |
1798 | |
1799 | We define half_rotation_cost as DISK_SEEK_BASE_COST=0.9. |
1800 | |
1801 | @param table Table to be accessed |
1802 | @param nrows Number of rows to retrieve |
1803 | @param interrupted TRUE <=> Assume that the disk sweep will be |
1804 | interrupted by other disk IO. FALSE - otherwise. |
1805 | @param cost OUT The cost. |
1806 | */ |
1807 | |
1808 | void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted, |
1809 | Cost_estimate *cost) |
1810 | { |
1811 | DBUG_ENTER("get_sweep_read_cost" ); |
1812 | |
1813 | cost->reset(); |
1814 | if (table->file->primary_key_is_clustered()) |
1815 | { |
1816 | cost->io_count= table->file->read_time(table->s->primary_key, |
1817 | (uint) nrows, nrows); |
1818 | } |
1819 | else |
1820 | { |
1821 | double n_blocks= |
1822 | ceil(ulonglong2double(table->file->stats.data_file_length) / IO_SIZE); |
1823 | double busy_blocks= |
1824 | n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(nrows))); |
1825 | if (busy_blocks < 1.0) |
1826 | busy_blocks= 1.0; |
1827 | |
1828 | DBUG_PRINT("info" ,("sweep: nblocks=%g, busy_blocks=%g" , n_blocks, |
1829 | busy_blocks)); |
1830 | cost->io_count= busy_blocks; |
1831 | |
1832 | if (!interrupted) |
1833 | { |
1834 | /* Assume reading is done in one 'sweep' */ |
1835 | cost->avg_io_cost= (DISK_SEEK_BASE_COST + |
1836 | DISK_SEEK_PROP_COST*n_blocks/busy_blocks); |
1837 | } |
1838 | } |
1839 | DBUG_PRINT("info" ,("returning cost=%g" , cost->total_cost())); |
1840 | DBUG_VOID_RETURN; |
1841 | } |
1842 | |
1843 | |
1844 | /* ************************************************************************** |
1845 | * DS-MRR implementation ends |
1846 | ***************************************************************************/ |
1847 | |
1848 | |
1849 | |