| 1 | /* Copyright (C) 2010, 2011 Monty Program Ab |
| 2 | |
| 3 | This program is free software; you can redistribute it and/or modify |
| 4 | it under the terms of the GNU General Public License as published by |
| 5 | the Free Software Foundation; version 2 of the License. |
| 6 | |
| 7 | This program is distributed in the hope that it will be useful, |
| 8 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 10 | GNU General Public License for more details. |
| 11 | |
| 12 | You should have received a copy of the GNU General Public License |
| 13 | along with this program; if not, write to the Free Software |
| 14 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ |
| 15 | |
| 16 | #include "mariadb.h" |
| 17 | #include "sql_parse.h" |
| 18 | #include <my_bit.h> |
| 19 | #include "sql_select.h" |
| 20 | #include "key.h" |
| 21 | |
| 22 | /**************************************************************************** |
| 23 | * Default MRR implementation (MRR to non-MRR converter) |
| 24 | ***************************************************************************/ |
| 25 | |
| 26 | /** |
| 27 | Get cost and other information about MRR scan over a known list of ranges |
| 28 | |
| 29 | Calculate estimated cost and other information about an MRR scan for given |
| 30 | sequence of ranges. |
| 31 | |
| 32 | @param keyno Index number |
| 33 | @param seq Range sequence to be traversed |
| 34 | @param seq_init_param First parameter for seq->init() |
| 35 | @param n_ranges_arg Number of ranges in the sequence, or 0 if the caller |
| 36 | can't efficiently determine it |
| 37 | @param bufsz INOUT IN: Size of the buffer available for use |
| 38 | OUT: Size of the buffer that is expected to be actually |
| 39 | used, or 0 if buffer is not needed. |
| 40 | @param flags INOUT A combination of HA_MRR_* flags |
| 41 | @param cost OUT Estimated cost of MRR access |
| 42 | |
| 43 | @note |
| 44 | This method (or an overriding one in a derived class) must check for |
| 45 | thd->killed and return HA_POS_ERROR if it is not zero. This is required |
| 46 | for a user to be able to interrupt the calculation by killing the |
| 47 | connection/query. |
| 48 | |
| 49 | @retval |
| 50 | HA_POS_ERROR Error or the engine is unable to perform the requested |
| 51 | scan. Values of OUT parameters are undefined. |
| 52 | @retval |
| 53 | other OK, *cost contains cost of the scan, *bufsz and *flags |
| 54 | contain scan parameters. |
| 55 | */ |
| 56 | |
| 57 | ha_rows |
| 58 | handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, |
| 59 | void *seq_init_param, uint n_ranges_arg, |
| 60 | uint *bufsz, uint *flags, Cost_estimate *cost) |
| 61 | { |
| 62 | KEY_MULTI_RANGE range; |
| 63 | range_seq_t seq_it; |
| 64 | ha_rows rows, total_rows= 0; |
| 65 | uint n_ranges=0; |
| 66 | THD *thd= table->in_use; |
| 67 | |
| 68 | /* Default MRR implementation doesn't need buffer */ |
| 69 | *bufsz= 0; |
| 70 | |
| 71 | seq_it= seq->init(seq_init_param, n_ranges, *flags); |
| 72 | while (!seq->next(seq_it, &range)) |
| 73 | { |
| 74 | if (unlikely(thd->killed != 0)) |
| 75 | return HA_POS_ERROR; |
| 76 | |
| 77 | n_ranges++; |
| 78 | key_range *min_endp, *max_endp; |
| 79 | if (range.range_flag & GEOM_FLAG) |
| 80 | { |
| 81 | /* In this case tmp_min_flag contains the handler-read-function */ |
| 82 | range.start_key.flag= (ha_rkey_function) (range.range_flag ^ GEOM_FLAG); |
| 83 | min_endp= &range.start_key; |
| 84 | max_endp= NULL; |
| 85 | } |
| 86 | else |
| 87 | { |
| 88 | min_endp= range.start_key.length? &range.start_key : NULL; |
| 89 | max_endp= range.end_key.length? &range.end_key : NULL; |
| 90 | } |
| 91 | if ((range.range_flag & UNIQUE_RANGE) && !(range.range_flag & NULL_RANGE)) |
| 92 | rows= 1; /* there can be at most one row */ |
| 93 | else |
| 94 | { |
| 95 | if (HA_POS_ERROR == (rows= this->records_in_range(keyno, min_endp, |
| 96 | max_endp))) |
| 97 | { |
| 98 | /* Can't scan one range => can't do MRR scan at all */ |
| 99 | total_rows= HA_POS_ERROR; |
| 100 | break; |
| 101 | } |
| 102 | } |
| 103 | total_rows += rows; |
| 104 | } |
| 105 | |
| 106 | if (total_rows != HA_POS_ERROR) |
| 107 | { |
| 108 | /* The following calculation is the same as in multi_range_read_info(): */ |
| 109 | *flags |= HA_MRR_USE_DEFAULT_IMPL; |
| 110 | cost->reset(); |
| 111 | cost->avg_io_cost= 1; /* assume random seeks */ |
| 112 | if ((*flags & HA_MRR_INDEX_ONLY) && total_rows > 2) |
| 113 | cost->io_count= keyread_time(keyno, n_ranges, (uint)total_rows); |
| 114 | else |
| 115 | cost->io_count= read_time(keyno, n_ranges, total_rows); |
| 116 | cost->cpu_cost= (double) total_rows / TIME_FOR_COMPARE + 0.01; |
| 117 | } |
| 118 | return total_rows; |
| 119 | } |
| 120 | |
| 121 | |
| 122 | /** |
| 123 | Get cost and other information about MRR scan over some sequence of ranges |
| 124 | |
| 125 | Calculate estimated cost and other information about an MRR scan for some |
| 126 | sequence of ranges. |
| 127 | |
| 128 | The ranges themselves will be known only at execution phase. When this |
| 129 | function is called we only know number of ranges and a (rough) E(#records) |
| 130 | within those ranges. |
| 131 | |
| 132 | Currently this function is only called for "n-keypart singlepoint" ranges, |
| 133 | i.e. each range is "keypart1=someconst1 AND ... AND keypartN=someconstN" |
| 134 | |
| 135 | The flags parameter is a combination of those flags: HA_MRR_SORTED, |
| 136 | HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION, HA_MRR_LIMITS. |
| 137 | |
| 138 | @param keyno Index number |
| 139 | @param n_ranges Estimated number of ranges (i.e. intervals) in the |
| 140 | range sequence. |
| 141 | @param n_rows Estimated total number of records contained within all |
| 142 | of the ranges |
| 143 | @param bufsz INOUT IN: Size of the buffer available for use |
| 144 | OUT: Size of the buffer that will be actually used, or |
| 145 | 0 if buffer is not needed. |
| 146 | @param flags INOUT A combination of HA_MRR_* flags |
| 147 | @param cost OUT Estimated cost of MRR access |
| 148 | |
| 149 | @retval |
| 150 | 0 OK, *cost contains cost of the scan, *bufsz and *flags contain scan |
| 151 | parameters. |
| 152 | @retval |
| 153 | other Error or can't perform the requested scan |
| 154 | */ |
| 155 | |
| 156 | ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows, |
| 157 | uint key_parts, uint *bufsz, |
| 158 | uint *flags, Cost_estimate *cost) |
| 159 | { |
| 160 | /* |
| 161 | Currently we expect this function to be called only in preparation of scan |
| 162 | with HA_MRR_SINGLE_POINT property. |
| 163 | */ |
| 164 | DBUG_ASSERT(*flags | HA_MRR_SINGLE_POINT); |
| 165 | |
| 166 | *bufsz= 0; /* Default implementation doesn't need a buffer */ |
| 167 | *flags |= HA_MRR_USE_DEFAULT_IMPL; |
| 168 | |
| 169 | cost->reset(); |
| 170 | cost->avg_io_cost= 1; /* assume random seeks */ |
| 171 | |
| 172 | /* Produce the same cost as non-MRR code does */ |
| 173 | if (*flags & HA_MRR_INDEX_ONLY) |
| 174 | cost->io_count= keyread_time(keyno, n_ranges, n_rows); |
| 175 | else |
| 176 | cost->io_count= read_time(keyno, n_ranges, n_rows); |
| 177 | return 0; |
| 178 | } |
| 179 | |
| 180 | |
| 181 | /** |
| 182 | Initialize the MRR scan |
| 183 | |
| 184 | Initialize the MRR scan. This function may do heavyweight scan |
| 185 | initialization like row prefetching/sorting/etc (NOTE: but better not do |
| 186 | it here as we may not need it, e.g. if we never satisfy WHERE clause on |
| 187 | previous tables. For many implementations it would be natural to do such |
| 188 | initializations in the first multi_read_range_next() call) |
| 189 | |
| 190 | mode is a combination of the following flags: HA_MRR_SORTED, |
| 191 | HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION |
| 192 | |
| 193 | @param seq Range sequence to be traversed |
| 194 | @param seq_init_param First parameter for seq->init() |
| 195 | @param n_ranges Number of ranges in the sequence |
| 196 | @param mode Flags, see the description section for the details |
| 197 | @param buf INOUT: memory buffer to be used |
| 198 | |
| 199 | @note |
| 200 | One must have called index_init() before calling this function. Several |
| 201 | multi_range_read_init() calls may be made in course of one query. |
| 202 | |
| 203 | Buffer memory management is done according to the following scenario: |
| 204 | The caller allocates the buffer and provides it to the callee by filling |
| 205 | the members of HANDLER_BUFFER structure. |
| 206 | The callee consumes all or some fraction of the provided buffer space, and |
| 207 | sets the HANDLER_BUFFER members accordingly. |
| 208 | The callee may use the buffer memory until the next multi_range_read_init() |
| 209 | call is made, all records have been read, or until index_end() call is |
| 210 | made, whichever comes first. |
| 211 | |
| 212 | @retval 0 OK |
| 213 | @retval 1 Error |
| 214 | */ |
| 215 | |
| 216 | int |
| 217 | handler::multi_range_read_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param, |
| 218 | uint n_ranges, uint mode, HANDLER_BUFFER *buf) |
| 219 | { |
| 220 | DBUG_ENTER("handler::multi_range_read_init" ); |
| 221 | mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode); |
| 222 | mrr_funcs= *seq_funcs; |
| 223 | mrr_is_output_sorted= MY_TEST(mode & HA_MRR_SORTED); |
| 224 | mrr_have_range= FALSE; |
| 225 | DBUG_RETURN(0); |
| 226 | } |
| 227 | |
| 228 | /** |
| 229 | Get next record in MRR scan |
| 230 | |
| 231 | Default MRR implementation: read the next record |
| 232 | |
| 233 | @param range_info OUT Undefined if HA_MRR_NO_ASSOCIATION flag is in effect |
| 234 | Otherwise, the opaque value associated with the range |
| 235 | that contains the returned record. |
| 236 | |
| 237 | @retval 0 OK |
| 238 | @retval other Error code |
| 239 | */ |
| 240 | |
| 241 | int handler::multi_range_read_next(range_id_t *range_info) |
| 242 | { |
| 243 | int result= HA_ERR_END_OF_FILE; |
| 244 | bool range_res; |
| 245 | DBUG_ENTER("handler::multi_range_read_next" ); |
| 246 | |
| 247 | if (!mrr_have_range) |
| 248 | { |
| 249 | mrr_have_range= TRUE; |
| 250 | goto start; |
| 251 | } |
| 252 | |
| 253 | do |
| 254 | { |
| 255 | /* Save a call if there can be only one row in range. */ |
| 256 | if (mrr_cur_range.range_flag != (UNIQUE_RANGE | EQ_RANGE)) |
| 257 | { |
| 258 | result= read_range_next(); |
| 259 | /* On success or non-EOF errors jump to the end. */ |
| 260 | if (result != HA_ERR_END_OF_FILE) |
| 261 | break; |
| 262 | } |
| 263 | else |
| 264 | { |
| 265 | if (ha_was_semi_consistent_read()) |
| 266 | { |
| 267 | /* |
| 268 | The following assignment is redundant, but for extra safety and to |
| 269 | remove the compiler warning: |
| 270 | */ |
| 271 | range_res= FALSE; |
| 272 | goto scan_it_again; |
| 273 | } |
| 274 | /* |
| 275 | We need to set this for the last range only, but checking this |
| 276 | condition is more expensive than just setting the result code. |
| 277 | */ |
| 278 | result= HA_ERR_END_OF_FILE; |
| 279 | } |
| 280 | |
| 281 | start: |
| 282 | /* Try the next range(s) until one matches a record. */ |
| 283 | while (!(range_res= mrr_funcs.next(mrr_iter, &mrr_cur_range))) |
| 284 | { |
| 285 | scan_it_again: |
| 286 | result= read_range_first(mrr_cur_range.start_key.keypart_map ? |
| 287 | &mrr_cur_range.start_key : 0, |
| 288 | mrr_cur_range.end_key.keypart_map ? |
| 289 | &mrr_cur_range.end_key : 0, |
| 290 | MY_TEST(mrr_cur_range.range_flag & EQ_RANGE), |
| 291 | mrr_is_output_sorted); |
| 292 | if (result != HA_ERR_END_OF_FILE) |
| 293 | break; |
| 294 | } |
| 295 | } |
| 296 | while ((result == HA_ERR_END_OF_FILE) && !range_res); |
| 297 | |
| 298 | *range_info= mrr_cur_range.ptr; |
| 299 | DBUG_PRINT("exit" ,("handler::multi_range_read_next result %d" , result)); |
| 300 | DBUG_RETURN(result); |
| 301 | } |
| 302 | |
| 303 | /**************************************************************************** |
| 304 | * Mrr_*_reader classes (building blocks for DS-MRR) |
| 305 | ***************************************************************************/ |
| 306 | |
| 307 | int Mrr_simple_index_reader::init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, |
| 308 | void *seq_init_param, uint n_ranges, |
| 309 | uint mode, Key_parameters *key_par_arg, |
| 310 | Lifo_buffer *key_buffer_arg, |
| 311 | Buffer_manager *buf_manager_arg) |
| 312 | { |
| 313 | HANDLER_BUFFER no_buffer = {NULL, NULL, NULL}; |
| 314 | file= h_arg; |
| 315 | return file->handler::multi_range_read_init(seq_funcs, seq_init_param, |
| 316 | n_ranges, mode, &no_buffer); |
| 317 | } |
| 318 | |
| 319 | |
| 320 | int Mrr_simple_index_reader::get_next(range_id_t *range_info) |
| 321 | { |
| 322 | int res; |
| 323 | while (!(res= file->handler::multi_range_read_next(range_info))) |
| 324 | { |
| 325 | KEY_MULTI_RANGE *curr_range= &file->handler::mrr_cur_range; |
| 326 | if (!file->mrr_funcs.skip_index_tuple || |
| 327 | !file->mrr_funcs.skip_index_tuple(file->mrr_iter, curr_range->ptr)) |
| 328 | break; |
| 329 | } |
| 330 | if (res && res != HA_ERR_END_OF_FILE && res != HA_ERR_KEY_NOT_FOUND) |
| 331 | file->print_error(res, MYF(0)); // Fatal error |
| 332 | return res; |
| 333 | } |
| 334 | |
| 335 | |
| 336 | /** |
| 337 | @brief Get next index record |
| 338 | |
| 339 | @param range_info OUT identifier of range that the returned record belongs to |
| 340 | |
| 341 | @note |
| 342 | We actually iterate over nested sequences: |
| 343 | - an ordered sequence of groups of identical keys |
| 344 | - each key group has key value, which has multiple matching records |
| 345 | - thus, each record matches all members of the key group |
| 346 | |
| 347 | @retval 0 OK, next record was successfully read |
| 348 | @retval HA_ERR_END_OF_FILE End of records |
| 349 | @retval Other Some other error; Error is printed |
| 350 | */ |
| 351 | |
| 352 | int Mrr_ordered_index_reader::get_next(range_id_t *range_info) |
| 353 | { |
| 354 | int res; |
| 355 | DBUG_ENTER("Mrr_ordered_index_reader::get_next" ); |
| 356 | |
| 357 | for(;;) |
| 358 | { |
| 359 | if (!scanning_key_val_iter) |
| 360 | { |
| 361 | while ((res= kv_it.init(this))) |
| 362 | { |
| 363 | if ((res != HA_ERR_KEY_NOT_FOUND && res != HA_ERR_END_OF_FILE)) |
| 364 | DBUG_RETURN(res); /* Some fatal error */ |
| 365 | |
| 366 | if (key_buffer->is_empty()) |
| 367 | { |
| 368 | DBUG_RETURN(HA_ERR_END_OF_FILE); |
| 369 | } |
| 370 | } |
| 371 | scanning_key_val_iter= TRUE; |
| 372 | } |
| 373 | |
| 374 | if ((res= kv_it.get_next(range_info))) |
| 375 | { |
| 376 | scanning_key_val_iter= FALSE; |
| 377 | if ((res != HA_ERR_KEY_NOT_FOUND && res != HA_ERR_END_OF_FILE)) |
| 378 | DBUG_RETURN(res); |
| 379 | kv_it.move_to_next_key_value(); |
| 380 | continue; |
| 381 | } |
| 382 | if (!skip_index_tuple(*range_info) && |
| 383 | !skip_record(*range_info, NULL)) |
| 384 | { |
| 385 | break; |
| 386 | } |
| 387 | /* Go get another (record, range_id) combination */ |
| 388 | } /* while */ |
| 389 | |
| 390 | DBUG_RETURN(0); |
| 391 | } |
| 392 | |
| 393 | |
| 394 | /* |
| 395 | Supply index reader with the O(1)space it needs for scan interrupt/restore |
| 396 | operation |
| 397 | */ |
| 398 | |
| 399 | bool Mrr_ordered_index_reader::set_interruption_temp_buffer(uint rowid_length, |
| 400 | uint key_len, |
| 401 | uint saved_pk_len, |
| 402 | uchar **space_start, |
| 403 | uchar *space_end) |
| 404 | { |
| 405 | if (space_end - *space_start <= (ptrdiff_t)(rowid_length + key_len + saved_pk_len)) |
| 406 | return TRUE; |
| 407 | support_scan_interruptions= TRUE; |
| 408 | |
| 409 | saved_rowid= *space_start; |
| 410 | *space_start += rowid_length; |
| 411 | |
| 412 | if (saved_pk_len) |
| 413 | { |
| 414 | saved_primary_key= *space_start; |
| 415 | *space_start += saved_pk_len; |
| 416 | } |
| 417 | else |
| 418 | saved_primary_key= NULL; |
| 419 | |
| 420 | saved_key_tuple= *space_start; |
| 421 | *space_start += key_len; |
| 422 | |
| 423 | have_saved_rowid= FALSE; |
| 424 | read_was_interrupted= FALSE; |
| 425 | return FALSE; |
| 426 | } |
| 427 | |
| 428 | void Mrr_ordered_index_reader::set_no_interruption_temp_buffer() |
| 429 | { |
| 430 | support_scan_interruptions= FALSE; |
| 431 | saved_key_tuple= saved_rowid= saved_primary_key= NULL; /* safety */ |
| 432 | have_saved_rowid= FALSE; |
| 433 | read_was_interrupted= FALSE; |
| 434 | } |
| 435 | |
| 436 | void Mrr_ordered_index_reader::interrupt_read() |
| 437 | { |
| 438 | DBUG_ASSERT(support_scan_interruptions); |
| 439 | TABLE *table= file->get_table(); |
| 440 | KEY *used_index= &table->key_info[file->active_index]; |
| 441 | /* Save the current key value */ |
| 442 | key_copy(saved_key_tuple, table->record[0], |
| 443 | used_index, used_index->key_length); |
| 444 | |
| 445 | if (saved_primary_key) |
| 446 | { |
| 447 | key_copy(saved_primary_key, table->record[0], |
| 448 | &table->key_info[table->s->primary_key], |
| 449 | table->key_info[table->s->primary_key].key_length); |
| 450 | } |
| 451 | read_was_interrupted= TRUE; |
| 452 | |
| 453 | /* Save the last rowid */ |
| 454 | memcpy(saved_rowid, file->ref, file->ref_length); |
| 455 | have_saved_rowid= TRUE; |
| 456 | } |
| 457 | |
| 458 | void Mrr_ordered_index_reader::position() |
| 459 | { |
| 460 | if (have_saved_rowid) |
| 461 | memcpy(file->ref, saved_rowid, file->ref_length); |
| 462 | else |
| 463 | Mrr_index_reader::position(); |
| 464 | } |
| 465 | |
| 466 | void Mrr_ordered_index_reader::resume_read() |
| 467 | { |
| 468 | TABLE *table= file->get_table(); |
| 469 | |
| 470 | if (!read_was_interrupted) |
| 471 | return; |
| 472 | |
| 473 | KEY *used_index= &table->key_info[file->active_index]; |
| 474 | key_restore(table->record[0], saved_key_tuple, |
| 475 | used_index, used_index->key_length); |
| 476 | if (saved_primary_key) |
| 477 | { |
| 478 | key_restore(table->record[0], saved_primary_key, |
| 479 | &table->key_info[table->s->primary_key], |
| 480 | table->key_info[table->s->primary_key].key_length); |
| 481 | } |
| 482 | } |
| 483 | |
| 484 | |
| 485 | /** |
| 486 | Fill the buffer with (lookup_tuple, range_id) pairs and sort |
| 487 | |
| 488 | @return |
| 489 | 0 OK, the buffer is non-empty and sorted |
| 490 | HA_ERR_END_OF_FILE Source exhausted, the buffer is empty. |
| 491 | */ |
| 492 | |
| 493 | int Mrr_ordered_index_reader::refill_buffer(bool initial) |
| 494 | { |
| 495 | KEY_MULTI_RANGE cur_range; |
| 496 | DBUG_ENTER("Mrr_ordered_index_reader::refill_buffer" ); |
| 497 | |
| 498 | DBUG_ASSERT(key_buffer->is_empty()); |
| 499 | |
| 500 | if (source_exhausted) |
| 501 | DBUG_RETURN(HA_ERR_END_OF_FILE); |
| 502 | |
| 503 | buf_manager->reset_buffer_sizes(buf_manager->arg); |
| 504 | key_buffer->reset(); |
| 505 | key_buffer->setup_writing(keypar.key_size_in_keybuf, |
| 506 | is_mrr_assoc? sizeof(range_id_t) : 0); |
| 507 | |
| 508 | while (key_buffer->can_write() && |
| 509 | !(source_exhausted= mrr_funcs.next(mrr_iter, &cur_range))) |
| 510 | { |
| 511 | DBUG_ASSERT(cur_range.range_flag & EQ_RANGE); |
| 512 | |
| 513 | /* Put key, or {key, range_id} pair into the buffer */ |
| 514 | key_buffer->write_ptr1= keypar.use_key_pointers ? |
| 515 | (uchar*)&cur_range.start_key.key : |
| 516 | (uchar*)cur_range.start_key.key; |
| 517 | key_buffer->write_ptr2= (uchar*)&cur_range.ptr; |
| 518 | key_buffer->write(); |
| 519 | } |
| 520 | |
| 521 | /* Force get_next() to start with kv_it.init() call: */ |
| 522 | scanning_key_val_iter= FALSE; |
| 523 | |
| 524 | if (source_exhausted && key_buffer->is_empty()) |
| 525 | DBUG_RETURN(HA_ERR_END_OF_FILE); |
| 526 | |
| 527 | if (!initial) |
| 528 | { |
| 529 | /* This is a non-initial buffer fill and we've got a non-empty buffer */ |
| 530 | THD *thd= current_thd; |
| 531 | status_var_increment(thd->status_var.ha_mrr_key_refills_count); |
| 532 | } |
| 533 | |
| 534 | key_buffer->sort((key_buffer->type() == Lifo_buffer::FORWARD)? |
| 535 | (qsort2_cmp)Mrr_ordered_index_reader::compare_keys_reverse : |
| 536 | (qsort2_cmp)Mrr_ordered_index_reader::compare_keys, |
| 537 | this); |
| 538 | DBUG_RETURN(0); |
| 539 | } |
| 540 | |
| 541 | |
| 542 | int Mrr_ordered_index_reader::init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, |
| 543 | void *seq_init_param, uint n_ranges, |
| 544 | uint mode, Key_parameters *key_par_arg, |
| 545 | Lifo_buffer *key_buffer_arg, |
| 546 | Buffer_manager *buf_manager_arg) |
| 547 | { |
| 548 | file= h_arg; |
| 549 | key_buffer= key_buffer_arg; |
| 550 | buf_manager= buf_manager_arg; |
| 551 | keypar= *key_par_arg; |
| 552 | |
| 553 | KEY *key_info= &file->get_table()->key_info[file->active_index]; |
| 554 | keypar.index_ranges_unique= MY_TEST(key_info->flags & HA_NOSAME && |
| 555 | key_info->user_defined_key_parts == |
| 556 | my_count_bits(keypar.key_tuple_map)); |
| 557 | |
| 558 | mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode); |
| 559 | is_mrr_assoc= !MY_TEST(mode & HA_MRR_NO_ASSOCIATION); |
| 560 | mrr_funcs= *seq_funcs; |
| 561 | source_exhausted= FALSE; |
| 562 | read_was_interrupted= false; |
| 563 | have_saved_rowid= FALSE; |
| 564 | return 0; |
| 565 | } |
| 566 | |
| 567 | |
| 568 | static int rowid_cmp_reverse(void *file, uchar *a, uchar *b) |
| 569 | { |
| 570 | return - ((handler*)file)->cmp_ref(a, b); |
| 571 | } |
| 572 | |
| 573 | |
| 574 | int Mrr_ordered_rndpos_reader::init(handler *h_arg, |
| 575 | Mrr_index_reader *index_reader_arg, |
| 576 | uint mode, |
| 577 | Lifo_buffer *buf) |
| 578 | { |
| 579 | file= h_arg; |
| 580 | index_reader= index_reader_arg; |
| 581 | rowid_buffer= buf; |
| 582 | is_mrr_assoc= !MY_TEST(mode & HA_MRR_NO_ASSOCIATION); |
| 583 | index_reader_exhausted= FALSE; |
| 584 | index_reader_needs_refill= TRUE; |
| 585 | return 0; |
| 586 | } |
| 587 | |
| 588 | |
| 589 | /** |
| 590 | DS-MRR: Fill and sort the rowid buffer |
| 591 | |
| 592 | Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into |
| 593 | buffer. When the buffer is full or scan is completed, sort the buffer by |
| 594 | rowid and return. |
| 595 | |
| 596 | When this function returns, either rowid buffer is not empty, or the source |
| 597 | of lookup keys (i.e. ranges) is exhaused. |
| 598 | |
| 599 | @retval 0 OK, the next portion of rowids is in the buffer, |
| 600 | properly ordered |
| 601 | @retval other Error |
| 602 | */ |
| 603 | |
| 604 | int Mrr_ordered_rndpos_reader::refill_buffer(bool initial) |
| 605 | { |
| 606 | int res; |
| 607 | bool first_call= initial; |
| 608 | DBUG_ENTER("Mrr_ordered_rndpos_reader::refill_buffer" ); |
| 609 | |
| 610 | if (index_reader_exhausted) |
| 611 | DBUG_RETURN(HA_ERR_END_OF_FILE); |
| 612 | |
| 613 | while (initial || index_reader_needs_refill || |
| 614 | (res= refill_from_index_reader()) == HA_ERR_END_OF_FILE) |
| 615 | { |
| 616 | if ((res= index_reader->refill_buffer(initial))) |
| 617 | { |
| 618 | if (res == HA_ERR_END_OF_FILE) |
| 619 | index_reader_exhausted= TRUE; |
| 620 | break; |
| 621 | } |
| 622 | initial= FALSE; |
| 623 | index_reader_needs_refill= FALSE; |
| 624 | } |
| 625 | |
| 626 | if (!first_call && !index_reader_exhausted) |
| 627 | { |
| 628 | /* Ok, this was a successful buffer refill operation */ |
| 629 | THD *thd= current_thd; |
| 630 | status_var_increment(thd->status_var.ha_mrr_rowid_refills_count); |
| 631 | } |
| 632 | |
| 633 | DBUG_RETURN(res); |
| 634 | } |
| 635 | |
| 636 | |
| 637 | void Mrr_index_reader::position() |
| 638 | { |
| 639 | file->position(file->get_table()->record[0]); |
| 640 | } |
| 641 | |
| 642 | |
| 643 | /* |
| 644 | @brief Try to refill the rowid buffer without calling |
| 645 | index_reader->refill_buffer(). |
| 646 | */ |
| 647 | |
| 648 | int Mrr_ordered_rndpos_reader::refill_from_index_reader() |
| 649 | { |
| 650 | range_id_t range_info; |
| 651 | int res; |
| 652 | DBUG_ENTER("Mrr_ordered_rndpos_reader::refill_from_index_reader" ); |
| 653 | |
| 654 | DBUG_ASSERT(rowid_buffer->is_empty()); |
| 655 | index_rowid= index_reader->get_rowid_ptr(); |
| 656 | rowid_buffer->reset(); |
| 657 | rowid_buffer->setup_writing(file->ref_length, |
| 658 | is_mrr_assoc? sizeof(range_id_t) : 0); |
| 659 | |
| 660 | last_identical_rowid= NULL; |
| 661 | |
| 662 | index_reader->resume_read(); |
| 663 | while (rowid_buffer->can_write()) |
| 664 | { |
| 665 | res= index_reader->get_next(&range_info); |
| 666 | |
| 667 | if (res) |
| 668 | { |
| 669 | if (res != HA_ERR_END_OF_FILE) |
| 670 | DBUG_RETURN(res); |
| 671 | index_reader_needs_refill=TRUE; |
| 672 | break; |
| 673 | } |
| 674 | |
| 675 | index_reader->position(); |
| 676 | |
| 677 | /* Put rowid, or {rowid, range_id} pair into the buffer */ |
| 678 | rowid_buffer->write_ptr1= index_rowid; |
| 679 | rowid_buffer->write_ptr2= (uchar*)&range_info; |
| 680 | rowid_buffer->write(); |
| 681 | } |
| 682 | |
| 683 | /* |
| 684 | When index_reader_needs_refill=TRUE, this means we've got all of index |
| 685 | tuples for lookups keys that index_reader had. We are not in the middle |
| 686 | of an index read, so there is no need to call interrupt_read. |
| 687 | |
| 688 | Actually, we must not call interrupt_read(), because it could be that we |
| 689 | haven't read a single row (because all index lookups returned |
| 690 | HA_ERR_KEY_NOT_FOUND). In this case, interrupt_read() will cause [harmless] |
| 691 | valgrind warnings when trying to save garbage from table->record[0]. |
| 692 | */ |
| 693 | if (!index_reader_needs_refill) |
| 694 | index_reader->interrupt_read(); |
| 695 | /* Sort the buffer contents by rowid */ |
| 696 | rowid_buffer->sort((qsort2_cmp)rowid_cmp_reverse, (void*)file); |
| 697 | |
| 698 | rowid_buffer->setup_reading(file->ref_length, |
| 699 | is_mrr_assoc ? sizeof(range_id_t) : 0); |
| 700 | DBUG_RETURN(rowid_buffer->is_empty()? HA_ERR_END_OF_FILE : 0); |
| 701 | } |
| 702 | |
| 703 | |
| 704 | /* |
| 705 | Get the next {record, range_id} using ordered array of rowid+range_id pairs |
| 706 | |
| 707 | @note |
| 708 | Since we have sorted rowids, we try not to make multiple rnd_pos() calls |
| 709 | with the same rowid value. |
| 710 | */ |
| 711 | |
| 712 | int Mrr_ordered_rndpos_reader::get_next(range_id_t *range_info) |
| 713 | { |
| 714 | int res; |
| 715 | |
| 716 | /* |
| 717 | First, check if rowid buffer has elements with the same rowid value as |
| 718 | the previous. |
| 719 | */ |
| 720 | while (last_identical_rowid) |
| 721 | { |
| 722 | /* |
| 723 | Current record (the one we've returned in previous call) was obtained |
| 724 | from a rowid that matched multiple range_ids. Return this record again, |
| 725 | with next matching range_id. |
| 726 | */ |
| 727 | (void)rowid_buffer->read(); |
| 728 | |
| 729 | if (rowid_buffer->read_ptr1 == last_identical_rowid) |
| 730 | last_identical_rowid= NULL; /* reached the last of identical rowids */ |
| 731 | |
| 732 | if (!is_mrr_assoc) |
| 733 | return 0; |
| 734 | |
| 735 | memcpy(range_info, rowid_buffer->read_ptr2, sizeof(range_id_t)); |
| 736 | if (!index_reader->skip_record(*range_info, rowid_buffer->read_ptr1)) |
| 737 | return 0; |
| 738 | } |
| 739 | |
| 740 | /* |
| 741 | Ok, last_identical_rowid==NULL, it's time to read next different rowid |
| 742 | value and get record for it. |
| 743 | */ |
| 744 | for(;;) |
| 745 | { |
| 746 | /* Return eof if there are no rowids in the buffer after re-fill attempt */ |
| 747 | if (rowid_buffer->read()) |
| 748 | return HA_ERR_END_OF_FILE; |
| 749 | |
| 750 | if (is_mrr_assoc) |
| 751 | { |
| 752 | memcpy(range_info, rowid_buffer->read_ptr2, sizeof(range_id_t)); |
| 753 | if (index_reader->skip_record(*range_info, rowid_buffer->read_ptr1)) |
| 754 | continue; |
| 755 | } |
| 756 | |
| 757 | res= file->ha_rnd_pos(file->get_table()->record[0], |
| 758 | rowid_buffer->read_ptr1); |
| 759 | |
| 760 | if (res) |
| 761 | return res; /* Some fatal error */ |
| 762 | |
| 763 | break; /* Got another record */ |
| 764 | } |
| 765 | |
| 766 | /* |
| 767 | Check if subsequent buffer elements have the same rowid value as this |
| 768 | one. If yes, remember this fact so that we don't make any more rnd_pos() |
| 769 | calls with this value. |
| 770 | |
| 771 | Note: this implies that SQL layer doesn't touch table->record[0] |
| 772 | between calls. |
| 773 | */ |
| 774 | Lifo_buffer_iterator it; |
| 775 | it.init(rowid_buffer); |
| 776 | while (!it.read()) |
| 777 | { |
| 778 | if (file->cmp_ref(it.read_ptr1, rowid_buffer->read_ptr1)) |
| 779 | break; |
| 780 | last_identical_rowid= it.read_ptr1; |
| 781 | } |
| 782 | return 0; |
| 783 | } |
| 784 | |
| 785 | |
| 786 | /**************************************************************************** |
| 787 | * Top-level DS-MRR implementation functions (the ones called by storage engine) |
| 788 | ***************************************************************************/ |
| 789 | |
| 790 | /** |
| 791 | DS-MRR: Initialize and start MRR scan |
| 792 | |
| 793 | Initialize and start the MRR scan. Depending on the mode parameter, this |
| 794 | may use default or DS-MRR implementation. |
| 795 | |
| 796 | @param h_arg Table handler to be used |
| 797 | @param key Index to be used |
| 798 | @param seq_funcs Interval sequence enumeration functions |
| 799 | @param seq_init_param Interval sequence enumeration parameter |
| 800 | @param n_ranges Number of ranges in the sequence. |
| 801 | @param mode HA_MRR_* modes to use |
| 802 | @param buf INOUT Buffer to use |
| 803 | |
| 804 | @retval 0 Ok, Scan started. |
| 805 | @retval other Error |
| 806 | */ |
| 807 | |
| 808 | int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, |
| 809 | void *seq_init_param, uint n_ranges, uint mode, |
| 810 | HANDLER_BUFFER *buf) |
| 811 | { |
| 812 | THD *thd= h_arg->get_table()->in_use; |
| 813 | int res; |
| 814 | Key_parameters keypar; |
| 815 | uint UNINIT_VAR(key_buff_elem_size); /* set/used when do_sort_keys==TRUE */ |
| 816 | handler *h_idx; |
| 817 | Mrr_ordered_rndpos_reader *disk_strategy= NULL; |
| 818 | bool do_sort_keys= FALSE; |
| 819 | DBUG_ENTER("DsMrr_impl::dsmrr_init" ); |
| 820 | /* |
| 821 | index_merge may invoke a scan on an object for which dsmrr_info[_const] |
| 822 | has not been called, so set the owner handler here as well. |
| 823 | */ |
| 824 | primary_file= h_arg; |
| 825 | is_mrr_assoc= !MY_TEST(mode & HA_MRR_NO_ASSOCIATION); |
| 826 | |
| 827 | strategy_exhausted= FALSE; |
| 828 | |
| 829 | /* By default, have do-nothing buffer manager */ |
| 830 | buf_manager.arg= this; |
| 831 | buf_manager.reset_buffer_sizes= do_nothing; |
| 832 | buf_manager.redistribute_buffer_space= do_nothing; |
| 833 | |
| 834 | if (mode & (HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED)) |
| 835 | goto use_default_impl; |
| 836 | |
| 837 | /* |
| 838 | Determine whether we'll need to do key sorting and/or rnd_pos() scan |
| 839 | */ |
| 840 | index_strategy= NULL; |
| 841 | if ((mode & HA_MRR_SINGLE_POINT) && |
| 842 | optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS)) |
| 843 | { |
| 844 | do_sort_keys= TRUE; |
| 845 | index_strategy= &reader_factory.ordered_index_reader; |
| 846 | } |
| 847 | else |
| 848 | index_strategy= &reader_factory.simple_index_reader; |
| 849 | |
| 850 | strategy= index_strategy; |
| 851 | /* |
| 852 | We don't need a rowid-to-rndpos step if |
| 853 | - We're doing a scan on clustered primary key |
| 854 | - [In the future] We're doing an index_only read |
| 855 | */ |
| 856 | DBUG_ASSERT(primary_file->inited == handler::INDEX || |
| 857 | (primary_file->inited == handler::RND && |
| 858 | secondary_file && |
| 859 | secondary_file->inited == handler::INDEX)); |
| 860 | |
| 861 | h_idx= (primary_file->inited == handler::INDEX)? primary_file: secondary_file; |
| 862 | keyno= h_idx->active_index; |
| 863 | |
| 864 | if (!(keyno == table->s->primary_key && h_idx->primary_key_is_clustered())) |
| 865 | { |
| 866 | strategy= disk_strategy= &reader_factory.ordered_rndpos_reader; |
| 867 | } |
| 868 | |
| 869 | full_buf= buf->buffer; |
| 870 | full_buf_end= buf->buffer_end; |
| 871 | |
| 872 | if (do_sort_keys) |
| 873 | { |
| 874 | /* Pre-calculate some parameters of key sorting */ |
| 875 | keypar.use_key_pointers= MY_TEST(mode & HA_MRR_MATERIALIZED_KEYS); |
| 876 | seq_funcs->get_key_info(seq_init_param, &keypar.key_tuple_length, |
| 877 | &keypar.key_tuple_map); |
| 878 | keypar.key_size_in_keybuf= keypar.use_key_pointers? |
| 879 | sizeof(char*) : keypar.key_tuple_length; |
| 880 | key_buff_elem_size= keypar.key_size_in_keybuf + (int)is_mrr_assoc * sizeof(void*); |
| 881 | |
| 882 | /* Ordered index reader needs some space to store an index tuple */ |
| 883 | if (strategy != index_strategy) |
| 884 | { |
| 885 | uint saved_pk_length=0; |
| 886 | if (h_idx->primary_key_is_clustered()) |
| 887 | { |
| 888 | uint pk= h_idx->get_table()->s->primary_key; |
| 889 | if (pk != MAX_KEY) |
| 890 | saved_pk_length= h_idx->get_table()->key_info[pk].key_length; |
| 891 | } |
| 892 | |
| 893 | KEY *used_index= &h_idx->get_table()->key_info[h_idx->active_index]; |
| 894 | if (reader_factory.ordered_index_reader. |
| 895 | set_interruption_temp_buffer(primary_file->ref_length, |
| 896 | used_index->key_length, |
| 897 | saved_pk_length, |
| 898 | &full_buf, full_buf_end)) |
| 899 | goto use_default_impl; |
| 900 | } |
| 901 | else |
| 902 | reader_factory.ordered_index_reader.set_no_interruption_temp_buffer(); |
| 903 | } |
| 904 | |
| 905 | if (strategy == index_strategy) |
| 906 | { |
| 907 | /* |
| 908 | Index strategy alone handles the record retrieval. Give all buffer space |
| 909 | to it. Key buffer should have forward orientation so we can return the |
| 910 | end of it. |
| 911 | */ |
| 912 | key_buffer= &forward_key_buf; |
| 913 | key_buffer->set_buffer_space(full_buf, full_buf_end); |
| 914 | |
| 915 | /* Safety: specify that rowid buffer has zero size: */ |
| 916 | rowid_buffer.set_buffer_space(full_buf_end, full_buf_end); |
| 917 | |
| 918 | if (do_sort_keys && !key_buffer->have_space_for(key_buff_elem_size)) |
| 919 | goto use_default_impl; |
| 920 | |
| 921 | if ((res= index_strategy->init(primary_file, seq_funcs, seq_init_param, n_ranges, |
| 922 | mode, &keypar, key_buffer, &buf_manager))) |
| 923 | goto error; |
| 924 | } |
| 925 | else |
| 926 | { |
| 927 | /* We'll have both index and rndpos strategies working together */ |
| 928 | if (do_sort_keys) |
| 929 | { |
| 930 | /* Both strategies will need buffer space, share the buffer */ |
| 931 | if (setup_buffer_sharing(keypar.key_size_in_keybuf, keypar.key_tuple_map)) |
| 932 | goto use_default_impl; |
| 933 | |
| 934 | buf_manager.reset_buffer_sizes= reset_buffer_sizes; |
| 935 | buf_manager.redistribute_buffer_space= redistribute_buffer_space; |
| 936 | } |
| 937 | else |
| 938 | { |
| 939 | /* index strategy doesn't need buffer, give all space to rowids*/ |
| 940 | rowid_buffer.set_buffer_space(full_buf, full_buf_end); |
| 941 | if (!rowid_buffer.have_space_for(primary_file->ref_length + |
| 942 | (int)is_mrr_assoc * sizeof(range_id_t))) |
| 943 | goto use_default_impl; |
| 944 | } |
| 945 | |
| 946 | if ((res= setup_two_handlers())) |
| 947 | goto error; |
| 948 | |
| 949 | if ((res= index_strategy->init(secondary_file, seq_funcs, seq_init_param, |
| 950 | n_ranges, mode, &keypar, key_buffer, |
| 951 | &buf_manager)) || |
| 952 | (res= disk_strategy->init(primary_file, index_strategy, mode, |
| 953 | &rowid_buffer))) |
| 954 | { |
| 955 | goto error; |
| 956 | } |
| 957 | } |
| 958 | |
| 959 | /* |
| 960 | At this point, we're sure that we're running a native MRR scan (i.e. we |
| 961 | didnt fall back to default implementation for some reason). |
| 962 | */ |
| 963 | status_var_increment(thd->status_var.ha_mrr_init_count); |
| 964 | |
| 965 | res= strategy->refill_buffer(TRUE); |
| 966 | if (res) |
| 967 | { |
| 968 | if (res != HA_ERR_END_OF_FILE) |
| 969 | goto error; |
| 970 | strategy_exhausted= TRUE; |
| 971 | } |
| 972 | |
| 973 | /* |
| 974 | If we have scanned through all intervals in *seq, then adjust *buf to |
| 975 | indicate that the remaining buffer space will not be used. |
| 976 | */ |
| 977 | // if (dsmrr_eof) |
| 978 | // buf->end_of_used_area= rowid_buffer.end_of_space(); |
| 979 | |
| 980 | |
| 981 | DBUG_RETURN(0); |
| 982 | error: |
| 983 | close_second_handler(); |
| 984 | /* Safety, not really needed but: */ |
| 985 | strategy= NULL; |
| 986 | DBUG_RETURN(res); |
| 987 | |
| 988 | use_default_impl: |
| 989 | if (primary_file->inited != handler::INDEX) |
| 990 | { |
| 991 | /* We can get here when |
| 992 | - we've previously successfully done a DS-MRR scan (and so have |
| 993 | secondary_file!= NULL, secondary_file->inited= INDEX, |
| 994 | primary_file->inited=RND) |
| 995 | - for this invocation, we haven't got enough buffer space, and so we |
| 996 | have to use the default MRR implementation. |
| 997 | |
| 998 | note: primary_file->ha_index_end() will call dsmrr_close() which will |
| 999 | close/destroy the secondary_file, this is intentional. |
| 1000 | (Yes this is slow, but one can't expect performance with join buffer |
| 1001 | so small that it can accomodate one rowid and one index tuple) |
| 1002 | */ |
| 1003 | if ((res= primary_file->ha_rnd_end()) || |
| 1004 | (res= primary_file->ha_index_init(keyno, MY_TEST(mode & HA_MRR_SORTED)))) |
| 1005 | { |
| 1006 | DBUG_RETURN(res); |
| 1007 | } |
| 1008 | } |
| 1009 | /* Call correct init function and assign to top level object */ |
| 1010 | Mrr_simple_index_reader *s= &reader_factory.simple_index_reader; |
| 1011 | res= s->init(primary_file, seq_funcs, seq_init_param, n_ranges, mode, NULL, |
| 1012 | NULL, NULL); |
| 1013 | strategy= s; |
| 1014 | DBUG_RETURN(res); |
| 1015 | } |
| 1016 | |
| 1017 | |
| 1018 | /* |
| 1019 | Whatever the current state is, make it so that we have two handler objects: |
| 1020 | - primary_file - initialized for rnd_pos() scan |
| 1021 | - secondary_file - initialized for scanning the index specified in |
| 1022 | this->keyno |
| 1023 | RETURN |
| 1024 | 0 OK |
| 1025 | HA_XXX Error code |
| 1026 | */ |
| 1027 | |
| 1028 | int DsMrr_impl::setup_two_handlers() |
| 1029 | { |
| 1030 | int res; |
| 1031 | THD *thd= primary_file->get_table()->in_use; |
| 1032 | DBUG_ENTER("DsMrr_impl::setup_two_handlers" ); |
| 1033 | if (!secondary_file) |
| 1034 | { |
| 1035 | handler *new_h2; |
| 1036 | Item *pushed_cond= NULL; |
| 1037 | DBUG_ASSERT(primary_file->inited == handler::INDEX); |
| 1038 | /* Create a separate handler object to do rnd_pos() calls. */ |
| 1039 | /* |
| 1040 | ::clone() takes up a lot of stack, especially on 64 bit platforms. |
| 1041 | The constant 5 is an empiric result. |
| 1042 | */ |
| 1043 | if (check_stack_overrun(thd, 5*STACK_MIN_SIZE, (uchar*) &new_h2)) |
| 1044 | DBUG_RETURN(1); |
| 1045 | |
| 1046 | /* Create a separate handler object to do rnd_pos() calls. */ |
| 1047 | if (!(new_h2= primary_file->clone(primary_file->get_table()->s-> |
| 1048 | normalized_path.str, |
| 1049 | thd->mem_root)) || |
| 1050 | new_h2->ha_external_lock(thd, F_RDLCK)) |
| 1051 | { |
| 1052 | delete new_h2; |
| 1053 | DBUG_RETURN(1); |
| 1054 | } |
| 1055 | |
| 1056 | if (keyno == primary_file->pushed_idx_cond_keyno) |
| 1057 | pushed_cond= primary_file->pushed_idx_cond; |
| 1058 | |
| 1059 | Mrr_reader *save_strategy= strategy; |
| 1060 | strategy= NULL; |
| 1061 | /* |
| 1062 | Caution: this call will invoke this->dsmrr_close(). Do not put the |
| 1063 | created secondary table handler new_h2 into this->secondary_file or it |
| 1064 | will delete it. Also, save the picked strategy |
| 1065 | */ |
| 1066 | res= primary_file->ha_index_end(); |
| 1067 | |
| 1068 | strategy= save_strategy; |
| 1069 | secondary_file= new_h2; |
| 1070 | |
| 1071 | if (res || (res= (primary_file->ha_rnd_init(FALSE)))) |
| 1072 | goto error; |
| 1073 | |
| 1074 | table->prepare_for_position(); |
| 1075 | secondary_file->extra(HA_EXTRA_KEYREAD); |
| 1076 | secondary_file->mrr_iter= primary_file->mrr_iter; |
| 1077 | |
| 1078 | if ((res= secondary_file->ha_index_init(keyno, FALSE))) |
| 1079 | goto error; |
| 1080 | |
| 1081 | if (pushed_cond) |
| 1082 | secondary_file->idx_cond_push(keyno, pushed_cond); |
| 1083 | } |
| 1084 | else |
| 1085 | { |
| 1086 | DBUG_ASSERT(secondary_file && secondary_file->inited==handler::INDEX); |
| 1087 | /* |
| 1088 | We get here when the access alternates betwen MRR scan(s) and non-MRR |
| 1089 | scans. |
| 1090 | |
| 1091 | Calling primary_file->index_end() will invoke dsmrr_close() for this object, |
| 1092 | which will delete secondary_file. We need to keep it, so put it away and dont |
| 1093 | let it be deleted: |
| 1094 | */ |
| 1095 | if (primary_file->inited == handler::INDEX) |
| 1096 | { |
| 1097 | handler *save_h2= secondary_file; |
| 1098 | Mrr_reader *save_strategy= strategy; |
| 1099 | secondary_file= NULL; |
| 1100 | strategy= NULL; |
| 1101 | res= primary_file->ha_index_end(); |
| 1102 | secondary_file= save_h2; |
| 1103 | strategy= save_strategy; |
| 1104 | if (res) |
| 1105 | goto error; |
| 1106 | } |
| 1107 | if ((primary_file->inited != handler::RND) && |
| 1108 | (res= primary_file->ha_rnd_init(FALSE))) |
| 1109 | goto error; |
| 1110 | } |
| 1111 | DBUG_RETURN(0); |
| 1112 | |
| 1113 | error: |
| 1114 | DBUG_RETURN(res); |
| 1115 | } |
| 1116 | |
| 1117 | |
| 1118 | void DsMrr_impl::close_second_handler() |
| 1119 | { |
| 1120 | if (secondary_file) |
| 1121 | { |
| 1122 | secondary_file->extra(HA_EXTRA_NO_KEYREAD); |
| 1123 | secondary_file->ha_index_or_rnd_end(); |
| 1124 | secondary_file->ha_external_lock(current_thd, F_UNLCK); |
| 1125 | secondary_file->ha_close(); |
| 1126 | delete secondary_file; |
| 1127 | secondary_file= NULL; |
| 1128 | } |
| 1129 | } |
| 1130 | |
| 1131 | |
| 1132 | void DsMrr_impl::dsmrr_close() |
| 1133 | { |
| 1134 | DBUG_ENTER("DsMrr_impl::dsmrr_close" ); |
| 1135 | close_second_handler(); |
| 1136 | strategy= NULL; |
| 1137 | DBUG_VOID_RETURN; |
| 1138 | } |
| 1139 | |
| 1140 | |
| 1141 | /* |
| 1142 | my_qsort2-compatible static member function to compare key tuples |
| 1143 | */ |
| 1144 | |
| 1145 | int Mrr_ordered_index_reader::compare_keys(void* arg, uchar* key1_arg, |
| 1146 | uchar* key2_arg) |
| 1147 | { |
| 1148 | Mrr_ordered_index_reader *reader= (Mrr_ordered_index_reader*)arg; |
| 1149 | TABLE *table= reader->file->get_table(); |
| 1150 | KEY_PART_INFO *part= table->key_info[reader->file->active_index].key_part; |
| 1151 | uchar *key1, *key2; |
| 1152 | |
| 1153 | if (reader->keypar.use_key_pointers) |
| 1154 | { |
| 1155 | /* the buffer stores pointers to keys, get to the keys */ |
| 1156 | memcpy(&key1, key1_arg, sizeof(char*)); |
| 1157 | memcpy(&key2, key2_arg, sizeof(char*)); |
| 1158 | } |
| 1159 | else |
| 1160 | { |
| 1161 | key1= key1_arg; |
| 1162 | key2= key2_arg; |
| 1163 | } |
| 1164 | |
| 1165 | return key_tuple_cmp(part, key1, key2, reader->keypar.key_tuple_length); |
| 1166 | } |
| 1167 | |
| 1168 | |
| 1169 | int Mrr_ordered_index_reader::compare_keys_reverse(void* arg, uchar* key1, |
| 1170 | uchar* key2) |
| 1171 | { |
| 1172 | return -compare_keys(arg, key1, key2); |
| 1173 | } |
| 1174 | |
| 1175 | |
| 1176 | /** |
| 1177 | Set the buffer space to be shared between rowid and key buffer |
| 1178 | |
| 1179 | @return FALSE ok |
| 1180 | @return TRUE There is so little buffer space that we won't be able to use |
| 1181 | the strategy. |
| 1182 | This happens when we don't have enough space for one rowid |
| 1183 | element and one key element so this is mainly targeted at |
| 1184 | testing. |
| 1185 | */ |
| 1186 | |
| 1187 | bool DsMrr_impl::setup_buffer_sharing(uint key_size_in_keybuf, |
| 1188 | key_part_map key_tuple_map) |
| 1189 | { |
| 1190 | long key_buff_elem_size= key_size_in_keybuf + |
| 1191 | (int)is_mrr_assoc * sizeof(range_id_t); |
| 1192 | |
| 1193 | KEY *key_info= &primary_file->get_table()->key_info[keyno]; |
| 1194 | /* |
| 1195 | Ok if we got here we need to allocate one part of the buffer |
| 1196 | for keys and another part for rowids. |
| 1197 | */ |
| 1198 | ulonglong rowid_buf_elem_size= primary_file->ref_length + |
| 1199 | (int)is_mrr_assoc * sizeof(range_id_t); |
| 1200 | |
| 1201 | /* |
| 1202 | Use rec_per_key statistics as a basis to find out how many rowids |
| 1203 | we'll get for each key value. |
| 1204 | TODO: what should be the default value to use when there is no |
| 1205 | statistics? |
| 1206 | */ |
| 1207 | uint parts= my_count_bits(key_tuple_map); |
| 1208 | ha_rows rpc; |
| 1209 | ulonglong rowids_size= rowid_buf_elem_size; |
| 1210 | if ((rpc= (ha_rows) key_info->actual_rec_per_key(parts - 1))) |
| 1211 | rowids_size= rowid_buf_elem_size * rpc; |
| 1212 | |
| 1213 | double fraction_for_rowids= |
| 1214 | (ulonglong2double(rowids_size) / |
| 1215 | (ulonglong2double(rowids_size) + key_buff_elem_size)); |
| 1216 | |
| 1217 | ptrdiff_t bytes_for_rowids= |
| 1218 | (ptrdiff_t)floor(0.5 + fraction_for_rowids * (full_buf_end - full_buf)); |
| 1219 | |
| 1220 | ptrdiff_t bytes_for_keys= (full_buf_end - full_buf) - bytes_for_rowids; |
| 1221 | |
| 1222 | if (bytes_for_keys < key_buff_elem_size + 1 || |
| 1223 | bytes_for_rowids < (ptrdiff_t)rowid_buf_elem_size + 1) |
| 1224 | return TRUE; /* Failed to provide minimum space for one of the buffers */ |
| 1225 | |
| 1226 | rowid_buffer_end= full_buf + bytes_for_rowids; |
| 1227 | rowid_buffer.set_buffer_space(full_buf, rowid_buffer_end); |
| 1228 | key_buffer= &backward_key_buf; |
| 1229 | key_buffer->set_buffer_space(rowid_buffer_end, full_buf_end); |
| 1230 | |
| 1231 | /* The above code guarantees that the buffers are big enough */ |
| 1232 | DBUG_ASSERT(key_buffer->have_space_for(key_buff_elem_size) && |
| 1233 | rowid_buffer.have_space_for((size_t)rowid_buf_elem_size)); |
| 1234 | |
| 1235 | return FALSE; |
| 1236 | } |
| 1237 | |
| 1238 | |
| 1239 | void DsMrr_impl::do_nothing(void *dsmrr_arg) |
| 1240 | { |
| 1241 | /* Do nothing */ |
| 1242 | } |
| 1243 | |
| 1244 | |
| 1245 | void DsMrr_impl::reset_buffer_sizes(void *dsmrr_arg) |
| 1246 | { |
| 1247 | DsMrr_impl *dsmrr= (DsMrr_impl*)dsmrr_arg; |
| 1248 | dsmrr->rowid_buffer.set_buffer_space(dsmrr->full_buf, |
| 1249 | dsmrr->rowid_buffer_end); |
| 1250 | dsmrr->key_buffer->set_buffer_space(dsmrr->rowid_buffer_end, |
| 1251 | dsmrr->full_buf_end); |
| 1252 | } |
| 1253 | |
| 1254 | |
| 1255 | /* |
| 1256 | Take unused space from the key buffer and give it to the rowid buffer |
| 1257 | */ |
| 1258 | |
| 1259 | void DsMrr_impl::redistribute_buffer_space(void *dsmrr_arg) |
| 1260 | { |
| 1261 | DsMrr_impl *dsmrr= (DsMrr_impl*)dsmrr_arg; |
| 1262 | uchar *unused_start, *unused_end; |
| 1263 | dsmrr->key_buffer->remove_unused_space(&unused_start, &unused_end); |
| 1264 | dsmrr->rowid_buffer.grow(unused_start, unused_end); |
| 1265 | } |
| 1266 | |
| 1267 | |
| 1268 | /* |
| 1269 | @brief Initialize the iterator |
| 1270 | |
| 1271 | @note |
| 1272 | Initialize the iterator to produce matches for the key of the first element |
| 1273 | in owner_arg->key_buffer |
| 1274 | |
| 1275 | @retval 0 OK |
| 1276 | @retval HA_ERR_END_OF_FILE Either the owner->key_buffer is empty or |
| 1277 | no matches for the key we've tried (check |
| 1278 | key_buffer->is_empty() to tell these apart) |
| 1279 | @retval other code Fatal error |
| 1280 | */ |
| 1281 | |
| 1282 | int Key_value_records_iterator::init(Mrr_ordered_index_reader *owner_arg) |
| 1283 | { |
| 1284 | int res; |
| 1285 | owner= owner_arg; |
| 1286 | |
| 1287 | identical_key_it.init(owner->key_buffer); |
| 1288 | owner->key_buffer->setup_reading(owner->keypar.key_size_in_keybuf, |
| 1289 | owner->is_mrr_assoc ? sizeof(void*) : 0); |
| 1290 | |
| 1291 | if (identical_key_it.read()) |
| 1292 | return HA_ERR_END_OF_FILE; |
| 1293 | |
| 1294 | uchar *key_in_buf= last_identical_key_ptr= identical_key_it.read_ptr1; |
| 1295 | |
| 1296 | uchar *index_tuple= key_in_buf; |
| 1297 | if (owner->keypar.use_key_pointers) |
| 1298 | memcpy(&index_tuple, key_in_buf, sizeof(char*)); |
| 1299 | |
| 1300 | /* Check out how many more identical keys are following */ |
| 1301 | while (!identical_key_it.read()) |
| 1302 | { |
| 1303 | if (Mrr_ordered_index_reader::compare_keys(owner, key_in_buf, |
| 1304 | identical_key_it.read_ptr1)) |
| 1305 | break; |
| 1306 | last_identical_key_ptr= identical_key_it.read_ptr1; |
| 1307 | } |
| 1308 | identical_key_it.init(owner->key_buffer); |
| 1309 | res= owner->file->ha_index_read_map(owner->file->get_table()->record[0], |
| 1310 | index_tuple, |
| 1311 | owner->keypar.key_tuple_map, |
| 1312 | HA_READ_KEY_EXACT); |
| 1313 | |
| 1314 | if (res) |
| 1315 | { |
| 1316 | /* Failed to find any matching records */ |
| 1317 | move_to_next_key_value(); |
| 1318 | return res; |
| 1319 | } |
| 1320 | owner->have_saved_rowid= FALSE; |
| 1321 | get_next_row= FALSE; |
| 1322 | return 0; |
| 1323 | } |
| 1324 | |
| 1325 | |
| 1326 | int Key_value_records_iterator::get_next(range_id_t *range_info) |
| 1327 | { |
| 1328 | int res; |
| 1329 | |
| 1330 | if (get_next_row) |
| 1331 | { |
| 1332 | if (owner->keypar.index_ranges_unique) |
| 1333 | { |
| 1334 | /* We're using a full unique key, no point to call index_next_same */ |
| 1335 | return HA_ERR_END_OF_FILE; |
| 1336 | } |
| 1337 | |
| 1338 | handler *h= owner->file; |
| 1339 | uchar *lookup_key; |
| 1340 | if (owner->keypar.use_key_pointers) |
| 1341 | memcpy(&lookup_key, identical_key_it.read_ptr1, sizeof(void*)); |
| 1342 | else |
| 1343 | lookup_key= identical_key_it.read_ptr1; |
| 1344 | |
| 1345 | if ((res= h->ha_index_next_same(h->get_table()->record[0], |
| 1346 | lookup_key, |
| 1347 | owner->keypar.key_tuple_length))) |
| 1348 | { |
| 1349 | /* It's either HA_ERR_END_OF_FILE or some other error */ |
| 1350 | return res; |
| 1351 | } |
| 1352 | identical_key_it.init(owner->key_buffer); |
| 1353 | owner->have_saved_rowid= FALSE; |
| 1354 | get_next_row= FALSE; |
| 1355 | } |
| 1356 | |
| 1357 | identical_key_it.read(); /* This gets us next range_id */ |
| 1358 | memcpy(range_info, identical_key_it.read_ptr2, sizeof(range_id_t)); |
| 1359 | |
| 1360 | if (!last_identical_key_ptr || |
| 1361 | (identical_key_it.read_ptr1 == last_identical_key_ptr)) |
| 1362 | { |
| 1363 | /* |
| 1364 | We've reached the last of the identical keys that current record is a |
| 1365 | match for. Set get_next_row=TRUE so that we read the next index record |
| 1366 | on the next call to this function. |
| 1367 | */ |
| 1368 | get_next_row= TRUE; |
| 1369 | } |
| 1370 | return 0; |
| 1371 | } |
| 1372 | |
| 1373 | |
| 1374 | void Key_value_records_iterator::move_to_next_key_value() |
| 1375 | { |
| 1376 | while (!owner->key_buffer->read() && |
| 1377 | (owner->key_buffer->read_ptr1 != last_identical_key_ptr)) {} |
| 1378 | } |
| 1379 | |
| 1380 | |
| 1381 | /** |
| 1382 | DS-MRR implementation: multi_range_read_next() function. |
| 1383 | |
| 1384 | Calling convention is like multi_range_read_next() has. |
| 1385 | */ |
| 1386 | |
| 1387 | int DsMrr_impl::dsmrr_next(range_id_t *range_info) |
| 1388 | { |
| 1389 | int res; |
| 1390 | if (strategy_exhausted) |
| 1391 | return HA_ERR_END_OF_FILE; |
| 1392 | |
| 1393 | while ((res= strategy->get_next(range_info)) == HA_ERR_END_OF_FILE) |
| 1394 | { |
| 1395 | if ((res= strategy->refill_buffer(FALSE))) |
| 1396 | break; /* EOF or error */ |
| 1397 | } |
| 1398 | return res; |
| 1399 | } |
| 1400 | |
| 1401 | |
| 1402 | /** |
| 1403 | DS-MRR implementation: multi_range_read_info() function |
| 1404 | */ |
| 1405 | ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows, |
| 1406 | uint key_parts, |
| 1407 | uint *bufsz, uint *flags, Cost_estimate *cost) |
| 1408 | { |
| 1409 | ha_rows res __attribute__((unused)); |
| 1410 | uint def_flags= *flags; |
| 1411 | uint def_bufsz= *bufsz; |
| 1412 | |
| 1413 | /* Get cost/flags/mem_usage of default MRR implementation */ |
| 1414 | res= primary_file->handler::multi_range_read_info(keyno, n_ranges, rows, |
| 1415 | key_parts, &def_bufsz, |
| 1416 | &def_flags, cost); |
| 1417 | DBUG_ASSERT(!res); |
| 1418 | |
| 1419 | if ((*flags & HA_MRR_USE_DEFAULT_IMPL) || |
| 1420 | choose_mrr_impl(keyno, rows, flags, bufsz, cost)) |
| 1421 | { |
| 1422 | /* Default implementation is choosen */ |
| 1423 | DBUG_PRINT("info" , ("Default MRR implementation choosen" )); |
| 1424 | *flags= def_flags; |
| 1425 | *bufsz= def_bufsz; |
| 1426 | } |
| 1427 | else |
| 1428 | { |
| 1429 | /* *flags and *bufsz were set by choose_mrr_impl */ |
| 1430 | DBUG_PRINT("info" , ("DS-MRR implementation choosen" )); |
| 1431 | } |
| 1432 | return 0; |
| 1433 | } |
| 1434 | |
| 1435 | |
| 1436 | /** |
| 1437 | DS-MRR Implementation: multi_range_read_info_const() function |
| 1438 | */ |
| 1439 | |
| 1440 | ha_rows DsMrr_impl::dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq, |
| 1441 | void *seq_init_param, uint n_ranges, |
| 1442 | uint *bufsz, uint *flags, Cost_estimate *cost) |
| 1443 | { |
| 1444 | ha_rows rows; |
| 1445 | uint def_flags= *flags; |
| 1446 | uint def_bufsz= *bufsz; |
| 1447 | /* Get cost/flags/mem_usage of default MRR implementation */ |
| 1448 | rows= primary_file->handler::multi_range_read_info_const(keyno, seq, |
| 1449 | seq_init_param, |
| 1450 | n_ranges, |
| 1451 | &def_bufsz, |
| 1452 | &def_flags, cost); |
| 1453 | if (rows == HA_POS_ERROR) |
| 1454 | { |
| 1455 | /* Default implementation can't perform MRR scan => we can't either */ |
| 1456 | return rows; |
| 1457 | } |
| 1458 | |
| 1459 | /* |
| 1460 | If HA_MRR_USE_DEFAULT_IMPL has been passed to us, that is an order to |
| 1461 | use the default MRR implementation (we need it for UPDATE/DELETE). |
| 1462 | Otherwise, make a choice based on cost and @@optimizer_switch settings |
| 1463 | */ |
| 1464 | if ((*flags & HA_MRR_USE_DEFAULT_IMPL) || |
| 1465 | choose_mrr_impl(keyno, rows, flags, bufsz, cost)) |
| 1466 | { |
| 1467 | DBUG_PRINT("info" , ("Default MRR implementation choosen" )); |
| 1468 | *flags= def_flags; |
| 1469 | *bufsz= def_bufsz; |
| 1470 | } |
| 1471 | else |
| 1472 | { |
| 1473 | /* *flags and *bufsz were set by choose_mrr_impl */ |
| 1474 | DBUG_PRINT("info" , ("DS-MRR implementation choosen" )); |
| 1475 | } |
| 1476 | return rows; |
| 1477 | } |
| 1478 | |
| 1479 | |
| 1480 | /** |
| 1481 | Check if key has partially-covered columns |
| 1482 | |
| 1483 | We can't use DS-MRR to perform range scans when the ranges are over |
| 1484 | partially-covered keys, because we'll not have full key part values |
| 1485 | (we'll have their prefixes from the index) and will not be able to check |
| 1486 | if we've reached the end the range. |
| 1487 | |
| 1488 | @param keyno Key to check |
| 1489 | |
| 1490 | @todo |
| 1491 | Allow use of DS-MRR in cases where the index has partially-covered |
| 1492 | components but they are not used for scanning. |
| 1493 | |
| 1494 | @retval TRUE Yes |
| 1495 | @retval FALSE No |
| 1496 | */ |
| 1497 | |
| 1498 | bool key_uses_partial_cols(TABLE_SHARE *share, uint keyno) |
| 1499 | { |
| 1500 | KEY_PART_INFO *kp= share->key_info[keyno].key_part; |
| 1501 | KEY_PART_INFO *kp_end= kp + share->key_info[keyno].user_defined_key_parts; |
| 1502 | for (; kp != kp_end; kp++) |
| 1503 | { |
| 1504 | if (!kp->field->part_of_key.is_set(keyno)) |
| 1505 | return TRUE; |
| 1506 | } |
| 1507 | return FALSE; |
| 1508 | } |
| 1509 | |
| 1510 | |
| 1511 | /* |
| 1512 | Check if key/flags allow DS-MRR/CPK strategy to be used |
| 1513 | |
| 1514 | @param thd |
| 1515 | @param keyno Index that will be used |
| 1516 | @param mrr_flags |
| 1517 | |
| 1518 | @retval TRUE DS-MRR/CPK should be used |
| 1519 | @retval FALSE Otherwise |
| 1520 | */ |
| 1521 | |
| 1522 | bool DsMrr_impl::check_cpk_scan(THD *thd, TABLE_SHARE *share, uint keyno, |
| 1523 | uint mrr_flags) |
| 1524 | { |
| 1525 | return MY_TEST((mrr_flags & HA_MRR_SINGLE_POINT) && |
| 1526 | keyno == share->primary_key && |
| 1527 | primary_file->primary_key_is_clustered() && |
| 1528 | optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS)); |
| 1529 | } |
| 1530 | |
| 1531 | |
| 1532 | /* |
| 1533 | DS-MRR Internals: Choose between Default MRR implementation and DS-MRR |
| 1534 | |
| 1535 | Make the choice between using Default MRR implementation and DS-MRR. |
| 1536 | This function contains common functionality factored out of dsmrr_info() |
| 1537 | and dsmrr_info_const(). The function assumes that the default MRR |
| 1538 | implementation's applicability requirements are satisfied. |
| 1539 | |
| 1540 | @param keyno Index number |
| 1541 | @param rows E(full rows to be retrieved) |
| 1542 | @param flags IN MRR flags provided by the MRR user |
| 1543 | OUT If DS-MRR is choosen, flags of DS-MRR implementation |
| 1544 | else the value is not modified |
| 1545 | @param bufsz IN If DS-MRR is choosen, buffer use of DS-MRR implementation |
| 1546 | else the value is not modified |
| 1547 | @param cost IN Cost of default MRR implementation |
| 1548 | OUT If DS-MRR is choosen, cost of DS-MRR scan |
| 1549 | else the value is not modified |
| 1550 | |
| 1551 | @retval TRUE Default MRR implementation should be used |
| 1552 | @retval FALSE DS-MRR implementation should be used |
| 1553 | */ |
| 1554 | |
| 1555 | |
| 1556 | bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, |
| 1557 | uint *bufsz, Cost_estimate *cost) |
| 1558 | { |
| 1559 | Cost_estimate dsmrr_cost; |
| 1560 | bool res; |
| 1561 | THD *thd= primary_file->get_table()->in_use; |
| 1562 | TABLE_SHARE *share= primary_file->get_table_share(); |
| 1563 | |
| 1564 | bool doing_cpk_scan= check_cpk_scan(thd, share, keyno, *flags); |
| 1565 | bool using_cpk= MY_TEST(keyno == share->primary_key && |
| 1566 | primary_file->primary_key_is_clustered()); |
| 1567 | *flags &= ~HA_MRR_IMPLEMENTATION_FLAGS; |
| 1568 | if (!optimizer_flag(thd, OPTIMIZER_SWITCH_MRR) || |
| 1569 | *flags & HA_MRR_INDEX_ONLY || |
| 1570 | (using_cpk && !doing_cpk_scan) || key_uses_partial_cols(share, keyno)) |
| 1571 | { |
| 1572 | /* Use the default implementation */ |
| 1573 | *flags |= HA_MRR_USE_DEFAULT_IMPL; |
| 1574 | *flags &= ~HA_MRR_IMPLEMENTATION_FLAGS; |
| 1575 | return TRUE; |
| 1576 | } |
| 1577 | |
| 1578 | uint add_len= share->key_info[keyno].key_length + primary_file->ref_length; |
| 1579 | *bufsz -= add_len; |
| 1580 | if (get_disk_sweep_mrr_cost(keyno, rows, *flags, bufsz, &dsmrr_cost)) |
| 1581 | return TRUE; |
| 1582 | *bufsz += add_len; |
| 1583 | |
| 1584 | bool force_dsmrr; |
| 1585 | /* |
| 1586 | If mrr_cost_based flag is not set, then set cost of DS-MRR to be minimum of |
| 1587 | DS-MRR and Default implementations cost. This allows one to force use of |
| 1588 | DS-MRR whenever it is applicable without affecting other cost-based |
| 1589 | choices. |
| 1590 | */ |
| 1591 | if ((force_dsmrr= !optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_COST_BASED)) && |
| 1592 | dsmrr_cost.total_cost() > cost->total_cost()) |
| 1593 | dsmrr_cost= *cost; |
| 1594 | |
| 1595 | if (force_dsmrr || dsmrr_cost.total_cost() <= cost->total_cost()) |
| 1596 | { |
| 1597 | *flags &= ~HA_MRR_USE_DEFAULT_IMPL; /* Use the DS-MRR implementation */ |
| 1598 | *flags &= ~HA_MRR_SORTED; /* We will return unordered output */ |
| 1599 | *cost= dsmrr_cost; |
| 1600 | res= FALSE; |
| 1601 | |
| 1602 | |
| 1603 | if ((using_cpk && doing_cpk_scan) || |
| 1604 | (optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS) && |
| 1605 | *flags & HA_MRR_SINGLE_POINT)) |
| 1606 | { |
| 1607 | *flags |= DSMRR_IMPL_SORT_KEYS; |
| 1608 | } |
| 1609 | |
| 1610 | if (!(using_cpk && doing_cpk_scan) && |
| 1611 | !(*flags & HA_MRR_INDEX_ONLY)) |
| 1612 | { |
| 1613 | *flags |= DSMRR_IMPL_SORT_ROWIDS; |
| 1614 | } |
| 1615 | /* |
| 1616 | if ((*flags & HA_MRR_SINGLE_POINT) && |
| 1617 | optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS)) |
| 1618 | *flags |= HA_MRR_MATERIALIZED_KEYS; |
| 1619 | */ |
| 1620 | } |
| 1621 | else |
| 1622 | { |
| 1623 | /* Use the default MRR implementation */ |
| 1624 | res= TRUE; |
| 1625 | } |
| 1626 | return res; |
| 1627 | } |
| 1628 | |
| 1629 | /* |
| 1630 | Take the flags we've returned previously and print one of |
| 1631 | - Key-ordered scan |
| 1632 | - Rowid-ordered scan |
| 1633 | - Key-ordered Rowid-ordered scan |
| 1634 | */ |
| 1635 | |
| 1636 | int DsMrr_impl::dsmrr_explain_info(uint mrr_mode, char *str, size_t size) |
| 1637 | { |
| 1638 | const char *key_ordered= "Key-ordered scan" ; |
| 1639 | const char *rowid_ordered= "Rowid-ordered scan" ; |
| 1640 | const char *both_ordered= "Key-ordered Rowid-ordered scan" ; |
| 1641 | const char *used_str="" ; |
| 1642 | const uint BOTH_FLAGS= (DSMRR_IMPL_SORT_KEYS | DSMRR_IMPL_SORT_ROWIDS); |
| 1643 | |
| 1644 | if (!(mrr_mode & HA_MRR_USE_DEFAULT_IMPL)) |
| 1645 | { |
| 1646 | if ((mrr_mode & BOTH_FLAGS) == BOTH_FLAGS) |
| 1647 | used_str= both_ordered; |
| 1648 | else if (mrr_mode & DSMRR_IMPL_SORT_KEYS) |
| 1649 | used_str= key_ordered; |
| 1650 | else if (mrr_mode & DSMRR_IMPL_SORT_ROWIDS) |
| 1651 | used_str= rowid_ordered; |
| 1652 | |
| 1653 | size_t used_str_len= strlen(used_str); |
| 1654 | size_t copy_len= MY_MIN(used_str_len, size); |
| 1655 | memcpy(str, used_str, copy_len); |
| 1656 | return (int)copy_len; |
| 1657 | } |
| 1658 | return 0; |
| 1659 | } |
| 1660 | |
| 1661 | |
| 1662 | static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, Cost_estimate *cost); |
| 1663 | |
| 1664 | |
| 1665 | /** |
| 1666 | Get cost of DS-MRR scan |
| 1667 | |
| 1668 | @param keynr Index to be used |
| 1669 | @param rows E(Number of rows to be scanned) |
| 1670 | @param flags Scan parameters (HA_MRR_* flags) |
| 1671 | @param buffer_size INOUT Buffer size |
| 1672 | @param cost OUT The cost |
| 1673 | |
| 1674 | @retval FALSE OK |
| 1675 | @retval TRUE Error, DS-MRR cannot be used (the buffer is too small |
| 1676 | for even 1 rowid) |
| 1677 | */ |
| 1678 | |
| 1679 | bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags, |
| 1680 | uint *buffer_size, Cost_estimate *cost) |
| 1681 | { |
| 1682 | ulong max_buff_entries, elem_size; |
| 1683 | ha_rows rows_in_full_step; |
| 1684 | ha_rows rows_in_last_step; |
| 1685 | uint n_full_steps; |
| 1686 | double index_read_cost; |
| 1687 | |
| 1688 | elem_size= primary_file->ref_length + |
| 1689 | sizeof(void*) * (!MY_TEST(flags & HA_MRR_NO_ASSOCIATION)); |
| 1690 | max_buff_entries = *buffer_size / elem_size; |
| 1691 | |
| 1692 | if (!max_buff_entries) |
| 1693 | return TRUE; /* Buffer has not enough space for even 1 rowid */ |
| 1694 | |
| 1695 | /* Number of iterations we'll make with full buffer */ |
| 1696 | n_full_steps= (uint)floor(rows2double(rows) / max_buff_entries); |
| 1697 | |
| 1698 | /* |
| 1699 | Get numbers of rows we'll be processing in |
| 1700 | - non-last sweep, with full buffer |
| 1701 | - last iteration, with non-full buffer |
| 1702 | */ |
| 1703 | rows_in_full_step= max_buff_entries; |
| 1704 | rows_in_last_step= rows % max_buff_entries; |
| 1705 | |
| 1706 | /* Adjust buffer size if we expect to use only part of the buffer */ |
| 1707 | if (n_full_steps) |
| 1708 | { |
| 1709 | get_sort_and_sweep_cost(table, rows_in_full_step, cost); |
| 1710 | cost->multiply(n_full_steps); |
| 1711 | } |
| 1712 | else |
| 1713 | { |
| 1714 | cost->reset(); |
| 1715 | *buffer_size= (uint)MY_MAX(*buffer_size, |
| 1716 | (size_t)(1.2*rows_in_last_step) * elem_size + |
| 1717 | primary_file->ref_length + table->key_info[keynr].key_length); |
| 1718 | } |
| 1719 | |
| 1720 | Cost_estimate last_step_cost; |
| 1721 | get_sort_and_sweep_cost(table, rows_in_last_step, &last_step_cost); |
| 1722 | cost->add(&last_step_cost); |
| 1723 | |
| 1724 | if (n_full_steps != 0) |
| 1725 | cost->mem_cost= *buffer_size; |
| 1726 | else |
| 1727 | cost->mem_cost= (double)rows_in_last_step * elem_size; |
| 1728 | |
| 1729 | /* Total cost of all index accesses */ |
| 1730 | index_read_cost= primary_file->keyread_time(keynr, 1, rows); |
| 1731 | cost->add_io(index_read_cost, 1 /* Random seeks */); |
| 1732 | return FALSE; |
| 1733 | } |
| 1734 | |
| 1735 | |
| 1736 | /* |
| 1737 | Get cost of one sort-and-sweep step |
| 1738 | |
| 1739 | It consists of two parts: |
| 1740 | - sort an array of #nrows ROWIDs using qsort |
| 1741 | - read #nrows records from table in a sweep. |
| 1742 | |
| 1743 | @param table Table being accessed |
| 1744 | @param nrows Number of rows to be sorted and retrieved |
| 1745 | @param cost OUT The cost of scan |
| 1746 | */ |
| 1747 | |
| 1748 | static |
| 1749 | void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, Cost_estimate *cost) |
| 1750 | { |
| 1751 | if (nrows) |
| 1752 | { |
| 1753 | get_sweep_read_cost(table, nrows, FALSE, cost); |
| 1754 | /* Add cost of qsort call: n * log2(n) * cost(rowid_comparison) */ |
| 1755 | double cmp_op= rows2double(nrows) * (1.0 / TIME_FOR_COMPARE_ROWID); |
| 1756 | if (cmp_op < 3) |
| 1757 | cmp_op= 3; |
| 1758 | cost->cpu_cost += cmp_op * log2(cmp_op); |
| 1759 | } |
| 1760 | else |
| 1761 | cost->reset(); |
| 1762 | } |
| 1763 | |
| 1764 | |
| 1765 | /** |
| 1766 | Get cost of reading nrows table records in a "disk sweep" |
| 1767 | |
| 1768 | A disk sweep read is a sequence of handler->rnd_pos(rowid) calls that made |
| 1769 | for an ordered sequence of rowids. |
| 1770 | |
| 1771 | We assume hard disk IO. The read is performed as follows: |
| 1772 | |
| 1773 | 1. The disk head is moved to the needed cylinder |
| 1774 | 2. The controller waits for the plate to rotate |
| 1775 | 3. The data is transferred |
| 1776 | |
| 1777 | Time to do #3 is insignificant compared to #2+#1. |
| 1778 | |
| 1779 | Time to move the disk head is proportional to head travel distance. |
| 1780 | |
| 1781 | Time to wait for the plate to rotate depends on whether the disk head |
| 1782 | was moved or not. |
| 1783 | |
| 1784 | If disk head wasn't moved, the wait time is proportional to distance |
| 1785 | between the previous block and the block we're reading. |
| 1786 | |
| 1787 | If the head was moved, we don't know how much we'll need to wait for the |
| 1788 | plate to rotate. We assume the wait time to be a variate with a mean of |
| 1789 | 0.5 of full rotation time. |
| 1790 | |
| 1791 | Our cost units are "random disk seeks". The cost of random disk seek is |
| 1792 | actually not a constant, it depends one range of cylinders we're going |
| 1793 | to access. We make it constant by introducing a fuzzy concept of "typical |
| 1794 | datafile length" (it's fuzzy as it's hard to tell whether it should |
| 1795 | include index file, temp.tables etc). Then random seek cost is: |
| 1796 | |
| 1797 | 1 = half_rotation_cost + move_cost * 1/3 * typical_data_file_length |
| 1798 | |
| 1799 | We define half_rotation_cost as DISK_SEEK_BASE_COST=0.9. |
| 1800 | |
| 1801 | @param table Table to be accessed |
| 1802 | @param nrows Number of rows to retrieve |
| 1803 | @param interrupted TRUE <=> Assume that the disk sweep will be |
| 1804 | interrupted by other disk IO. FALSE - otherwise. |
| 1805 | @param cost OUT The cost. |
| 1806 | */ |
| 1807 | |
| 1808 | void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted, |
| 1809 | Cost_estimate *cost) |
| 1810 | { |
| 1811 | DBUG_ENTER("get_sweep_read_cost" ); |
| 1812 | |
| 1813 | cost->reset(); |
| 1814 | if (table->file->primary_key_is_clustered()) |
| 1815 | { |
| 1816 | cost->io_count= table->file->read_time(table->s->primary_key, |
| 1817 | (uint) nrows, nrows); |
| 1818 | } |
| 1819 | else |
| 1820 | { |
| 1821 | double n_blocks= |
| 1822 | ceil(ulonglong2double(table->file->stats.data_file_length) / IO_SIZE); |
| 1823 | double busy_blocks= |
| 1824 | n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(nrows))); |
| 1825 | if (busy_blocks < 1.0) |
| 1826 | busy_blocks= 1.0; |
| 1827 | |
| 1828 | DBUG_PRINT("info" ,("sweep: nblocks=%g, busy_blocks=%g" , n_blocks, |
| 1829 | busy_blocks)); |
| 1830 | cost->io_count= busy_blocks; |
| 1831 | |
| 1832 | if (!interrupted) |
| 1833 | { |
| 1834 | /* Assume reading is done in one 'sweep' */ |
| 1835 | cost->avg_io_cost= (DISK_SEEK_BASE_COST + |
| 1836 | DISK_SEEK_PROP_COST*n_blocks/busy_blocks); |
| 1837 | } |
| 1838 | } |
| 1839 | DBUG_PRINT("info" ,("returning cost=%g" , cost->total_cost())); |
| 1840 | DBUG_VOID_RETURN; |
| 1841 | } |
| 1842 | |
| 1843 | |
| 1844 | /* ************************************************************************** |
| 1845 | * DS-MRR implementation ends |
| 1846 | ***************************************************************************/ |
| 1847 | |
| 1848 | |
| 1849 | |