| 1 | /* |
| 2 | Copyright (c) 2009, 2011, Monty Program Ab |
| 3 | |
| 4 | This program is free software; you can redistribute it and/or modify |
| 5 | it under the terms of the GNU General Public License as published by |
| 6 | the Free Software Foundation; version 2 of the License. |
| 7 | |
| 8 | This program is distributed in the hope that it will be useful, |
| 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 11 | GNU General Public License for more details. |
| 12 | |
| 13 | You should have received a copy of the GNU General Public License |
| 14 | along with this program; if not, write to the Free Software |
| 15 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ |
| 16 | |
| 17 | /** |
| 18 | @defgroup DS-MRR declarations |
| 19 | @{ |
| 20 | */ |
| 21 | |
| 22 | /** |
| 23 | A Disk-Sweep implementation of MRR Interface (DS-MRR for short) |
| 24 | |
| 25 | This is a "plugin"(*) for storage engines that allows to |
| 26 | 1. When doing index scans, read table rows in rowid order; |
| 27 | 2. when making many index lookups, do them in key order and don't |
| 28 | lookup the same key value multiple times; |
| 29 | 3. Do both #1 and #2, when applicable. |
| 30 | These changes are expected to speed up query execution for disk-based |
| 31 | storage engines running io-bound loads and "big" queries (ie. queries that |
| 32 | do joins and enumerate lots of records). |
| 33 | |
| 34 | (*) - only conceptually. No dynamic loading or binary compatibility of any |
| 35 | kind. |
| 36 | |
| 37 | General scheme of things: |
| 38 | |
| 39 | SQL Layer code |
| 40 | | | | |
| 41 | v v v |
| 42 | -|---|---|---- handler->multi_range_read_XXX() function calls |
| 43 | | | | |
| 44 | _____________________________________ |
| 45 | / DS-MRR module \ |
| 46 | | (order/de-duplicate lookup keys, | |
| 47 | | scan indexes in key order, | |
| 48 | | order/de-duplicate rowids, | |
| 49 | | retrieve full record reads in rowid | |
| 50 | | order) | |
| 51 | \_____________________________________/ |
| 52 | | | | |
| 53 | -|---|---|----- handler->read_range_first()/read_range_next(), |
| 54 | | | | handler->index_read(), handler->rnd_pos() calls. |
| 55 | | | | |
| 56 | v v v |
| 57 | Storage engine internals |
| 58 | |
| 59 | |
| 60 | Currently DS-MRR is used by MyISAM, InnoDB and Maria storage engines. |
| 61 | Potentially it can be used with any table handler that has disk-based data |
| 62 | storage and has better performance when reading data in rowid order. |
| 63 | */ |
| 64 | |
| 65 | #include "sql_lifo_buffer.h" |
| 66 | |
| 67 | class DsMrr_impl; |
| 68 | class Mrr_ordered_index_reader; |
| 69 | |
| 70 | |
| 71 | /* A structure with key parameters that's shared among several classes */ |
| 72 | class Key_parameters |
| 73 | { |
| 74 | public: |
| 75 | uint key_tuple_length; /* Length of index lookup tuple, in bytes */ |
| 76 | key_part_map key_tuple_map; /* keyparts used in index lookup tuples */ |
| 77 | |
| 78 | /* |
| 79 | This is |
| 80 | = key_tuple_length if we copy keys to buffer |
| 81 | = sizeof(void*) if we're using pointers to materialized keys. |
| 82 | */ |
| 83 | uint key_size_in_keybuf; |
| 84 | |
| 85 | /* TRUE <=> don't copy key values, use pointers to them instead. */ |
| 86 | bool use_key_pointers; |
| 87 | |
| 88 | /* TRUE <=> We can get at most one index tuple for a lookup key */ |
| 89 | bool index_ranges_unique; |
| 90 | }; |
| 91 | |
| 92 | |
| 93 | /** |
| 94 | A class to enumerate (record, range_id) pairs that match given key value. |
| 95 | |
| 96 | @note |
| 97 | |
| 98 | The idea is that we have a Lifo_buffer which holds (key, range_id) pairs |
| 99 | ordered by key value. From the front of the buffer we see |
| 100 | |
| 101 | (key_val1, range_id1), (key_val1, range_id2) ... (key_val2, range_idN) |
| 102 | |
| 103 | we take the first elements that have the same key value (key_val1 in the |
| 104 | example above), and make lookup into the table. The table will have |
| 105 | multiple matches for key_val1: |
| 106 | |
| 107 | == Table Index == |
| 108 | ... |
| 109 | key_val1 -> key_val1, index_tuple1 |
| 110 | key_val1, index_tuple2 |
| 111 | ... |
| 112 | key_val1, index_tupleN |
| 113 | ... |
| 114 | |
| 115 | Our goal is to produce all possible combinations, i.e. we need: |
| 116 | |
| 117 | {(key_val1, index_tuple1), range_id1} |
| 118 | {(key_val1, index_tuple1), range_id2} |
| 119 | ... ... | |
| 120 | {(key_val1, index_tuple1), range_idN}, |
| 121 | |
| 122 | {(key_val1, index_tuple2), range_id1} |
| 123 | {(key_val1, index_tuple2), range_id2} |
| 124 | ... ... | |
| 125 | {(key_val1, index_tuple2), range_idN}, |
| 126 | |
| 127 | ... ... ... |
| 128 | |
| 129 | {(key_val1, index_tupleK), range_idN} |
| 130 | */ |
| 131 | |
| 132 | class Key_value_records_iterator |
| 133 | { |
| 134 | /* Use this to get table handler, key buffer and other parameters */ |
| 135 | Mrr_ordered_index_reader *owner; |
| 136 | |
| 137 | /* Iterator to get (key, range_id) pairs from */ |
| 138 | Lifo_buffer_iterator identical_key_it; |
| 139 | |
| 140 | /* |
| 141 | Last of the identical key values (when we get this pointer from |
| 142 | identical_key_it, it will be time to stop). |
| 143 | */ |
| 144 | uchar *last_identical_key_ptr; |
| 145 | |
| 146 | /* |
| 147 | FALSE <=> we're right after the init() call, the record has been already |
| 148 | read with owner->file->index_read_map() call |
| 149 | */ |
| 150 | bool get_next_row; |
| 151 | |
| 152 | public: |
| 153 | int init(Mrr_ordered_index_reader *owner_arg); |
| 154 | int get_next(range_id_t *range_info); |
| 155 | void move_to_next_key_value(); |
| 156 | }; |
| 157 | |
| 158 | |
| 159 | /* |
| 160 | Buffer manager interface. Mrr_reader objects use it to inqure DsMrr_impl |
| 161 | to manage buffer space for them. |
| 162 | */ |
| 163 | typedef struct st_buffer_manager |
| 164 | { |
| 165 | public: |
| 166 | /* Opaque value to be passed as the first argument to all member functions */ |
| 167 | void *arg; |
| 168 | |
| 169 | /* |
| 170 | This is called when we've freed more space from the rowid buffer. The |
| 171 | callee will get the unused space from the rowid buffer and give it to the |
| 172 | key buffer. |
| 173 | */ |
| 174 | void (*redistribute_buffer_space)(void *arg); |
| 175 | |
| 176 | /* |
| 177 | This is called when both key and rowid buffers are empty, and so it's time |
| 178 | to reset them to their original size (They've lost their original size, |
| 179 | because we were dynamically growing rowid buffer and shrinking key buffer). |
| 180 | */ |
| 181 | void (*reset_buffer_sizes)(void *arg); |
| 182 | |
| 183 | } Buffer_manager; |
| 184 | |
| 185 | |
| 186 | /* |
| 187 | Mrr_reader - DS-MRR execution strategy abstraction |
| 188 | |
| 189 | A reader produces ([index]_record, range_info) pairs, and requires periodic |
| 190 | refill operations. |
| 191 | |
| 192 | - one starts using the reader by calling reader->get_next(), |
| 193 | - when a get_next() call returns HA_ERR_END_OF_FILE, one must call |
| 194 | refill_buffer() before they can make more get_next() calls. |
| 195 | - when refill_buffer() returns HA_ERR_END_OF_FILE, this means the real |
| 196 | end of stream and get_next() should not be called anymore. |
| 197 | |
| 198 | Both functions can return other error codes, these mean unrecoverable errors |
| 199 | after which one cannot continue. |
| 200 | */ |
| 201 | |
| 202 | class Mrr_reader |
| 203 | { |
| 204 | public: |
| 205 | virtual int get_next(range_id_t *range_info) = 0; |
| 206 | virtual int refill_buffer(bool initial) = 0; |
| 207 | virtual ~Mrr_reader() {}; /* just to remove compiler warning */ |
| 208 | }; |
| 209 | |
| 210 | |
| 211 | /* |
| 212 | A common base for readers that do index scans and produce index tuples |
| 213 | */ |
| 214 | |
| 215 | class Mrr_index_reader : public Mrr_reader |
| 216 | { |
| 217 | protected: |
| 218 | handler *file; /* Handler object to use */ |
| 219 | public: |
| 220 | virtual int init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, |
| 221 | void *seq_init_param, uint n_ranges, |
| 222 | uint mode, Key_parameters *key_par, |
| 223 | Lifo_buffer *key_buffer, |
| 224 | Buffer_manager *buf_manager_arg) = 0; |
| 225 | |
| 226 | /* Get pointer to place where every get_next() call will put rowid */ |
| 227 | virtual uchar *get_rowid_ptr() = 0; |
| 228 | /* Get the rowid (call this after get_next() call) */ |
| 229 | virtual void position(); |
| 230 | virtual bool skip_record(range_id_t range_id, uchar *rowid) = 0; |
| 231 | |
| 232 | virtual void interrupt_read() {} |
| 233 | virtual void resume_read() {} |
| 234 | }; |
| 235 | |
| 236 | |
| 237 | /* |
| 238 | A "bypass" index reader that just does and index scan. The index scan is done |
| 239 | by calling default MRR implementation (i.e. handler::multi_range_read_XXX()) |
| 240 | functions. |
| 241 | */ |
| 242 | |
| 243 | class Mrr_simple_index_reader : public Mrr_index_reader |
| 244 | { |
| 245 | public: |
| 246 | int init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, |
| 247 | void *seq_init_param, uint n_ranges, |
| 248 | uint mode, Key_parameters *key_par, |
| 249 | Lifo_buffer *key_buffer, |
| 250 | Buffer_manager *buf_manager_arg); |
| 251 | int get_next(range_id_t *range_info); |
| 252 | int refill_buffer(bool initial) { return initial? 0: HA_ERR_END_OF_FILE; } |
| 253 | uchar *get_rowid_ptr() { return file->ref; } |
| 254 | bool skip_record(range_id_t range_id, uchar *rowid) |
| 255 | { |
| 256 | return (file->mrr_funcs.skip_record && |
| 257 | file->mrr_funcs.skip_record(file->mrr_iter, range_id, rowid)); |
| 258 | } |
| 259 | }; |
| 260 | |
| 261 | |
| 262 | /* |
| 263 | A reader that sorts the key values before it makes the index lookups. |
| 264 | */ |
| 265 | |
| 266 | class Mrr_ordered_index_reader : public Mrr_index_reader |
| 267 | { |
| 268 | public: |
| 269 | int init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, |
| 270 | void *seq_init_param, uint n_ranges, |
| 271 | uint mode, Key_parameters *key_par, |
| 272 | Lifo_buffer *key_buffer, |
| 273 | Buffer_manager *buf_manager_arg); |
| 274 | int get_next(range_id_t *range_info); |
| 275 | int refill_buffer(bool initial); |
| 276 | uchar *get_rowid_ptr() { return file->ref; } |
| 277 | |
| 278 | bool skip_record(range_id_t range_info, uchar *rowid) |
| 279 | { |
| 280 | return (mrr_funcs.skip_record && |
| 281 | mrr_funcs.skip_record(mrr_iter, range_info, rowid)); |
| 282 | } |
| 283 | |
| 284 | bool skip_index_tuple(range_id_t range_info) |
| 285 | { |
| 286 | return (mrr_funcs.skip_index_tuple && |
| 287 | mrr_funcs.skip_index_tuple(mrr_iter, range_info)); |
| 288 | } |
| 289 | |
| 290 | bool set_interruption_temp_buffer(uint rowid_length, uint key_len, |
| 291 | uint saved_pk_len, |
| 292 | uchar **space_start, uchar *space_end); |
| 293 | void set_no_interruption_temp_buffer(); |
| 294 | |
| 295 | void interrupt_read(); |
| 296 | void resume_read(); |
| 297 | void position(); |
| 298 | private: |
| 299 | Key_value_records_iterator kv_it; |
| 300 | |
| 301 | bool scanning_key_val_iter; |
| 302 | |
| 303 | /* Buffer to store (key, range_id) pairs */ |
| 304 | Lifo_buffer *key_buffer; |
| 305 | |
| 306 | /* This manages key buffer allocation and sizing for us */ |
| 307 | Buffer_manager *buf_manager; |
| 308 | |
| 309 | Key_parameters keypar; /* index scan and lookup tuple parameters */ |
| 310 | |
| 311 | /* TRUE <=> need range association, buffers hold {rowid, range_id} pairs */ |
| 312 | bool is_mrr_assoc; |
| 313 | |
| 314 | /* Range sequence iteration members */ |
| 315 | RANGE_SEQ_IF mrr_funcs; |
| 316 | range_seq_t mrr_iter; |
| 317 | |
| 318 | /* TRUE == reached eof when enumerating ranges */ |
| 319 | bool source_exhausted; |
| 320 | |
| 321 | /* |
| 322 | Following members are for interrupt_read()/resume_read(). The idea is that |
| 323 | in some cases index scan that is done by this object is interrupted by |
| 324 | rnd_pos() calls made by Mrr_ordered_rndpos_reader. The problem is that |
| 325 | we're sharing handler->record[0] with that object, and it destroys its |
| 326 | contents. |
| 327 | We need to save/restore our current |
| 328 | - index tuple (for pushed index condition checks) |
| 329 | - clustered primary key values (again, for pushed index condition checks) |
| 330 | - rowid of the last record we've retrieved (in case this rowid matches |
| 331 | multiple ranges and we'll need to return it again) |
| 332 | */ |
| 333 | bool support_scan_interruptions; |
| 334 | /* Space where we save the rowid of the last record we've returned */ |
| 335 | uchar *saved_rowid; |
| 336 | |
| 337 | /* TRUE <=> saved_rowid has the last saved rowid */ |
| 338 | bool have_saved_rowid; |
| 339 | |
| 340 | uchar *saved_key_tuple; /* Saved current key tuple */ |
| 341 | uchar *saved_primary_key; /* Saved current primary key tuple */ |
| 342 | |
| 343 | /* |
| 344 | TRUE<=> saved_key_tuple (and saved_primary_key when applicable) have |
| 345 | valid values. |
| 346 | */ |
| 347 | bool read_was_interrupted; |
| 348 | |
| 349 | static int compare_keys(void* arg, uchar* key1, uchar* key2); |
| 350 | static int compare_keys_reverse(void* arg, uchar* key1, uchar* key2); |
| 351 | |
| 352 | friend class Key_value_records_iterator; |
| 353 | friend class DsMrr_impl; |
| 354 | friend class Mrr_ordered_rndpos_reader; |
| 355 | }; |
| 356 | |
| 357 | |
| 358 | /* |
| 359 | A reader that gets rowids from an Mrr_index_reader, and then sorts them |
| 360 | before getting full records with handler->rndpos() calls. |
| 361 | */ |
| 362 | |
| 363 | class Mrr_ordered_rndpos_reader : public Mrr_reader |
| 364 | { |
| 365 | public: |
| 366 | int init(handler *file, Mrr_index_reader *index_reader, uint mode, |
| 367 | Lifo_buffer *buf); |
| 368 | int get_next(range_id_t *range_info); |
| 369 | int refill_buffer(bool initial); |
| 370 | private: |
| 371 | handler *file; /* Handler to use */ |
| 372 | |
| 373 | /* This what we get (rowid, range_info) pairs from */ |
| 374 | Mrr_index_reader *index_reader; |
| 375 | |
| 376 | /* index_reader->get_next() puts rowid here */ |
| 377 | uchar *index_rowid; |
| 378 | |
| 379 | /* TRUE <=> index_reader->refill_buffer() call has returned EOF */ |
| 380 | bool index_reader_exhausted; |
| 381 | |
| 382 | /* |
| 383 | TRUE <=> We should call index_reader->refill_buffer(). This happens if |
| 384 | 1. we've made index_reader->get_next() call which returned EOF |
| 385 | 2. we haven't made any index_reader calls (and our first call should |
| 386 | be index_reader->refill_buffer(initial=TRUE) |
| 387 | */ |
| 388 | bool index_reader_needs_refill; |
| 389 | |
| 390 | /* TRUE <=> need range association, buffers hold {rowid, range_id} pairs */ |
| 391 | bool is_mrr_assoc; |
| 392 | |
| 393 | /* |
| 394 | When reading from ordered rowid buffer: the rowid element of the last |
| 395 | buffer element that has rowid identical to this one. |
| 396 | */ |
| 397 | uchar *last_identical_rowid; |
| 398 | |
| 399 | /* Buffer to store (rowid, range_id) pairs */ |
| 400 | Lifo_buffer *rowid_buffer; |
| 401 | |
| 402 | int refill_from_index_reader(); |
| 403 | }; |
| 404 | |
| 405 | |
| 406 | /* |
| 407 | A primitive "factory" of various Mrr_*_reader classes (the point is to |
| 408 | get various kinds of readers without having to allocate them on the heap) |
| 409 | */ |
| 410 | |
| 411 | class Mrr_reader_factory |
| 412 | { |
| 413 | public: |
| 414 | Mrr_ordered_rndpos_reader ordered_rndpos_reader; |
| 415 | Mrr_ordered_index_reader ordered_index_reader; |
| 416 | Mrr_simple_index_reader simple_index_reader; |
| 417 | }; |
| 418 | |
| 419 | |
| 420 | #define DSMRR_IMPL_SORT_KEYS HA_MRR_IMPLEMENTATION_FLAG1 |
| 421 | #define DSMRR_IMPL_SORT_ROWIDS HA_MRR_IMPLEMENTATION_FLAG2 |
| 422 | |
| 423 | /* |
| 424 | DS-MRR implementation for one table. Create/use one object of this class for |
| 425 | each ha_{myisam/innobase/etc} object. That object will be further referred to |
| 426 | as "the handler" |
| 427 | |
| 428 | DsMrr_impl supports has the following execution strategies: |
| 429 | |
| 430 | - Bypass DS-MRR, pass all calls to default MRR implementation, which is |
| 431 | an MRR-to-non-MRR call converter. |
| 432 | - Key-Ordered Retrieval |
| 433 | - Rowid-Ordered Retrieval |
| 434 | |
| 435 | DsMrr_impl will use one of the above strategies, or a combination of them, |
| 436 | according to the following diagram: |
| 437 | |
| 438 | (mrr function calls) |
| 439 | | |
| 440 | +----------------->-----------------+ |
| 441 | | | |
| 442 | ___________v______________ _______________v________________ |
| 443 | / default: use lookup keys \ / KEY-ORDERED RETRIEVAL: \ |
| 444 | | (or ranges) in whatever | | sort lookup keys and then make | |
| 445 | | order they are supplied | | index lookups in index order | |
| 446 | \__________________________/ \________________________________/ |
| 447 | | | | | | |
| 448 | +---<---+ | +--------------->-----------|----+ |
| 449 | | | | | |
| 450 | | | +---------------+ | |
| 451 | | ______v___ ______ | _______________v_______________ |
| 452 | | / default: read \ | / ROWID-ORDERED RETRIEVAL: \ |
| 453 | | | table records | | | Before reading table records, | |
| 454 | v | in random order | v | sort their rowids and then | |
| 455 | | \_________________/ | | read them in rowid order | |
| 456 | | | | \_______________________________/ |
| 457 | | | | | |
| 458 | | | | | |
| 459 | +-->---+ | +----<------+-----------<--------+ |
| 460 | | | | |
| 461 | v v v |
| 462 | (table records and range_ids) |
| 463 | |
| 464 | The choice of strategy depends on MRR scan properties, table properties |
| 465 | (whether we're scanning clustered primary key), and @@optimizer_switch |
| 466 | settings. |
| 467 | |
| 468 | Key-Ordered Retrieval |
| 469 | --------------------- |
| 470 | The idea is: if MRR scan is essentially a series of lookups on |
| 471 | |
| 472 | tbl.key=value1 OR tbl.key=value2 OR ... OR tbl.key=valueN |
| 473 | |
| 474 | then it makes sense to collect and order the set of lookup values, i.e. |
| 475 | |
| 476 | sort(value1, value2, .. valueN) |
| 477 | |
| 478 | and then do index lookups in index order. This results in fewer index page |
| 479 | fetch operations, and we also can avoid making multiple index lookups for the |
| 480 | same value. That is, if value1=valueN we can easily discover that after |
| 481 | sorting and make one index lookup for them instead of two. |
| 482 | |
| 483 | Rowid-Ordered Retrieval |
| 484 | ----------------------- |
| 485 | If we do a regular index scan or a series of index lookups, we'll be hitting |
| 486 | table records at random. For disk-based engines, this is much slower than |
| 487 | reading the same records in disk order. We assume that disk ordering of |
| 488 | rows is the same as ordering of their rowids (which is provided by |
| 489 | handler::cmp_ref()) |
| 490 | In order to retrieve records in different order, we must separate index |
| 491 | scanning and record fetching, that is, MRR scan uses the following steps: |
| 492 | |
| 493 | 1. Scan the index (and only index, that is, with HA_EXTRA_KEYREAD on) and |
| 494 | fill a buffer with {rowid, range_id} pairs |
| 495 | 2. Sort the buffer by rowid value |
| 496 | 3. for each {rowid, range_id} pair in the buffer |
| 497 | get record by rowid and return the {record, range_id} pair |
| 498 | 4. Repeat the above steps until we've exhausted the list of ranges we're |
| 499 | scanning. |
| 500 | |
| 501 | Buffer space management considerations |
| 502 | -------------------------------------- |
| 503 | With regards to buffer/memory management, MRR interface specifies that |
| 504 | - SQL layer provides multi_range_read_init() with buffer of certain size. |
| 505 | - MRR implementation may use (i.e. have at its disposal till the end of |
| 506 | the MRR scan) all of the buffer, or return the unused end of the buffer |
| 507 | to SQL layer. |
| 508 | |
| 509 | DS-MRR needs buffer in order to accumulate and sort rowids and/or keys. When |
| 510 | we need to accumulate/sort only keys (or only rowids), it is fairly trivial. |
| 511 | |
| 512 | When we need to accumulate/sort both keys and rowids, efficient buffer use |
| 513 | gets complicated. We need to: |
| 514 | - First, accumulate keys and sort them |
| 515 | - Then use the keys (smaller values go first) to obtain rowids. A key is not |
| 516 | needed after we've got matching rowids for it. |
| 517 | - Make sure that rowids are accumulated at the front of the buffer, so that we |
| 518 | can return the end part of the buffer to SQL layer, should there be too |
| 519 | few rowid values to occupy the buffer. |
| 520 | |
| 521 | All of these goals are achieved by using the following scheme: |
| 522 | |
| 523 | | | We get an empty buffer from SQL layer. |
| 524 | |
| 525 | | *-| |
| 526 | | *----| First, we fill the buffer with keys. Key_buffer |
| 527 | | *-------| part grows from end of the buffer space to start |
| 528 | | *----------| (In this picture, the buffer is big enough to |
| 529 | | *-------------| accomodate all keys and even have some space left) |
| 530 | |
| 531 | | *=============| We want to do key-ordered index scan, so we sort |
| 532 | the keys |
| 533 | |
| 534 | |-x *===========| Then we use the keys get rowids. Rowids are |
| 535 | |----x *========| stored from start of buffer space towards the end. |
| 536 | |--------x *=====| The part of the buffer occupied with keys |
| 537 | |------------x *===| gradually frees up space for rowids. In this |
| 538 | |--------------x *=| picture we run out of keys before we've ran out |
| 539 | |----------------x | of buffer space (it can be other way as well). |
| 540 | |
| 541 | |================x | Then we sort the rowids. |
| 542 | |
| 543 | | |~~~| The unused part of the buffer is at the end, so |
| 544 | we can return it to the SQL layer. |
| 545 | |
| 546 | |================* Sorted rowids are then used to read table records |
| 547 | in disk order |
| 548 | |
| 549 | */ |
| 550 | |
| 551 | class DsMrr_impl |
| 552 | { |
| 553 | public: |
| 554 | typedef void (handler::*range_check_toggle_func_t)(bool on); |
| 555 | |
| 556 | DsMrr_impl() |
| 557 | : secondary_file(NULL) {}; |
| 558 | |
| 559 | void init(handler *h_arg, TABLE *table_arg) |
| 560 | { |
| 561 | primary_file= h_arg; |
| 562 | table= table_arg; |
| 563 | } |
| 564 | int dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, |
| 565 | void *seq_init_param, uint n_ranges, uint mode, |
| 566 | HANDLER_BUFFER *buf); |
| 567 | void dsmrr_close(); |
| 568 | int dsmrr_next(range_id_t *range_info); |
| 569 | |
| 570 | ha_rows dsmrr_info(uint keyno, uint n_ranges, uint keys, uint key_parts, |
| 571 | uint *bufsz, uint *flags, Cost_estimate *cost); |
| 572 | |
| 573 | ha_rows dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq, |
| 574 | void *seq_init_param, uint n_ranges, uint *bufsz, |
| 575 | uint *flags, Cost_estimate *cost); |
| 576 | |
| 577 | int dsmrr_explain_info(uint mrr_mode, char *str, size_t size); |
| 578 | private: |
| 579 | /* Buffer to store (key, range_id) pairs */ |
| 580 | Lifo_buffer *key_buffer; |
| 581 | |
| 582 | /* |
| 583 | The "owner" handler object (the one that is expected to "own" this object |
| 584 | and call its functions). |
| 585 | */ |
| 586 | handler *primary_file; |
| 587 | TABLE *table; /* Always equal to primary_file->table */ |
| 588 | |
| 589 | /* |
| 590 | Secondary handler object. (created when needed, we need it when we need |
| 591 | to run both index scan and rnd_pos() scan at the same time) |
| 592 | */ |
| 593 | handler *secondary_file; |
| 594 | |
| 595 | uint keyno; /* index we're running the scan on */ |
| 596 | /* TRUE <=> need range association, buffers hold {rowid, range_id} pairs */ |
| 597 | bool is_mrr_assoc; |
| 598 | |
| 599 | Mrr_reader_factory reader_factory; |
| 600 | |
| 601 | Mrr_reader *strategy; |
| 602 | bool strategy_exhausted; |
| 603 | |
| 604 | Mrr_index_reader *index_strategy; |
| 605 | |
| 606 | /* The whole buffer space that we're using */ |
| 607 | uchar *full_buf; |
| 608 | uchar *full_buf_end; |
| 609 | |
| 610 | /* |
| 611 | When using both rowid and key buffers: the boundary between key and rowid |
| 612 | parts of the buffer. This is the "original" value, actual memory ranges |
| 613 | used by key and rowid parts may be different because of dynamic space |
| 614 | reallocation between them. |
| 615 | */ |
| 616 | uchar *rowid_buffer_end; |
| 617 | |
| 618 | /* |
| 619 | One of the following two is used for key buffer: forward is used when |
| 620 | we only need key buffer, backward is used when we need both key and rowid |
| 621 | buffers. |
| 622 | */ |
| 623 | Forward_lifo_buffer forward_key_buf; |
| 624 | Backward_lifo_buffer backward_key_buf; |
| 625 | |
| 626 | /* |
| 627 | Buffer to store (rowid, range_id) pairs, or just rowids if |
| 628 | is_mrr_assoc==FALSE |
| 629 | */ |
| 630 | Forward_lifo_buffer rowid_buffer; |
| 631 | |
| 632 | bool choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, uint *bufsz, |
| 633 | Cost_estimate *cost); |
| 634 | bool get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags, |
| 635 | uint *buffer_size, Cost_estimate *cost); |
| 636 | bool check_cpk_scan(THD *thd, TABLE_SHARE *share, uint keyno, uint mrr_flags); |
| 637 | |
| 638 | bool setup_buffer_sharing(uint key_size_in_keybuf, key_part_map key_tuple_map); |
| 639 | |
| 640 | /* Buffer_manager and its member functions */ |
| 641 | Buffer_manager buf_manager; |
| 642 | static void redistribute_buffer_space(void *dsmrr_arg); |
| 643 | static void reset_buffer_sizes(void *dsmrr_arg); |
| 644 | static void do_nothing(void *dsmrr_arg); |
| 645 | |
| 646 | Lifo_buffer* get_key_buffer() { return key_buffer; } |
| 647 | |
| 648 | friend class Key_value_records_iterator; |
| 649 | friend class Mrr_ordered_index_reader; |
| 650 | friend class Mrr_ordered_rndpos_reader; |
| 651 | |
| 652 | int setup_two_handlers(); |
| 653 | void close_second_handler(); |
| 654 | }; |
| 655 | |
| 656 | /** |
| 657 | @} (end of group DS-MRR declarations) |
| 658 | */ |
| 659 | |
| 660 | |