| 1 | /* Copyright (C) 2000-2006 MySQL AB |
| 2 | |
| 3 | This program is free software; you can redistribute it and/or modify |
| 4 | it under the terms of the GNU General Public License as published by |
| 5 | the Free Software Foundation; version 2 of the License. |
| 6 | |
| 7 | This program is distributed in the hope that it will be useful, |
| 8 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 10 | GNU General Public License for more details. |
| 11 | |
| 12 | You should have received a copy of the GNU General Public License |
| 13 | along with this program; if not, write to the Free Software |
| 14 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ |
| 15 | |
| 16 | /** |
| 17 | @file |
| 18 | |
| 19 | @brief |
| 20 | join cache optimizations |
| 21 | |
| 22 | @defgroup Query_Optimizer Query Optimizer |
| 23 | @{ |
| 24 | */ |
| 25 | |
| 26 | #ifdef USE_PRAGMA_IMPLEMENTATION |
| 27 | #pragma implementation // gcc: Class implementation |
| 28 | #endif |
| 29 | |
| 30 | #include "mariadb.h" |
| 31 | #include "key.h" |
| 32 | #include "sql_base.h" |
| 33 | #include "sql_select.h" |
| 34 | #include "opt_subselect.h" |
| 35 | |
| 36 | #define NO_MORE_RECORDS_IN_BUFFER (uint)(-1) |
| 37 | |
| 38 | static void save_or_restore_used_tabs(JOIN_TAB *join_tab, bool save); |
| 39 | |
| 40 | /***************************************************************************** |
| 41 | * Join cache module |
| 42 | ******************************************************************************/ |
| 43 | |
| 44 | /* |
| 45 | Fill in the descriptor of a flag field associated with a join cache |
| 46 | |
| 47 | SYNOPSIS |
    add_flag_field_to_join_cache()
| 49 | str position in a record buffer to copy the field from/to |
| 50 | length length of the field |
| 51 | field IN/OUT pointer to the field descriptor to fill in |
| 52 | |
| 53 | DESCRIPTION |
    The function fills in the descriptor of a cache flag field to which
| 55 | the parameter 'field' points to. The function uses the first two |
| 56 | parameters to set the position in the record buffer from/to which |
| 57 | the field value is to be copied and the length of the copied fragment. |
| 58 | Before returning the result the function increments the value of |
| 59 | *field by 1. |
    The function ignores the fields 'blob_length' and 'offset' of the
| 61 | descriptor. |
| 62 | |
| 63 | RETURN VALUE |
| 64 | the length of the field |
| 65 | */ |
| 66 | |
| 67 | static |
| 68 | uint add_flag_field_to_join_cache(uchar *str, uint length, CACHE_FIELD **field) |
| 69 | { |
| 70 | CACHE_FIELD *copy= *field; |
| 71 | copy->str= str; |
| 72 | copy->length= length; |
| 73 | copy->type= 0; |
| 74 | copy->field= 0; |
| 75 | copy->referenced_field_no= 0; |
| 76 | (*field)++; |
| 77 | return length; |
| 78 | } |
| 79 | |
| 80 | |
| 81 | /* |
| 82 | Fill in the descriptors of table data fields associated with a join cache |
| 83 | |
| 84 | SYNOPSIS |
| 85 | add_table_data_fields_to_join_cache() |
| 86 | tab descriptors of fields from this table are to be filled |
| 87 | field_set descriptors for only these fields are to be created |
| 88 | field_cnt IN/OUT counter of data fields |
| 89 | descr IN/OUT pointer to the first descriptor to be filled |
| 90 | field_ptr_cnt IN/OUT counter of pointers to the data fields |
| 91 | descr_ptr IN/OUT pointer to the first pointer to blob descriptors |
| 92 | |
| 93 | DESCRIPTION |
| 94 | The function fills in the descriptors of cache data fields from the table |
| 95 | 'tab'. The descriptors are filled only for the fields marked in the |
| 96 | bitmap 'field_set'. |
| 97 | The function fills the descriptors starting from the position pointed |
    by 'descr'. If an added field is of a BLOB type then a pointer to
    its descriptor is added to the array descr_ptr.
| 100 | At the return 'descr' points to the position after the last added |
| 101 | descriptor while 'descr_ptr' points to the position right after the |
| 102 | last added pointer. |
| 103 | |
| 104 | RETURN VALUE |
| 105 | the total length of the added fields |
| 106 | */ |
| 107 | |
| 108 | static |
| 109 | uint add_table_data_fields_to_join_cache(JOIN_TAB *tab, |
| 110 | MY_BITMAP *field_set, |
| 111 | uint *field_cnt, |
| 112 | CACHE_FIELD **descr, |
| 113 | uint *field_ptr_cnt, |
| 114 | CACHE_FIELD ***descr_ptr) |
| 115 | { |
| 116 | Field **fld_ptr; |
| 117 | uint len= 0; |
| 118 | CACHE_FIELD *copy= *descr; |
| 119 | CACHE_FIELD **copy_ptr= *descr_ptr; |
| 120 | uint used_fields= bitmap_bits_set(field_set); |
| 121 | for (fld_ptr= tab->table->field; used_fields; fld_ptr++) |
| 122 | { |
| 123 | if (bitmap_is_set(field_set, (*fld_ptr)->field_index)) |
| 124 | { |
| 125 | len+= (*fld_ptr)->fill_cache_field(copy); |
| 126 | if (copy->type == CACHE_BLOB) |
| 127 | { |
| 128 | *copy_ptr= copy; |
| 129 | copy_ptr++; |
| 130 | (*field_ptr_cnt)++; |
| 131 | } |
| 132 | copy->field= *fld_ptr; |
| 133 | copy->referenced_field_no= 0; |
| 134 | copy++; |
| 135 | (*field_cnt)++; |
| 136 | used_fields--; |
| 137 | } |
| 138 | } |
| 139 | *descr= copy; |
| 140 | *descr_ptr= copy_ptr; |
| 141 | return len; |
| 142 | } |
| 143 | |
| 144 | /* |
| 145 | Determine different counters of fields associated with a record in the cache |
| 146 | |
| 147 | SYNOPSIS |
| 148 | calc_record_fields() |
| 149 | |
| 150 | DESCRIPTION |
| 151 | The function counts the number of total fields stored in a record |
| 152 | of the cache and saves this number in the 'fields' member. It also |
| 153 | determines the number of flag fields and the number of blobs. |
| 154 | The function sets 'with_match_flag' on if 'join_tab' needs a match flag |
| 155 | i.e. if it is the first inner table of an outer join or a semi-join. |
| 156 | |
| 157 | RETURN VALUE |
| 158 | none |
| 159 | */ |
| 160 | |
void JOIN_CACHE::calc_record_fields()
{
  JOIN_TAB *tab;

  /*
    Records of this cache start with the first table not already covered
    by a previous cache in the chain (if any).
  */
  if (prev_cache)
    tab= prev_cache->join_tab;
  else
  {
    if (join_tab->bush_root_tab)
    {
      /*
        --ot1--SJM1--------------ot2--...
                |
                |
                +-it1--...--itN
                  ^____________ this->join_tab is somewhere here,
                                inside an sjm nest.

        The join buffer should store the values of it1.*, it2.*, ..
        It should not store values of ot1.*.
      */
      tab= join_tab->bush_root_tab->bush_children->start;
    }
    else
    {
      /*
        -ot1--ot2--SJM1--SJM2--------------ot3--...--otN
                    |     |                      ^
                    |     +-it21--...--it2N      |
                    |                            \-- we're somewhere here,
                    +-it11--...--it1N                at the top level

        The join buffer should store the values of

          ot1.*, ot2.*, it1{i}, it2{j}.*, ot3.*, ...

        that is, we should start from the first non-const top-level table.

        We will need to store columns of SJ-inner tables (it_X_Y.*), but we're
        not interested in storing the columns of materialization tables
        themselves. Because of that, if the first non-const top-level table is
        a materialized table, we move to its bush_children:
      */
      tab= join->join_tab + join->const_tables;
      if (tab->bush_children)
        tab= tab->bush_children->start;
    }
  }
  DBUG_ASSERT(!tab->bush_children);

  start_tab= tab;
  fields= 0;
  blobs= 0;
  flag_fields= 0;
  data_field_count= 0;
  data_field_ptr_count= 0;
  referenced_fields= 0;

  /*
    The following loop will get inside SJM nests, because data may be unpacked
    to sjm-inner tables.
  */
  for (; tab != join_tab ; tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
  {
    tab->calc_used_field_length(FALSE);
    /* One flag field for the null bitmap, one for the null-row flag */
    flag_fields+= MY_TEST(tab->used_null_fields || tab->used_uneven_bit_fields);
    flag_fields+= MY_TEST(tab->table->maybe_null);
    fields+= tab->used_fields;
    blobs+= tab->used_blobs;
  }
  /* A match flag is needed if join_tab starts an outer join or semi-join */
  if ((with_match_flag= join_tab->use_match_flag()))
    flag_fields++;
  fields+= flag_fields;
}
| 235 | |
| 236 | |
| 237 | /* |
| 238 | Collect information on join key arguments |
| 239 | |
| 240 | SYNOPSIS |
| 241 | collect_info_on_key_args() |
| 242 | |
| 243 | DESCRIPTION |
| 244 | The function traverses the ref expressions that are used to access the |
| 245 | joined table join_tab. For each table 'tab' whose fields are to be stored |
| 246 | in the join buffer of the cache the function finds the fields from 'tab' |
| 247 | that occur in the ref expressions and marks these fields in the bitmap |
| 248 | tab->table->tmp_set. The function counts the number of them stored |
| 249 | in this cache and the total number of them stored in the previous caches |
| 250 | and saves the results of the counting in 'local_key_arg_fields' and |
| 251 | 'external_key_arg_fields' respectively. |
| 252 | |
| 253 | NOTES |
| 254 | The function does not do anything if no key is used to join the records |
| 255 | from join_tab. |
| 256 | |
| 257 | RETURN VALUE |
| 258 | none |
| 259 | */ |
| 260 | |
| 261 | void JOIN_CACHE::collect_info_on_key_args() |
| 262 | { |
| 263 | JOIN_TAB *tab; |
| 264 | JOIN_CACHE *cache; |
| 265 | local_key_arg_fields= 0; |
| 266 | external_key_arg_fields= 0; |
| 267 | |
| 268 | if (!is_key_access()) |
| 269 | return; |
| 270 | |
| 271 | TABLE_REF *ref= &join_tab->ref; |
| 272 | cache= this; |
| 273 | do |
| 274 | { |
| 275 | for (tab= cache->start_tab; tab != cache->join_tab; |
| 276 | tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS)) |
| 277 | { |
| 278 | uint key_args; |
| 279 | bitmap_clear_all(&tab->table->tmp_set); |
| 280 | for (uint i= 0; i < ref->key_parts; i++) |
| 281 | { |
| 282 | Item *ref_item= ref->items[i]; |
| 283 | if (!(tab->table->map & ref_item->used_tables())) |
| 284 | continue; |
| 285 | ref_item->walk(&Item::add_field_to_set_processor, 1, tab->table); |
| 286 | } |
| 287 | if ((key_args= bitmap_bits_set(&tab->table->tmp_set))) |
| 288 | { |
| 289 | if (cache == this) |
| 290 | local_key_arg_fields+= key_args; |
| 291 | else |
| 292 | external_key_arg_fields+= key_args; |
| 293 | } |
| 294 | } |
| 295 | cache= cache->prev_cache; |
| 296 | } |
| 297 | while (cache); |
| 298 | |
| 299 | return; |
| 300 | } |
| 301 | |
| 302 | |
| 303 | /* |
| 304 | Allocate memory for descriptors and pointers to them associated with the cache |
| 305 | |
| 306 | SYNOPSIS |
| 307 | alloc_fields() |
| 308 | |
| 309 | DESCRIPTION |
| 310 | The function allocates memory for the array of fields descriptors |
| 311 | and the array of pointers to the field descriptors used to copy |
| 312 | join record data from record buffers into the join buffer and |
| 313 | backward. Some pointers refer to the field descriptor associated |
| 314 | with previous caches. They are placed at the beginning of the array |
| 315 | of pointers and its total number is stored in external_key_arg_fields. |
| 316 | The pointer of the first array is assigned to field_descr and the number |
| 317 | of the elements in it is precalculated by the function calc_record_fields. |
| 318 | The allocated arrays are adjacent. |
| 319 | |
| 320 | NOTES |
| 321 | The memory is allocated in join->thd->mem_root |
| 322 | |
| 323 | RETURN VALUE |
| 324 | pointer to the first array |
| 325 | */ |
| 326 | |
| 327 | int JOIN_CACHE::alloc_fields() |
| 328 | { |
| 329 | uint ptr_cnt= external_key_arg_fields+blobs+1; |
| 330 | uint fields_size= sizeof(CACHE_FIELD)*fields; |
| 331 | field_descr= (CACHE_FIELD*) join->thd->alloc(fields_size + |
| 332 | sizeof(CACHE_FIELD*)*ptr_cnt); |
| 333 | blob_ptr= (CACHE_FIELD **) ((uchar *) field_descr + fields_size); |
| 334 | return (field_descr == NULL); |
| 335 | } |
| 336 | |
| 337 | |
| 338 | /* |
| 339 | Create descriptors of the record flag fields stored in the join buffer |
| 340 | |
| 341 | SYNOPSIS |
| 342 | create_flag_fields() |
| 343 | |
| 344 | DESCRIPTION |
| 345 | The function creates descriptors of the record flag fields stored |
| 346 | in the join buffer. These are descriptors for: |
| 347 | - an optional match flag field, |
| 348 | - table null bitmap fields, |
| 349 | - table null row fields. |
| 350 | The match flag field is created when 'join_tab' is the first inner |
    table of an outer join or a semi-join. A null bitmap field is
| 352 | created for any table whose fields are to be stored in the join |
| 353 | buffer if at least one of these fields is nullable or is a BIT field |
| 354 | whose bits are partially stored with null bits. A null row flag |
| 355 | is created for any table assigned to the cache if it is an inner |
| 356 | table of an outer join. |
| 357 | The descriptor for flag fields are placed one after another at the |
| 358 | beginning of the array of field descriptors 'field_descr' that |
| 359 | contains 'fields' elements. If there is a match flag field the |
| 360 | descriptor for it is always first in the sequence of flag fields. |
| 361 | The descriptors for other flag fields can follow in an arbitrary |
| 362 | order. |
| 363 | The flag field values follow in a record stored in the join buffer |
| 364 | in the same order as field descriptors, with the match flag always |
| 365 | following first. |
| 366 | The function sets the value of 'flag_fields' to the total number |
| 367 | of the descriptors created for the flag fields. |
| 368 | The function sets the value of 'length' to the total length of the |
| 369 | flag fields. |
| 370 | |
| 371 | RETURN VALUE |
| 372 | none |
| 373 | */ |
| 374 | |
| 375 | void JOIN_CACHE::create_flag_fields() |
| 376 | { |
| 377 | CACHE_FIELD *copy; |
| 378 | JOIN_TAB *tab; |
| 379 | |
| 380 | copy= field_descr; |
| 381 | |
| 382 | length=0; |
| 383 | |
| 384 | /* If there is a match flag the first field is always used for this flag */ |
| 385 | if (with_match_flag) |
| 386 | length+= add_flag_field_to_join_cache((uchar*) &join_tab->found, |
| 387 | sizeof(join_tab->found), |
| 388 | ©); |
| 389 | |
| 390 | /* Create fields for all null bitmaps and null row flags that are needed */ |
| 391 | for (tab= start_tab; tab != join_tab; |
| 392 | tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS)) |
| 393 | { |
| 394 | TABLE *table= tab->table; |
| 395 | |
| 396 | /* Create a field for the null bitmap from table if needed */ |
| 397 | if (tab->used_null_fields || tab->used_uneven_bit_fields) |
| 398 | length+= add_flag_field_to_join_cache(table->null_flags, |
| 399 | table->s->null_bytes, |
| 400 | ©); |
| 401 | |
| 402 | /* Create table for the null row flag if needed */ |
| 403 | if (table->maybe_null) |
| 404 | length+= add_flag_field_to_join_cache((uchar*) &table->null_row, |
| 405 | sizeof(table->null_row), |
| 406 | ©); |
| 407 | } |
| 408 | |
| 409 | /* Theoretically the new value of flag_fields can be less than the old one */ |
| 410 | flag_fields= (uint)(copy-field_descr); |
| 411 | } |
| 412 | |
| 413 | |
| 414 | /* |
| 415 | Create descriptors of the fields used to build access keys to the joined table |
| 416 | |
| 417 | SYNOPSIS |
| 418 | create_key_arg_fields() |
| 419 | |
| 420 | DESCRIPTION |
| 421 | The function creates descriptors of the record fields stored in the join |
| 422 | buffer that are used to build access keys to the joined table. These |
| 423 | fields are put into the buffer ahead of other records fields stored in |
| 424 | the buffer. Such placement helps to optimize construction of access keys. |
| 425 | For each field that is used to build access keys to the joined table but |
| 426 | is stored in some other join cache buffer the function saves a pointer |
    to the field descriptor. The array of such pointers is placed in the
| 428 | the join cache structure just before the array of pointers to the |
| 429 | blob fields blob_ptr. |
| 430 | Any field stored in a join cache buffer that is used to construct keys |
| 431 | to access tables associated with other join caches is called a referenced |
| 432 | field. It receives a unique number that is saved by the function in the |
| 433 | member 'referenced_field_no' of the CACHE_FIELD descriptor for the field. |
| 434 | This number is used as index to the array of offsets to the referenced |
| 435 | fields that are saved and put in the join cache buffer after all record |
| 436 | fields. |
    The function also finds out whether the keys to access join_tab
| 438 | can be considered as embedded and, if so, sets the flag 'use_emb_key' in |
| 439 | this join cache appropriately. |
| 440 | |
| 441 | NOTES. |
| 442 | When a key to access the joined table 'join_tab' is constructed the array |
| 443 | of pointers to the field descriptors for the external fields is looked |
| 444 | through. For each of this pointers we find out in what previous key cache |
| 445 | the referenced field is stored. The value of 'referenced_field_no' |
| 446 | provides us with the index into the array of offsets for referenced |
    fields stored in the join cache. The offset read by the index allows
| 448 | us to read the field without reading all other fields of the record |
| 449 | stored the join cache buffer. This optimizes the construction of keys |
| 450 | to access 'join_tab' when some key arguments are stored in the previous |
| 451 | join caches. |
| 452 | |
| 453 | NOTES |
| 454 | The function does not do anything if no key is used to join the records |
| 455 | from join_tab. |
| 456 | |
| 457 | RETURN VALUE |
| 458 | none |
| 459 | */ |
void JOIN_CACHE::create_key_arg_fields()
{
  JOIN_TAB *tab;
  JOIN_CACHE *cache;

  /* Nothing to do unless join_tab is accessed through a key */
  if (!is_key_access())
    return;

  /*
    Save pointers to the cache fields in previous caches
    that are used to build keys for this key access.
  */
  cache= this;
  uint ext_key_arg_cnt= external_key_arg_fields;
  CACHE_FIELD *copy;
  CACHE_FIELD **copy_ptr= blob_ptr;
  /* Stop once all external key-argument fields have been located */
  while (ext_key_arg_cnt)
  {
    cache= cache->prev_cache;
    for (tab= cache->start_tab; tab != cache->join_tab;
         tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
    {
      CACHE_FIELD *copy_end;
      MY_BITMAP *key_read_set= &tab->table->tmp_set;
      /* key_read_set contains the bitmap of tab's fields referenced by ref */
      if (bitmap_is_clear_all(key_read_set))
        continue;
      /* Scan only the data fields: flag fields never serve as key args */
      copy_end= cache->field_descr+cache->fields;
      for (copy= cache->field_descr+cache->flag_fields; copy < copy_end; copy++)
      {
        /*
          (1) - when we store rowids for DuplicateWeedout, they have
          copy->field==NULL
        */
        if (copy->field &&  // (1)
            copy->field->table == tab->table &&
            bitmap_is_set(key_read_set, copy->field->field_index))
        {
          *copy_ptr++= copy;
          ext_key_arg_cnt--;
          if (!copy->referenced_field_no)
          {
            /*
              Register the referenced field 'copy':
              - set the offset number in copy->referenced_field_no,
              - adjust the value of the flag 'with_length',
              - adjust the values of 'pack_length' and
                of 'pack_length_with_blob_ptrs'.
            */
            copy->referenced_field_no= ++cache->referenced_fields;
            if (!cache->with_length)
            {
              /* Referenced fields require record lengths in their cache */
              cache->with_length= TRUE;
              uint sz= cache->get_size_of_rec_length();
              cache->base_prefix_length+= sz;
              cache->pack_length+= sz;
              cache->pack_length_with_blob_ptrs+= sz;
            }
            cache->pack_length+= cache->get_size_of_fld_offset();
            cache->pack_length_with_blob_ptrs+= cache->get_size_of_fld_offset();
          }
        }
      }
    }
  }
  /* After this 'blob_ptr' shall not be changed */
  blob_ptr= copy_ptr;

  /* Now create local fields that are used to build ref for this key access */
  copy= field_descr+flag_fields;
  for (tab= start_tab; tab != join_tab;
       tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
  {
    /* tmp_set still marks the key-argument fields of tab (see above) */
    length+= add_table_data_fields_to_join_cache(tab, &tab->table->tmp_set,
                                                 &data_field_count, &copy,
                                                 &data_field_ptr_count,
                                                 &copy_ptr);
  }

  use_emb_key= check_emb_key_usage();

  return;
}
| 543 | |
| 544 | |
| 545 | /* |
| 546 | Create descriptors of all remaining data fields stored in the join buffer |
| 547 | |
| 548 | SYNOPSIS |
| 549 | create_remaining_fields() |
| 550 | |
| 551 | DESCRIPTION |
| 552 | The function creates descriptors for all remaining data fields of a |
| 553 | record from the join buffer. If the value returned by is_key_access() is |
| 554 | false the function creates fields for all read record fields that |
| 555 | comprise the partial join record joined with join_tab. Otherwise, |
| 556 | for each table tab, the set of the read fields for which the descriptors |
| 557 | have to be added is determined as the difference between all read fields |
    and those for which the descriptors have been already created.
| 559 | The latter are supposed to be marked in the bitmap tab->table->tmp_set. |
    The function increases the value of 'length' by the total length of
| 561 | the added fields. |
| 562 | |
| 563 | NOTES |
| 564 | If is_key_access() returns true the function modifies the value of |
| 565 | tab->table->tmp_set for a each table whose fields are stored in the cache. |
| 566 | The function calls the method Field::fill_cache_field to figure out |
| 567 | the type of the cache field and the maximal length of its representation |
| 568 | in the join buffer. If this is a blob field then additionally a pointer |
| 569 | to this field is added as an element of the array blob_ptr. For a blob |
| 570 | field only the size of the length of the blob data is taken into account. |
| 571 | It is assumed that 'data_field_count' contains the number of descriptors |
| 572 | for data fields that have been already created and 'data_field_ptr_count' |
| 573 | contains the number of the pointers to such descriptors having been |
| 574 | stored up to the moment. |
| 575 | |
| 576 | RETURN VALUE |
| 577 | none |
| 578 | */ |
| 579 | |
void JOIN_CACHE::create_remaining_fields()
{
  JOIN_TAB *tab;
  /* With no key access every read field still needs a descriptor */
  bool all_read_fields= !is_key_access();
  /* Continue right after the descriptors created so far */
  CACHE_FIELD *copy= field_descr+flag_fields+data_field_count;
  CACHE_FIELD **copy_ptr= blob_ptr+data_field_ptr_count;

  for (tab= start_tab; tab != join_tab;
       tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
  {
    MY_BITMAP *rem_field_set;
    TABLE *table= tab->table;

    if (all_read_fields)
      rem_field_set= table->read_set;
    else
    {
      /*
        tmp_set marks the key-argument fields already described by
        create_key_arg_fields(); the remaining fields are those of
        read_set that are NOT in tmp_set, computed in place here.
      */
      bitmap_invert(&table->tmp_set);
      bitmap_intersect(&table->tmp_set, table->read_set);
      rem_field_set= &table->tmp_set;
    }

    length+= add_table_data_fields_to_join_cache(tab, rem_field_set,
                                                 &data_field_count, &copy,
                                                 &data_field_ptr_count,
                                                 &copy_ptr);

    /* SemiJoinDuplicateElimination: allocate space for rowid if needed */
    if (tab->keep_current_rowid)
    {
      copy->str= table->file->ref;
      if (copy->str)
        copy->length= table->file->ref_length;
      else
      {
        /* This may happen only for materialized derived tables and views */
        copy->length= 0;
        copy->str= (uchar *) table;
      }
      copy->type= CACHE_ROWID;
      copy->field= 0;
      copy->referenced_field_no= 0;
      /*
        Note: this may seem odd, but at this point we have
        table->file->ref==NULL while table->file->ref_length is already set
        to correct value.
      */
      length += table->file->ref_length;
      data_field_count++;
      copy++;
    }
  }
}
| 633 | |
| 634 | |
| 635 | |
| 636 | /* |
| 637 | Calculate and set all cache constants |
| 638 | |
| 639 | SYNOPSIS |
| 640 | set_constants() |
| 641 | |
| 642 | DESCRIPTION |
| 643 | The function calculates and set all precomputed constants that are used |
| 644 | when writing records into the join buffer and reading them from it. |
| 645 | It calculates the size of offsets of a record within the join buffer |
| 646 | and of a field within a record. It also calculates the number of bytes |
| 647 | used to store record lengths. |
| 648 | The function also calculates the maximal length of the representation |
| 649 | of record in the cache excluding blob_data. This value is used when |
    making a decision whether more records should be added into the join
| 651 | buffer or not. |
| 652 | |
| 653 | RETURN VALUE |
| 654 | none |
| 655 | */ |
| 656 | |
void JOIN_CACHE::set_constants()
{
  /*
    Any record from a BKA cache is prepended with the record length.
    We use the record length when reading the buffer and building key values
    for each record. The length allows us not to read the fields that are
    not needed for keys.
    If a record has match flag it also may be skipped when the match flag
    is on. It happens if the cache is used for a semi-join operation or
    for outer join when the 'not exist' optimization can be applied.
    If some of the fields are referenced from other caches then
    the record length allows us to easily reach the saved offsets for
    these fields since the offsets are stored at the very end of the record.
    However at this moment we don't know whether we have referenced fields for
    the cache or not. Later when a referenced field is registered for the cache
    we adjust the value of the flag 'with_length'.
  */
  with_length= is_key_access() ||
               join_tab->is_inner_table_of_semi_join_with_first_match() ||
               join_tab->is_inner_table_of_outer_join();
  /*
    At this moment we don't know yet the value of 'referenced_fields',
    but in any case it can't be greater than the value of 'fields'.
    'len' is therefore an upper bound of the in-buffer record size.
  */
  uint len= length + fields*sizeof(uint)+blobs*sizeof(uchar *) +
            (prev_cache ? prev_cache->get_size_of_rec_offset() : 0) +
            sizeof(ulong);
  /*
    The values of size_of_rec_ofs, size_of_rec_len, size_of_fld_ofs,
    base_prefix_length, pack_length, pack_length_with_blob_ptrs
    will be recalculated later in this function when we get the estimate
    for the actual value of the join buffer size.
  */
  size_of_rec_ofs= size_of_rec_len= size_of_fld_ofs= 4;
  base_prefix_length= (with_length ? size_of_rec_len : 0) +
                      (prev_cache ? prev_cache->get_size_of_rec_offset() : 0);
  pack_length= (with_length ? size_of_rec_len : 0) +
               (prev_cache ? prev_cache->get_size_of_rec_offset() : 0) +
               length + fields*sizeof(uint);
  pack_length_with_blob_ptrs= pack_length + blobs*sizeof(uchar *);
  min_buff_size= 0;
  min_records= 1;
  /* The buffer must fit at least the minimal number of records */
  buff_size= (size_t)MY_MAX(join->thd->variables.join_buff_size,
                            get_min_join_buffer_size());
  /* Offset/length sizes are now recomputed for the actual buffer size */
  size_of_rec_ofs= offset_size(buff_size);
  size_of_rec_len= blobs ? size_of_rec_ofs : offset_size(len);
  size_of_fld_ofs= size_of_rec_len;
  base_prefix_length= (with_length ? size_of_rec_len : 0) +
                      (prev_cache ? prev_cache->get_size_of_rec_offset() : 0);
  /*
    The size of the offsets for referenced fields will be added later.
    The values of 'pack_length' and 'pack_length_with_blob_ptrs' are adjusted
    every time when the first reference to the referenced field is registered.
  */
  pack_length= (with_length ? size_of_rec_len : 0) +
               (prev_cache ? prev_cache->get_size_of_rec_offset() : 0) +
               length;
  pack_length_with_blob_ptrs= pack_length + blobs*sizeof(uchar *);
}
| 716 | |
| 717 | |
| 718 | /* |
| 719 | Get maximum total length of all affixes of a record in the join cache buffer |
| 720 | |
| 721 | SYNOPSIS |
| 722 | get_record_max_affix_length() |
| 723 | |
| 724 | DESCRIPTION |
| 725 | The function calculates the maximum possible total length of all affixes |
| 726 | of a record in the join cache buffer, that is made of: |
| 727 | - the length of all prefixes used in this cache, |
| 728 | - the length of the match flag if it's needed |
| 729 | - the total length of the maximum possible offsets to the fields of |
| 730 | a record in the buffer. |
| 731 | |
| 732 | RETURN VALUE |
| 733 | The maximum total length of all affixes of a record in the join buffer |
| 734 | */ |
| 735 | |
| 736 | uint JOIN_CACHE::get_record_max_affix_length() |
| 737 | { |
| 738 | uint len= get_prefix_length() + |
| 739 | MY_TEST(with_match_flag) + |
| 740 | size_of_fld_ofs * data_field_count; |
| 741 | return len; |
| 742 | } |
| 743 | |
| 744 | |
| 745 | /* |
| 746 | Get the minimum possible size of the cache join buffer |
| 747 | |
| 748 | SYNOPSIS |
| 749 | get_min_join_buffer_size() |
| 750 | |
| 751 | DESCRIPTION |
    At its first invocation for the cache the function calculates the
| 753 | minimum possible size of the join buffer of the cache. This value depends |
| 754 | on the minimal number of records 'min_records' to be stored in the join |
| 755 | buffer. The number is supposed to be determined by the procedure that |
| 756 | chooses the best access path to the joined table join_tab in the execution |
| 757 | plan. After the calculation of the interesting size the function saves it |
| 758 | in the field 'min_buff_size' in order to use it directly at the next |
| 759 | invocations of the function. |
| 760 | |
| 761 | NOTES |
| 762 | Currently the number of minimal records is just set to 1. |
| 763 | |
| 764 | RETURN VALUE |
| 765 | The minimal possible size of the join buffer of this cache |
| 766 | */ |
| 767 | |
size_t JOIN_CACHE::get_min_join_buffer_size()
{
  /* The value is computed once and cached in min_buff_size */
  if (!min_buff_size)
  {
    size_t len= 0;       /* worst-case record length */
    size_t len_last= 0;  /* average record length (used for the last record) */
    for (JOIN_TAB *tab= start_tab; tab != join_tab;
         tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
    {
      len+= tab->get_max_used_fieldlength();
      len_last+= tab->get_used_fieldlength();
    }
    /* Per-record overhead: affixes plus key construction space */
    size_t len_addon= get_record_max_affix_length() +
                      get_max_key_addon_space_per_record();
    len+= len_addon;
    len_last+= len_addon;
    /* Worst-case space for all records but the last, average for the last */
    size_t min_sz= len*(min_records-1) + len_last;
    min_sz+= pack_length_with_blob_ptrs;
    size_t add_sz= 0;
    /* Auxiliary buffer space (e.g. for MRR) needed for min_records records */
    for (uint i=0; i < min_records; i++)
      add_sz+= join_tab_scan->aux_buffer_incr(i+1);
    avg_aux_buffer_incr= add_sz/min_records;
    min_sz+= add_sz;
    set_if_bigger(min_sz, 1);
    min_buff_size= min_sz;
  }
  return min_buff_size;
}
| 796 | |
| 797 | |
| 798 | /* |
| 799 | Get the maximum possible size of the cache join buffer |
| 800 | |
| 801 | SYNOPSIS |
| 802 | get_max_join_buffer_size() |
| 803 | |
| 804 | optimize_buff_size FALSE <-> do not take more memory than needed for |
| 805 | the estimated number of records in the partial join |
| 806 | |
| 807 | DESCRIPTION |
    At its first invocation for the cache the function calculates the
| 809 | maximum possible size of join buffer for the cache. If the parameter |
| 810 | optimize_buff_size true then this value does not exceed the size of the |
| 811 | space needed for the estimated number of records 'max_records' in the |
| 812 | partial join that joins tables from the first one through join_tab. This |
| 813 | value is also capped off by the value of join_tab->join_buffer_size_limit, |
| 814 | if it has been set a to non-zero value, and by the value of the system |
| 815 | parameter join_buffer_size - otherwise. After the calculation of the |
| 816 | interesting size the function saves the value in the field 'max_buff_size' |
| 817 | in order to use it directly at the next invocations of the function. |
| 818 | |
| 819 | NOTES |
| 820 | Currently the value of join_tab->join_buffer_size_limit is initialized |
| 821 | to 0 and is never reset. |
| 822 | |
| 823 | RETURN VALUE |
| 824 | The maximum possible size of the join buffer of this cache |
| 825 | */ |
| 826 | |
| 827 | size_t JOIN_CACHE::get_max_join_buffer_size(bool optimize_buff_size) |
| 828 | { |
| 829 | if (!max_buff_size) |
| 830 | { |
| 831 | size_t max_sz; |
| 832 | size_t min_sz= get_min_join_buffer_size(); |
| 833 | size_t len= 0; |
| 834 | for (JOIN_TAB *tab= start_tab; tab != join_tab; |
| 835 | tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS)) |
| 836 | { |
| 837 | len+= tab->get_used_fieldlength(); |
| 838 | } |
| 839 | len+= get_record_max_affix_length(); |
| 840 | avg_record_length= len; |
| 841 | len+= get_max_key_addon_space_per_record() + avg_aux_buffer_incr; |
| 842 | space_per_record= len; |
| 843 | |
| 844 | size_t limit_sz= (size_t)join->thd->variables.join_buff_size; |
| 845 | if (join_tab->join_buffer_size_limit) |
| 846 | set_if_smaller(limit_sz, join_tab->join_buffer_size_limit); |
| 847 | if (!optimize_buff_size) |
| 848 | max_sz= limit_sz; |
| 849 | else |
| 850 | { |
| 851 | if (limit_sz / max_records > space_per_record) |
| 852 | max_sz= space_per_record * max_records; |
| 853 | else |
| 854 | max_sz= limit_sz; |
| 855 | max_sz+= pack_length_with_blob_ptrs; |
| 856 | set_if_smaller(max_sz, limit_sz); |
| 857 | } |
| 858 | set_if_bigger(max_sz, min_sz); |
| 859 | max_buff_size= max_sz; |
| 860 | } |
| 861 | return max_buff_size; |
| 862 | } |
| 863 | |
| 864 | |
| 865 | /* |
| 866 | Allocate memory for a join buffer |
| 867 | |
| 868 | SYNOPSIS |
| 869 | alloc_buffer() |
| 870 | |
| 871 | DESCRIPTION |
| 872 | The function allocates a lump of memory for the cache join buffer. |
| 873 | Initially the function sets the size of the buffer buff_size equal to |
| 874 | the value returned by get_max_join_buffer_size(). If the total size of |
| 875 | the space intended to be used for the join buffers employed by the |
| 876 | tables from the first one through join_tab exceeds the value of the |
| 877 | system parameter join_buff_space_limit, then the function first tries |
| 878 | to shrink the used buffers to make the occupied space fit the maximum |
| 879 | memory allowed to be used for all join buffers in total. After |
| 880 | this the function tries to allocate a join buffer for join_tab. |
| 881 | If it fails to do so, it decrements the requested size of the join |
| 882 | buffer, shrinks proportionally the join buffers used for the previous |
| 883 | tables and tries to allocate a buffer for join_tab. In the case of a |
| 884 | failure the function repeats its attempts with smaller and smaller |
| 885 | requested sizes of the buffer, but not more than 4 times. |
| 886 | |
| 887 | RETURN VALUE |
| 888 | 0 if the memory has been successfully allocated |
| 889 | 1 otherwise |
| 890 | */ |
| 891 | |
int JOIN_CACHE::alloc_buffer()
{
  JOIN_TAB *tab;
  JOIN_CACHE *cache;
  ulonglong curr_buff_space_sz= 0;       /* space used by all join buffers */
  ulonglong curr_min_buff_space_sz= 0;   /* minimal space they could shrink to */
  ulonglong join_buff_space_limit=
    join->thd->variables.join_buff_space_limit;
  bool optimize_buff_size=
         optimizer_flag(join->thd, OPTIMIZER_SWITCH_OPTIMIZE_JOIN_BUFFER_SIZE);
  /* Cardinality of the partial join before join_tab (join_tab-1 is valid here) */
  double partial_join_cardinality= (join_tab-1)->get_partial_join_cardinality();
  buff= NULL;
  min_buff_size= 0;
  max_buff_size= 0;
  min_records= 1;
  /*
    Cap the expected record count by the space limit; both get_min/max
    size calculations below depend on min_records/max_records.
  */
  max_records= (size_t) (partial_join_cardinality <= join_buff_space_limit ?
                 (ulonglong) partial_join_cardinality : join_buff_space_limit);
  set_if_bigger(max_records, 10);
  min_buff_size= get_min_join_buffer_size();
  buff_size= get_max_join_buffer_size(optimize_buff_size);

  /* Sum the sizes of the join buffers of all preceding tables */
  for (tab= start_tab; tab!= join_tab;
       tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
  {
    cache= tab->cache;
    if (cache)
    {
      curr_min_buff_space_sz+= cache->get_min_join_buffer_size();
      curr_buff_space_sz+= cache->get_join_buffer_size();
    }
  }
  curr_min_buff_space_sz+= min_buff_size;
  curr_buff_space_sz+= buff_size;

  /*
    Fail if even the minimal sizes do not fit into join_buff_space_limit,
    or if the current sizes exceed it and we either may not shrink the
    earlier buffers or shrinking them fails.
  */
  if (curr_min_buff_space_sz > join_buff_space_limit ||
      (curr_buff_space_sz > join_buff_space_limit &&
       (!optimize_buff_size ||
        join->shrink_join_buffers(join_tab, curr_buff_space_sz,
                                  join_buff_space_limit))))
    goto fail;

  /* For EXPLAIN no real buffer is needed; the size calculations suffice */
  if (for_explain_only)
    return 0;

  /*
    Try to allocate the buffer; on each malloc failure shrink the requested
    size by a quarter of the allowed span (and the preceding buffers
    proportionally) and retry, down to min_buff_size at most.
  */
  for (size_t buff_size_decr= (buff_size-min_buff_size)/4 + 1; ; )
  {
    size_t next_buff_size;

    if ((buff= (uchar*) my_malloc(buff_size, MYF(MY_THREAD_SPECIFIC))))
      break;

    next_buff_size= buff_size > buff_size_decr ? buff_size-buff_size_decr : 0;
    if (next_buff_size < min_buff_size ||
        join->shrink_join_buffers(join_tab, curr_buff_space_sz,
                                  curr_buff_space_sz-buff_size_decr))
      goto fail;
    buff_size= next_buff_size;

    /* Recompute the total space after the shrink before the next attempt */
    curr_buff_space_sz= 0;
    for (tab= join->join_tab+join->const_tables; tab <= join_tab; tab++)
    {
      cache= tab->cache;
      if (cache)
        curr_buff_space_sz+= cache->get_join_buffer_size();
    }
  }
  return 0;

fail:
  buff_size= 0;
  return 1;
}
| 964 | |
| 965 | |
| 966 | /* |
  Shrink the size of the cache join buffer in a given ratio

  SYNOPSIS
    shrink_join_buffer_in_ratio()
      n           numerator of the ratio to shrink the buffer by
      d           denominator of the ratio
| 973 | |
| 974 | DESCRIPTION |
| 975 | The function first deallocates the join buffer of the cache. Then |
| 976 | it allocates a buffer that is (n/d) times smaller. |
| 977 | |
| 978 | RETURN VALUE |
| 979 | FALSE on success with allocation of the smaller join buffer |
| 980 | TRUE otherwise |
| 981 | */ |
| 982 | |
| 983 | bool JOIN_CACHE::shrink_join_buffer_in_ratio(ulonglong n, ulonglong d) |
| 984 | { |
| 985 | size_t next_buff_size; |
| 986 | if (n < d) |
| 987 | return FALSE; |
| 988 | next_buff_size= (size_t) ((double) buff_size / n * d); |
| 989 | set_if_bigger(next_buff_size, min_buff_size); |
| 990 | buff_size= next_buff_size; |
| 991 | return realloc_buffer(); |
| 992 | } |
| 993 | |
| 994 | |
| 995 | /* |
| 996 | Reallocate the join buffer of a join cache |
| 997 | |
| 998 | SYNOPSIS |
| 999 | realloc_buffer() |
| 1000 | |
  DESCRIPTION
| 1002 | The function reallocates the join buffer of the join cache. After this |
| 1003 | it resets the buffer for writing. |
| 1004 | |
| 1005 | NOTES |
| 1006 | The function assumes that buff_size contains the new value for the join |
| 1007 | buffer size. |
| 1008 | |
| 1009 | RETURN VALUE |
| 1010 | 0 if the buffer has been successfully reallocated |
| 1011 | 1 otherwise |
| 1012 | */ |
| 1013 | |
| 1014 | int JOIN_CACHE::realloc_buffer() |
| 1015 | { |
| 1016 | int rc; |
| 1017 | free(); |
| 1018 | rc= MY_TEST(!(buff= (uchar*) my_malloc(buff_size, MYF(MY_THREAD_SPECIFIC)))); |
| 1019 | reset(TRUE); |
| 1020 | return rc; |
| 1021 | } |
| 1022 | |
| 1023 | |
| 1024 | /* |
| 1025 | Initialize a join cache |
| 1026 | |
| 1027 | SYNOPSIS |
| 1028 | init() |
| 1029 | for_explain join buffer is initialized for explain only |
| 1030 | |
| 1031 | DESCRIPTION |
    The function initializes the join cache structure. It is supposed to be
    called
| 1033 | by init methods for classes derived from the JOIN_CACHE. |
| 1034 | The function allocates memory for the join buffer and for descriptors of |
| 1035 | the record fields stored in the buffer. |
| 1036 | |
| 1037 | NOTES |
| 1038 | The code of this function should have been included into the constructor |
| 1039 | code itself. However the new operator for the class JOIN_CACHE would |
| 1040 | never fail while memory allocation for the join buffer is not absolutely |
| 1041 | unlikely to fail. That's why this memory allocation has to be placed in a |
| 1042 | separate function that is called in a couple with a cache constructor. |
| 1043 | It is quite natural to put almost all other constructor actions into |
| 1044 | this function. |
| 1045 | |
| 1046 | RETURN VALUE |
| 1047 | 0 initialization with buffer allocations has been succeeded |
| 1048 | 1 otherwise |
| 1049 | */ |
| 1050 | |
| 1051 | int JOIN_CACHE::init(bool for_explain) |
| 1052 | { |
| 1053 | DBUG_ENTER("JOIN_CACHE::init" ); |
| 1054 | |
| 1055 | for_explain_only= for_explain; |
| 1056 | |
| 1057 | calc_record_fields(); |
| 1058 | |
| 1059 | collect_info_on_key_args(); |
| 1060 | |
| 1061 | if (alloc_fields()) |
| 1062 | DBUG_RETURN(1); |
| 1063 | |
| 1064 | create_flag_fields(); |
| 1065 | |
| 1066 | create_key_arg_fields(); |
| 1067 | |
| 1068 | create_remaining_fields(); |
| 1069 | |
| 1070 | set_constants(); |
| 1071 | |
| 1072 | if (alloc_buffer()) |
| 1073 | DBUG_RETURN(1); |
| 1074 | |
| 1075 | reset(TRUE); |
| 1076 | |
| 1077 | DBUG_RETURN(0); |
| 1078 | } |
| 1079 | |
| 1080 | |
| 1081 | /* |
| 1082 | Check the possibility to read the access keys directly from the join buffer |
| 1083 | SYNOPSIS |
| 1084 | check_emb_key_usage() |
| 1085 | |
| 1086 | DESCRIPTION |
| 1087 | The function checks some conditions at which the key values can be read |
| 1088 | directly from the join buffer. This is possible when the key values can be |
| 1089 | composed by concatenation of the record fields stored in the join buffer. |
| 1090 | Sometimes when the access key is multi-component the function has to re-order |
| 1091 | the fields written into the join buffer to make keys embedded. If key |
| 1092 | values for the key access are detected as embedded then 'use_emb_key' |
| 1093 | is set to TRUE. |
| 1094 | |
| 1095 | EXAMPLE |
| 1096 | Let table t2 has an index defined on the columns a,b . Let's assume also |
| 1097 | that the columns t2.a, t2.b as well as the columns t1.a, t1.b are all |
| 1098 | of the integer type. Then if the query |
| 1099 | SELECT COUNT(*) FROM t1, t2 WHERE t1.a=t2.a and t1.b=t2.b |
| 1100 | is executed with a join cache in such a way that t1 is the driving |
| 1101 | table then the key values to access table t2 can be read directly |
| 1102 | from the join buffer. |
| 1103 | |
| 1104 | NOTES |
    In some cases key values could be read directly from the join buffer but
    we still do not consider them embedded. In the future we'll expand the
    class of keys which we identify as embedded.
| 1108 | |
| 1109 | NOTES |
| 1110 | The function returns FALSE if no key is used to join the records |
| 1111 | from join_tab. |
| 1112 | |
| 1113 | RETURN VALUE |
| 1114 | TRUE key values will be considered as embedded, |
| 1115 | FALSE otherwise. |
| 1116 | */ |
| 1117 | |
| 1118 | bool JOIN_CACHE::check_emb_key_usage() |
| 1119 | { |
| 1120 | |
| 1121 | if (!is_key_access()) |
| 1122 | return FALSE; |
| 1123 | |
| 1124 | uint i; |
| 1125 | Item *item; |
| 1126 | KEY_PART_INFO *key_part; |
| 1127 | CACHE_FIELD *copy; |
| 1128 | CACHE_FIELD *copy_end; |
| 1129 | uint len= 0; |
| 1130 | TABLE_REF *ref= &join_tab->ref; |
| 1131 | KEY *keyinfo= join_tab->get_keyinfo_by_key_no(ref->key); |
| 1132 | |
| 1133 | /* |
| 1134 | If some of the key arguments are not from the local cache the key |
| 1135 | is not considered as embedded. |
| 1136 | TODO: |
| 1137 | Expand it to the case when ref->key_parts=1 and local_key_arg_fields=0. |
| 1138 | */ |
| 1139 | if (external_key_arg_fields != 0) |
| 1140 | return FALSE; |
| 1141 | /* |
| 1142 | If the number of the local key arguments is not equal to the number |
| 1143 | of key parts the key value cannot be read directly from the join buffer. |
| 1144 | */ |
| 1145 | if (local_key_arg_fields != ref->key_parts) |
| 1146 | return FALSE; |
| 1147 | |
| 1148 | /* |
| 1149 | A key is not considered embedded if one of the following is true: |
| 1150 | - one of its key parts is not equal to a field |
| 1151 | - it is a partial key |
| 1152 | - definition of the argument field does not coincide with the |
| 1153 | definition of the corresponding key component |
| 1154 | - some of the key components are nullable |
| 1155 | */ |
| 1156 | for (i=0; i < ref->key_parts; i++) |
| 1157 | { |
| 1158 | item= ref->items[i]->real_item(); |
| 1159 | if (item->type() != Item::FIELD_ITEM) |
| 1160 | return FALSE; |
| 1161 | key_part= keyinfo->key_part+i; |
| 1162 | if (key_part->key_part_flag & HA_PART_KEY_SEG) |
| 1163 | return FALSE; |
| 1164 | if (!key_part->field->eq_def(((Item_field *) item)->field)) |
| 1165 | return FALSE; |
| 1166 | if (key_part->field->maybe_null()) |
| 1167 | return FALSE; |
| 1168 | } |
| 1169 | |
| 1170 | copy= field_descr+flag_fields; |
| 1171 | copy_end= copy+local_key_arg_fields; |
| 1172 | for ( ; copy < copy_end; copy++) |
| 1173 | { |
| 1174 | /* |
| 1175 | If some of the key arguments are of variable length the key |
| 1176 | is not considered as embedded. |
| 1177 | */ |
| 1178 | if (copy->type != 0) |
| 1179 | return FALSE; |
| 1180 | /* |
| 1181 | If some of the key arguments are bit fields whose bits are partially |
| 1182 | stored with null bits the key is not considered as embedded. |
| 1183 | */ |
| 1184 | if (copy->field->type() == MYSQL_TYPE_BIT && |
| 1185 | ((Field_bit*) (copy->field))->bit_len) |
| 1186 | return FALSE; |
| 1187 | len+= copy->length; |
| 1188 | } |
| 1189 | |
| 1190 | emb_key_length= len; |
| 1191 | |
| 1192 | /* |
| 1193 | Make sure that key fields follow the order of the corresponding |
| 1194 | key components these fields are equal to. For this the descriptors |
| 1195 | of the fields that comprise the key might be re-ordered. |
| 1196 | */ |
| 1197 | for (i= 0; i < ref->key_parts; i++) |
| 1198 | { |
| 1199 | uint j; |
| 1200 | Item *item= ref->items[i]->real_item(); |
| 1201 | Field *fld= ((Item_field *) item)->field; |
| 1202 | CACHE_FIELD *init_copy= field_descr+flag_fields+i; |
| 1203 | for (j= i, copy= init_copy; i < local_key_arg_fields; i++, copy++) |
| 1204 | { |
| 1205 | if (fld->eq(copy->field)) |
| 1206 | { |
| 1207 | if (j != i) |
| 1208 | { |
| 1209 | CACHE_FIELD key_part_copy= *copy; |
| 1210 | *copy= *init_copy; |
| 1211 | *init_copy= key_part_copy; |
| 1212 | } |
| 1213 | break; |
| 1214 | } |
| 1215 | } |
| 1216 | } |
| 1217 | |
| 1218 | return TRUE; |
| 1219 | } |
| 1220 | |
| 1221 | |
| 1222 | /* |
| 1223 | Write record fields and their required offsets into the join cache buffer |
| 1224 | |
| 1225 | SYNOPSIS |
| 1226 | write_record_data() |
| 1227 | link a reference to the associated info in the previous cache |
| 1228 | is_full OUT true if it has been decided that no more records will be |
| 1229 | added to the join buffer |
| 1230 | |
| 1231 | DESCRIPTION |
| 1232 | This function put into the cache buffer the following info that it reads |
| 1233 | from the join record buffers or computes somehow: |
| 1234 | (1) the length of all fields written for the record (optional) |
| 1235 | (2) an offset to the associated info in the previous cache (if there is any) |
| 1236 | determined by the link parameter |
| 1237 | (3) all flag fields of the tables whose data field are put into the cache: |
| 1238 | - match flag (optional), |
| 1239 | - null bitmaps for all tables, |
| 1240 | - null row flags for all tables |
| 1241 | (4) values of all data fields including |
      - full images of those fixed length data fields that cannot have
        trailing spaces
      - significant part of fixed length fields that can have trailing spaces
        with the prepended length
      - data of non-blob variable length fields with the prepended data length
      - blob data from blob fields with the prepended data length
| 1248 | (5) record offset values for the data fields that are referred to from |
| 1249 | other caches |
| 1250 | |
| 1251 | The record is written at the current position stored in the field 'pos'. |
| 1252 | At the end of the function 'pos' points at the position right after the |
| 1253 | written record data. |
| 1254 | The function increments the number of records in the cache that is stored |
| 1255 | in the 'records' field by 1. The function also modifies the values of |
| 1256 | 'curr_rec_pos' and 'last_rec_pos' to point to the written record. |
| 1257 | The 'end_pos' cursor is modified accordingly. |
| 1258 | The 'last_rec_blob_data_is_in_rec_buff' is set on if the blob data |
| 1259 | remains in the record buffers and not copied to the join buffer. It may |
| 1260 | happen only to the blob data from the last record added into the cache. |
| 1261 | If on_precond is attached to join_tab and it is not evaluated to TRUE |
| 1262 | then MATCH_IMPOSSIBLE is placed in the match flag field of the record |
| 1263 | written into the join buffer. |
| 1264 | |
| 1265 | RETURN VALUE |
| 1266 | length of the written record data |
| 1267 | */ |
| 1268 | |
| 1269 | uint JOIN_CACHE::write_record_data(uchar * link, bool *is_full) |
| 1270 | { |
| 1271 | uint len; |
| 1272 | bool last_record; |
| 1273 | CACHE_FIELD *copy; |
| 1274 | CACHE_FIELD *copy_end; |
| 1275 | uchar *flags_pos; |
| 1276 | uchar *cp= pos; |
| 1277 | uchar *init_pos= cp; |
| 1278 | uchar *rec_len_ptr= 0; |
| 1279 | uint = extra_key_length(); |
| 1280 | |
| 1281 | records++; /* Increment the counter of records in the cache */ |
| 1282 | |
| 1283 | len= pack_length + key_extra; |
| 1284 | |
| 1285 | /* Make an adjustment for the size of the auxiliary buffer if there is any */ |
| 1286 | uint incr= aux_buffer_incr(records); |
| 1287 | size_t rem= rem_space(); |
| 1288 | aux_buff_size+= len+incr < rem ? incr : rem; |
| 1289 | |
| 1290 | /* |
| 1291 | For each blob to be put into cache save its length and a pointer |
| 1292 | to the value in the corresponding element of the blob_ptr array. |
| 1293 | Blobs with null values are skipped. |
| 1294 | Increment 'len' by the total length of all these blobs. |
| 1295 | */ |
| 1296 | if (blobs) |
| 1297 | { |
| 1298 | CACHE_FIELD **copy_ptr= blob_ptr; |
| 1299 | CACHE_FIELD **copy_ptr_end= copy_ptr+blobs; |
| 1300 | for ( ; copy_ptr < copy_ptr_end; copy_ptr++) |
| 1301 | { |
| 1302 | Field_blob *blob_field= (Field_blob *) (*copy_ptr)->field; |
| 1303 | if (!blob_field->is_null()) |
| 1304 | { |
| 1305 | uint blob_len= blob_field->get_length(); |
| 1306 | (*copy_ptr)->blob_length= blob_len; |
| 1307 | len+= blob_len; |
| 1308 | (*copy_ptr)->str= blob_field->get_ptr(); |
| 1309 | } |
| 1310 | } |
| 1311 | } |
| 1312 | |
| 1313 | /* |
| 1314 | Check whether we won't be able to add any new record into the cache after |
| 1315 | this one because the cache will be full. Set last_record to TRUE if it's so. |
| 1316 | The assume that the cache will be full after the record has been written |
| 1317 | into it if either the remaining space of the cache is not big enough for the |
| 1318 | record's blob values or if there is a chance that not all non-blob fields |
| 1319 | of the next record can be placed there. |
| 1320 | This function is called only in the case when there is enough space left in |
| 1321 | the cache to store at least non-blob parts of the current record. |
| 1322 | */ |
| 1323 | last_record= (len+pack_length_with_blob_ptrs+key_extra) > rem_space(); |
| 1324 | |
| 1325 | /* |
| 1326 | Save the position for the length of the record in the cache if it's needed. |
| 1327 | The length of the record will be inserted here when all fields of the record |
| 1328 | are put into the cache. |
| 1329 | */ |
| 1330 | if (with_length) |
| 1331 | { |
| 1332 | rec_len_ptr= cp; |
| 1333 | DBUG_ASSERT(cp + size_of_rec_len <= buff + buff_size); |
| 1334 | cp+= size_of_rec_len; |
| 1335 | } |
| 1336 | |
| 1337 | /* |
| 1338 | Put a reference to the fields of the record that are stored in the previous |
| 1339 | cache if there is any. This reference is passed by the 'link' parameter. |
| 1340 | */ |
| 1341 | if (prev_cache) |
| 1342 | { |
| 1343 | DBUG_ASSERT(cp + prev_cache->get_size_of_rec_offset() <= buff + buff_size); |
| 1344 | cp+= prev_cache->get_size_of_rec_offset(); |
| 1345 | prev_cache->store_rec_ref(cp, link); |
| 1346 | } |
| 1347 | |
| 1348 | curr_rec_pos= cp; |
| 1349 | |
| 1350 | /* If the there is a match flag set its value to 0 */ |
| 1351 | copy= field_descr; |
| 1352 | if (with_match_flag) |
| 1353 | *copy[0].str= 0; |
| 1354 | |
| 1355 | /* First put into the cache the values of all flag fields */ |
| 1356 | copy_end= field_descr+flag_fields; |
| 1357 | flags_pos= cp; |
| 1358 | for ( ; copy < copy_end; copy++) |
| 1359 | { |
| 1360 | DBUG_ASSERT(cp + copy->length <= buff + buff_size); |
| 1361 | memcpy(cp, copy->str, copy->length); |
| 1362 | cp+= copy->length; |
| 1363 | } |
| 1364 | |
| 1365 | /* Now put the values of the remaining fields as soon as they are not nulls */ |
| 1366 | copy_end= field_descr+fields; |
| 1367 | for ( ; copy < copy_end; copy++) |
| 1368 | { |
| 1369 | Field *field= copy->field; |
| 1370 | if (field && field->maybe_null() && field->is_null()) |
| 1371 | { |
| 1372 | if (copy->referenced_field_no) |
| 1373 | copy->offset= 0; |
| 1374 | continue; |
| 1375 | } |
| 1376 | /* Save the offset of the field to put it later at the end of the record */ |
| 1377 | if (copy->referenced_field_no) |
| 1378 | copy->offset= (uint)(cp-curr_rec_pos); |
| 1379 | |
| 1380 | switch (copy->type) { |
| 1381 | case CACHE_BLOB: |
| 1382 | { |
| 1383 | Field_blob *blob_field= (Field_blob *) copy->field; |
| 1384 | if (last_record) |
| 1385 | { |
| 1386 | last_rec_blob_data_is_in_rec_buff= 1; |
| 1387 | /* Put down the length of the blob and the pointer to the data */ |
| 1388 | DBUG_ASSERT(cp + copy->length + sizeof(char*) <= buff + buff_size); |
| 1389 | blob_field->get_image(cp, copy->length+sizeof(char*), |
| 1390 | blob_field->charset()); |
| 1391 | cp+= copy->length+sizeof(char*); |
| 1392 | } |
| 1393 | else |
| 1394 | { |
| 1395 | /* First put down the length of the blob and then copy the data */ |
| 1396 | blob_field->get_image(cp, copy->length, |
| 1397 | blob_field->charset()); |
| 1398 | DBUG_ASSERT(cp + copy->length + copy->blob_length <= buff + buff_size); |
| 1399 | memcpy(cp+copy->length, copy->str, copy->blob_length); |
| 1400 | cp+= copy->length+copy->blob_length; |
| 1401 | } |
| 1402 | break; |
| 1403 | } |
| 1404 | case CACHE_VARSTR1: |
| 1405 | /* Copy the significant part of the short varstring field */ |
| 1406 | len= (uint) copy->str[0] + 1; |
| 1407 | DBUG_ASSERT(cp + len <= buff + buff_size); |
| 1408 | memcpy(cp, copy->str, len); |
| 1409 | cp+= len; |
| 1410 | break; |
| 1411 | case CACHE_VARSTR2: |
| 1412 | /* Copy the significant part of the long varstring field */ |
| 1413 | len= uint2korr(copy->str) + 2; |
| 1414 | DBUG_ASSERT(cp + len <= buff + buff_size); |
| 1415 | memcpy(cp, copy->str, len); |
| 1416 | cp+= len; |
| 1417 | break; |
| 1418 | case CACHE_STRIPPED: |
| 1419 | { |
| 1420 | /* |
| 1421 | Put down the field value stripping all trailing spaces off. |
| 1422 | After this insert the length of the written sequence of bytes. |
| 1423 | */ |
| 1424 | uchar *str, *end; |
| 1425 | for (str= copy->str, end= str+copy->length; |
| 1426 | end > str && end[-1] == ' '; |
| 1427 | end--) ; |
| 1428 | len=(uint) (end-str); |
| 1429 | DBUG_ASSERT(cp + len + 2 <= buff + buff_size); |
| 1430 | int2store(cp, len); |
| 1431 | memcpy(cp+2, str, len); |
| 1432 | cp+= len+2; |
| 1433 | break; |
| 1434 | } |
| 1435 | case CACHE_ROWID: |
| 1436 | if (!copy->length) |
| 1437 | { |
| 1438 | /* |
| 1439 | This may happen only for ROWID fields of materialized |
| 1440 | derived tables and views. |
| 1441 | */ |
| 1442 | TABLE *table= (TABLE *) copy->str; |
| 1443 | copy->str= table->file->ref; |
| 1444 | copy->length= table->file->ref_length; |
| 1445 | if (!copy->str) |
| 1446 | { |
| 1447 | /* |
| 1448 | If table is an empty inner table of an outer join and it is |
| 1449 | a materialized derived table then table->file->ref == NULL. |
| 1450 | */ |
| 1451 | cp+= copy->length; |
| 1452 | break; |
| 1453 | } |
| 1454 | } |
| 1455 | /* fall through */ |
| 1456 | default: |
| 1457 | /* Copy the entire image of the field from the record buffer */ |
| 1458 | DBUG_ASSERT(cp + copy->length <= buff + buff_size); |
| 1459 | if (copy->str) |
| 1460 | memcpy(cp, copy->str, copy->length); |
| 1461 | cp+= copy->length; |
| 1462 | } |
| 1463 | } |
| 1464 | |
| 1465 | /* Add the offsets of the fields that are referenced from other caches */ |
| 1466 | if (referenced_fields) |
| 1467 | { |
| 1468 | uint cnt= 0; |
| 1469 | for (copy= field_descr+flag_fields; copy < copy_end ; copy++) |
| 1470 | { |
| 1471 | if (copy->referenced_field_no) |
| 1472 | { |
| 1473 | store_fld_offset(cp+size_of_fld_ofs*(copy->referenced_field_no-1), |
| 1474 | copy->offset); |
| 1475 | cnt++; |
| 1476 | } |
| 1477 | } |
| 1478 | DBUG_ASSERT(cp + size_of_fld_ofs*cnt <= buff + buff_size); |
| 1479 | cp+= size_of_fld_ofs*cnt; |
| 1480 | } |
| 1481 | |
| 1482 | if (rec_len_ptr) |
| 1483 | store_rec_length(rec_len_ptr, (ulong) (cp-rec_len_ptr-size_of_rec_len)); |
| 1484 | last_rec_pos= curr_rec_pos; |
| 1485 | end_pos= pos= cp; |
| 1486 | *is_full= last_record; |
| 1487 | |
| 1488 | last_written_is_null_compl= 0; |
| 1489 | if (!join_tab->first_unmatched && join_tab->on_precond) |
| 1490 | { |
| 1491 | join_tab->found= 0; |
| 1492 | join_tab->not_null_compl= 1; |
| 1493 | if (!join_tab->on_precond->val_int()) |
| 1494 | { |
| 1495 | flags_pos[0]= MATCH_IMPOSSIBLE; |
| 1496 | last_written_is_null_compl= 1; |
| 1497 | } |
| 1498 | } |
| 1499 | |
| 1500 | return (uint) (cp-init_pos); |
| 1501 | } |
| 1502 | |
| 1503 | |
| 1504 | /* |
| 1505 | Reset the join buffer for reading/writing: default implementation |
| 1506 | |
| 1507 | SYNOPSIS |
| 1508 | reset() |
| 1509 | for_writing if it's TRUE the function reset the buffer for writing |
| 1510 | |
| 1511 | DESCRIPTION |
| 1512 | This default implementation of the virtual function reset() resets |
| 1513 | the join buffer for reading or writing. |
| 1514 | If the buffer is reset for reading only the 'pos' value is reset |
| 1515 | to point to the very beginning of the join buffer. If the buffer is |
| 1516 | reset for writing additionally: |
| 1517 | - the counter of the records in the buffer is set to 0, |
    - the value of 'last_rec_pos' gets pointing at the position just
| 1519 | before the buffer, |
| 1520 | - 'end_pos' is set to point to the beginning of the join buffer, |
| 1521 | - the size of the auxiliary buffer is reset to 0, |
| 1522 | - the flag 'last_rec_blob_data_is_in_rec_buff' is set to 0. |
| 1523 | |
| 1524 | RETURN VALUE |
| 1525 | none |
| 1526 | */ |
| 1527 | void JOIN_CACHE::reset(bool for_writing) |
| 1528 | { |
| 1529 | pos= buff; |
| 1530 | curr_rec_link= 0; |
| 1531 | if (for_writing) |
| 1532 | { |
| 1533 | records= 0; |
| 1534 | last_rec_pos= buff; |
| 1535 | aux_buff_size= 0; |
| 1536 | end_pos= pos; |
| 1537 | last_rec_blob_data_is_in_rec_buff= 0; |
| 1538 | } |
| 1539 | } |
| 1540 | |
| 1541 | |
| 1542 | /* |
| 1543 | Add a record into the join buffer: the default implementation |
| 1544 | |
| 1545 | SYNOPSIS |
| 1546 | put_record() |
| 1547 | |
| 1548 | DESCRIPTION |
| 1549 | This default implementation of the virtual function put_record writes |
| 1550 | the next matching record into the join buffer. |
| 1551 | It also links the record having been written into the join buffer with |
| 1552 | the matched record in the previous cache if there is any. |
| 1553 | The implementation assumes that the function get_curr_link() |
| 1554 | will return exactly the pointer to this matched record. |
| 1555 | |
| 1556 | RETURN VALUE |
| 1557 | TRUE if it has been decided that it should be the last record |
| 1558 | in the join buffer, |
| 1559 | FALSE otherwise |
| 1560 | */ |
| 1561 | |
| 1562 | bool JOIN_CACHE::put_record() |
| 1563 | { |
| 1564 | bool is_full; |
| 1565 | uchar *link= 0; |
| 1566 | if (prev_cache) |
| 1567 | link= prev_cache->get_curr_rec_link(); |
| 1568 | write_record_data(link, &is_full); |
| 1569 | return is_full; |
| 1570 | } |
| 1571 | |
| 1572 | |
| 1573 | /* |
| 1574 | Read the next record from the join buffer: the default implementation |
| 1575 | |
| 1576 | SYNOPSIS |
| 1577 | get_record() |
| 1578 | |
| 1579 | DESCRIPTION |
| 1580 | This default implementation of the virtual function get_record |
| 1581 | reads fields of the next record from the join buffer of this cache. |
| 1582 | The function also reads all other fields associated with this record |
    from the join buffers of the previous caches. The fields are read
| 1584 | into the corresponding record buffers. |
| 1585 | It is supposed that 'pos' points to the position in the buffer |
| 1586 | right after the previous record when the function is called. |
| 1587 | When the function returns the 'pos' values is updated to point |
| 1588 | to the position after the read record. |
| 1589 | The value of 'curr_rec_pos' is also updated by the function to |
| 1590 | point to the beginning of the first field of the record in the |
| 1591 | join buffer. |
| 1592 | |
| 1593 | RETURN VALUE |
| 1594 | TRUE there are no more records to read from the join buffer |
| 1595 | FALSE otherwise |
| 1596 | */ |
| 1597 | |
| 1598 | bool JOIN_CACHE::get_record() |
| 1599 | { |
| 1600 | bool res; |
| 1601 | uchar *prev_rec_ptr= 0; |
| 1602 | if (with_length) |
| 1603 | pos+= size_of_rec_len; |
| 1604 | if (prev_cache) |
| 1605 | { |
| 1606 | pos+= prev_cache->get_size_of_rec_offset(); |
| 1607 | prev_rec_ptr= prev_cache->get_rec_ref(pos); |
| 1608 | } |
| 1609 | curr_rec_pos= pos; |
| 1610 | if (!(res= read_all_record_fields() == NO_MORE_RECORDS_IN_BUFFER)) |
| 1611 | { |
| 1612 | pos+= referenced_fields*size_of_fld_ofs; |
| 1613 | if (prev_cache) |
| 1614 | prev_cache->get_record_by_pos(prev_rec_ptr); |
| 1615 | } |
| 1616 | return res; |
| 1617 | } |
| 1618 | |
| 1619 | |
| 1620 | /* |
| 1621 | Read a positioned record from the join buffer: the default implementation |
| 1622 | |
| 1623 | SYNOPSIS |
| 1624 | get_record_by_pos() |
| 1625 | rec_ptr position of the first field of the record in the join buffer |
| 1626 | |
| 1627 | DESCRIPTION |
| 1628 | This default implementation of the virtual function get_record_pos |
| 1629 | reads the fields of the record positioned at 'rec_ptr' from the join buffer. |
| 1630 | The function also reads all other fields associated with this record |
    from the join buffers of the previous caches. The fields are read
| 1632 | into the corresponding record buffers. |
| 1633 | |
| 1634 | RETURN VALUE |
| 1635 | none |
| 1636 | */ |
| 1637 | |
| 1638 | void JOIN_CACHE::get_record_by_pos(uchar *rec_ptr) |
| 1639 | { |
| 1640 | uchar *save_pos= pos; |
| 1641 | pos= rec_ptr; |
| 1642 | read_all_record_fields(); |
| 1643 | pos= save_pos; |
| 1644 | if (prev_cache) |
| 1645 | { |
| 1646 | uchar *prev_rec_ptr= prev_cache->get_rec_ref(rec_ptr); |
| 1647 | prev_cache->get_record_by_pos(prev_rec_ptr); |
| 1648 | } |
| 1649 | } |
| 1650 | |
| 1651 | |
| 1652 | /* |
| 1653 | Get the match flag from the referenced record: the default implementation |
| 1654 | |
| 1655 | SYNOPSIS |
| 1656 | get_match_flag_by_pos() |
| 1657 | rec_ptr position of the first field of the record in the join buffer |
| 1658 | |
| 1659 | DESCRIPTION |
| 1660 | This default implementation of the virtual function get_match_flag_by_pos |
| 1661 | get the match flag for the record pointed by the reference at the position |
| 1662 | rec_ptr. If the match flag is placed in one of the previous buffers the |
| 1663 | function first reaches the linked record fields in this buffer. |
| 1664 | |
| 1665 | RETURN VALUE |
| 1666 | match flag for the record at the position rec_ptr |
| 1667 | */ |
| 1668 | |
| 1669 | enum JOIN_CACHE::Match_flag JOIN_CACHE::get_match_flag_by_pos(uchar *rec_ptr) |
| 1670 | { |
| 1671 | Match_flag match_fl= MATCH_NOT_FOUND; |
| 1672 | if (with_match_flag) |
| 1673 | { |
| 1674 | match_fl= (enum Match_flag) rec_ptr[0]; |
| 1675 | return match_fl; |
| 1676 | } |
| 1677 | if (prev_cache) |
| 1678 | { |
| 1679 | uchar *prev_rec_ptr= prev_cache->get_rec_ref(rec_ptr); |
| 1680 | return prev_cache->get_match_flag_by_pos(prev_rec_ptr); |
| 1681 | } |
| 1682 | DBUG_ASSERT(0); |
| 1683 | return match_fl; |
| 1684 | } |
| 1685 | |
| 1686 | |
| 1687 | /* |
| 1688 | Calculate the increment of the auxiliary buffer for a record write |
| 1689 | |
| 1690 | SYNOPSIS |
| 1691 | aux_buffer_incr() |
| 1692 | recno the number of the record the increment to be calculated for |
| 1693 | |
| 1694 | DESCRIPTION |
| 1695 | This function calls the aux_buffer_incr the method of the |
| 1696 | companion member join_tab_scan to calculate the growth of the |
| 1697 | auxiliary buffer when the recno-th record is added to the |
| 1698 | join_buffer of this cache. |
| 1699 | |
| 1700 | RETURN VALUE |
| 1701 | the number of bytes in the increment |
| 1702 | */ |
| 1703 | |
uint JOIN_CACHE::aux_buffer_incr(size_t recno)
{
  /* Delegate the calculation to the companion join_tab_scan object */
  return join_tab_scan->aux_buffer_incr(recno);
}
| 1708 | |
| 1709 | /* |
| 1710 | Read all flag and data fields of a record from the join buffer |
| 1711 | |
| 1712 | SYNOPSIS |
| 1713 | read_all_record_fields() |
| 1714 | |
| 1715 | DESCRIPTION |
| 1716 | The function reads all flag and data fields of a record from the join |
| 1717 | buffer into the corresponding record buffers. |
| 1718 | The fields are read starting from the position 'pos' which is |
| 1719 | supposed to point to the beginning of the first record field. |
| 1720 | The function increments the value of 'pos' by the length of the |
| 1721 | read data. |
| 1722 | |
| 1723 | RETURN VALUE |
| 1724 | (-1) if there is no more records in the join buffer |
| 1725 | length of the data read from the join buffer - otherwise |
| 1726 | */ |
| 1727 | |
| 1728 | uint JOIN_CACHE::read_all_record_fields() |
| 1729 | { |
| 1730 | uchar *init_pos= pos; |
| 1731 | |
| 1732 | if (pos > last_rec_pos || !records) |
| 1733 | return NO_MORE_RECORDS_IN_BUFFER; |
| 1734 | |
| 1735 | /* First match flag, read null bitmaps and null_row flag for each table */ |
| 1736 | read_flag_fields(); |
| 1737 | |
| 1738 | /* Now read the remaining table fields if needed */ |
| 1739 | CACHE_FIELD *copy= field_descr+flag_fields; |
| 1740 | CACHE_FIELD *copy_end= field_descr+fields; |
| 1741 | bool blob_in_rec_buff= blob_data_is_in_rec_buff(init_pos); |
| 1742 | for ( ; copy < copy_end; copy++) |
| 1743 | read_record_field(copy, blob_in_rec_buff); |
| 1744 | |
| 1745 | return (uint) (pos-init_pos); |
| 1746 | } |
| 1747 | |
| 1748 | |
| 1749 | /* |
| 1750 | Read all flag fields of a record from the join buffer |
| 1751 | |
| 1752 | SYNOPSIS |
| 1753 | read_flag_fields() |
| 1754 | |
| 1755 | DESCRIPTION |
| 1756 | The function reads all flag fields of a record from the join |
| 1757 | buffer into the corresponding record buffers. |
| 1758 | The fields are read starting from the position 'pos'. |
| 1759 | The function increments the value of 'pos' by the length of the |
| 1760 | read data. |
| 1761 | |
| 1762 | RETURN VALUE |
| 1763 | length of the data read from the join buffer |
| 1764 | */ |
| 1765 | |
| 1766 | uint JOIN_CACHE::read_flag_fields() |
| 1767 | { |
| 1768 | uchar *init_pos= pos; |
| 1769 | CACHE_FIELD *copy= field_descr; |
| 1770 | CACHE_FIELD *copy_end= copy+flag_fields; |
| 1771 | if (with_match_flag) |
| 1772 | { |
| 1773 | copy->str[0]= MY_TEST((Match_flag) pos[0] == MATCH_FOUND); |
| 1774 | pos+= copy->length; |
| 1775 | copy++; |
| 1776 | } |
| 1777 | for ( ; copy < copy_end; copy++) |
| 1778 | { |
| 1779 | memcpy(copy->str, pos, copy->length); |
| 1780 | pos+= copy->length; |
| 1781 | } |
| 1782 | return (uint)(pos-init_pos); |
| 1783 | } |
| 1784 | |
| 1785 | |
| 1786 | /* |
| 1787 | Read a data record field from the join buffer |
| 1788 | |
| 1789 | SYNOPSIS |
| 1790 | read_record_field() |
| 1791 | copy the descriptor of the data field to be read |
| 1792 | blob_in_rec_buff indicates whether this is the field from the record |
| 1793 | whose blob data are in record buffers |
| 1794 | |
| 1795 | DESCRIPTION |
| 1796 | The function reads the data field specified by the parameter copy |
| 1797 | from the join buffer into the corresponding record buffer. |
| 1798 | The field is read starting from the position 'pos'. |
| 1799 | The data of blob values is not copied from the join buffer. |
| 1800 | The function increments the value of 'pos' by the length of the |
| 1801 | read data. |
| 1802 | |
| 1803 | RETURN VALUE |
| 1804 | length of the data read from the join buffer |
| 1805 | */ |
| 1806 | |
uint JOIN_CACHE::read_record_field(CACHE_FIELD *copy, bool blob_in_rec_buff)
{
  uint len;
  /* Do not copy the field if its value is null */
  if (copy->field && copy->field->maybe_null() && copy->field->is_null())
    return 0;
  switch (copy->type) {
  case CACHE_BLOB:
    {
      Field_blob *blob_field= (Field_blob *) copy->field;
      /*
        Copy the length and the pointer to data but not the blob data
        itself to the record buffer
      */
      if (blob_in_rec_buff)
      {
        /*
          The blob data resides in the record buffer: restore the saved
          (length, pointer) image of the blob field as it was written.
        */
        blob_field->set_image(pos, copy->length + sizeof(char*),
                              blob_field->charset());
        len= copy->length + sizeof(char*);
      }
      else
      {
        /*
          The blob data follows its length bytes inside the join buffer:
          make the field point straight into the join buffer.
        */
        blob_field->set_ptr(pos, pos+copy->length);
        len= copy->length + blob_field->get_length();
      }
    }
    break;
  case CACHE_VARSTR1:
    /* Copy the significant part of the short varstring field */
    len= (uint) pos[0] + 1;   /* 1 length byte + that many data bytes */
    memcpy(copy->str, pos, len);
    break;
  case CACHE_VARSTR2:
    /* Copy the significant part of the long varstring field */
    len= uint2korr(pos) + 2;  /* 2 length bytes + that many data bytes */
    memcpy(copy->str, pos, len);
    break;
  case CACHE_STRIPPED:
    /* Pad the value by spaces that has been stripped off */
    len= uint2korr(pos);
    memcpy(copy->str, pos+2, len);
    memset(copy->str+len, ' ', copy->length-len);
    len+= 2;                  /* account for the 2 length bytes */
    break;
  case CACHE_ROWID:
    if (!copy->str)
    {
      /* No destination buffer for the rowid: just skip its bytes */
      len= copy->length;
      break;
    }
    /* fall through */
  default:
    /* Copy the entire image of the field from the record buffer */
    len= copy->length;
    memcpy(copy->str, pos, len);
  }
  pos+= len;
  return len;
}
| 1866 | |
| 1867 | |
| 1868 | /* |
| 1869 | Read a referenced field from the join buffer |
| 1870 | |
| 1871 | SYNOPSIS |
| 1872 | read_referenced_field() |
| 1873 | copy pointer to the descriptor of the referenced field |
| 1874 | rec_ptr pointer to the record that may contain this field |
| 1875 | len IN/OUT total length of the record fields |
| 1876 | |
| 1877 | DESCRIPTION |
| 1878 | The function checks whether copy points to a data field descriptor |
| 1879 | for this cache object. If it does not then the function returns |
| 1880 | FALSE. Otherwise the function reads the field of the record in |
| 1881 | the join buffer pointed by 'rec_ptr' into the corresponding record |
| 1882 | buffer and returns TRUE. |
| 1883 | If the value of *len is 0 then the function sets it to the total |
| 1884 | length of the record fields including possible trailing offset |
| 1885 | values. Otherwise *len is supposed to provide this value that |
| 1886 | has been obtained earlier. |
| 1887 | |
| 1888 | NOTE |
| 1889 | If the value of the referenced field is null then the offset |
| 1890 | for the value is set to 0. If the value of a field can be null |
| 1891 | then the value of flag_fields is always positive. So the offset |
| 1892 | for any non-null value cannot be 0 in this case. |
| 1893 | |
| 1894 | RETURN VALUE |
| 1895 | TRUE 'copy' points to a data descriptor of this join cache |
| 1896 | FALSE otherwise |
| 1897 | */ |
| 1898 | |
bool JOIN_CACHE::read_referenced_field(CACHE_FIELD *copy,
                                       uchar *rec_ptr,
                                       uint *len)
{
  uchar *ptr;
  uint offset;
  /* Bail out if 'copy' is not one of this cache's field descriptors */
  if (copy < field_descr || copy >= field_descr+fields)
    return FALSE;
  if (!*len)
  {
    /* Get the total length of the record fields */
    uchar *len_ptr= rec_ptr;
    if (prev_cache)
      len_ptr-= prev_cache->get_size_of_rec_offset();
    *len= get_rec_length(len_ptr-size_of_rec_len);
  }

  /*
    Locate the stored offset of this referenced field: the offsets are
    appended after the record fields, one per referenced field.
  */
  ptr= rec_ptr-(prev_cache ? prev_cache->get_size_of_rec_offset() : 0);
  offset= get_fld_offset(ptr+ *len -
                         size_of_fld_ofs*
                         (referenced_fields+1-copy->referenced_field_no));
  bool is_null= FALSE;
  Field *field= copy->field;
  /* A zero offset encodes a null value (see NOTE in the header comment) */
  if (offset == 0 && flag_fields)
    is_null= TRUE;
  if (is_null)
  {
    field->set_null();
    if (!field->real_maybe_null())
      field->table->null_row= 1;
  }
  else
  {
    /* Temporarily repoint 'pos' at the field, read it, then restore 'pos' */
    uchar *save_pos= pos;
    field->set_notnull();
    if (!field->real_maybe_null())
      field->table->null_row= 0;
    pos= rec_ptr+offset;
    read_record_field(copy, blob_data_is_in_rec_buff(rec_ptr));
    pos= save_pos;
  }
  return TRUE;
}
| 1942 | |
| 1943 | |
| 1944 | /* |
  Skip record from join buffer if it's already matched: default implementation
| 1946 | |
| 1947 | SYNOPSIS |
| 1948 | skip_if_matched() |
| 1949 | |
| 1950 | DESCRIPTION |
| 1951 | This default implementation of the virtual function skip_if_matched |
| 1952 | skips the next record from the join buffer if its match flag is set to |
| 1953 | MATCH_FOUND. |
| 1954 | If the record is skipped the value of 'pos' is set to point to the position |
| 1955 | right after the record. |
| 1956 | |
| 1957 | RETURN VALUE |
| 1958 | TRUE the match flag is set to MATCH_FOUND and the record has been skipped |
| 1959 | FALSE otherwise |
| 1960 | */ |
| 1961 | |
| 1962 | bool JOIN_CACHE::skip_if_matched() |
| 1963 | { |
| 1964 | DBUG_ASSERT(with_length); |
| 1965 | uint offset= size_of_rec_len; |
| 1966 | if (prev_cache) |
| 1967 | offset+= prev_cache->get_size_of_rec_offset(); |
| 1968 | /* Check whether the match flag is MATCH_FOUND */ |
| 1969 | if (get_match_flag_by_pos(pos+offset) == MATCH_FOUND) |
| 1970 | { |
| 1971 | pos+= size_of_rec_len + get_rec_length(pos); |
| 1972 | return TRUE; |
| 1973 | } |
| 1974 | return FALSE; |
| 1975 | } |
| 1976 | |
| 1977 | |
| 1978 | /* |
| 1979 | Skip record from join buffer if the match isn't needed: default implementation |
| 1980 | |
| 1981 | SYNOPSIS |
| 1982 | skip_if_not_needed_match() |
| 1983 | |
| 1984 | DESCRIPTION |
| 1985 | This default implementation of the virtual function skip_if_not_needed_match |
| 1986 | skips the next record from the join buffer if its match flag is not |
| 1987 | MATCH_NOT_FOUND, and, either its value is MATCH_FOUND and join_tab is the |
| 1988 | first inner table of an inner join, or, its value is MATCH_IMPOSSIBLE |
| 1989 | and join_tab is the first inner table of an outer join. |
| 1990 | If the record is skipped the value of 'pos' is set to point to the position |
| 1991 | right after the record. |
| 1992 | |
| 1993 | RETURN VALUE |
| 1994 | TRUE the record has to be skipped |
| 1995 | FALSE otherwise |
| 1996 | */ |
| 1997 | |
| 1998 | bool JOIN_CACHE::skip_if_not_needed_match() |
| 1999 | { |
| 2000 | DBUG_ASSERT(with_length); |
| 2001 | enum Match_flag match_fl; |
| 2002 | uint offset= size_of_rec_len; |
| 2003 | if (prev_cache) |
| 2004 | offset+= prev_cache->get_size_of_rec_offset(); |
| 2005 | |
| 2006 | if ((match_fl= get_match_flag_by_pos(pos+offset)) != MATCH_NOT_FOUND && |
| 2007 | (join_tab->check_only_first_match() == (match_fl == MATCH_FOUND)) ) |
| 2008 | { |
| 2009 | pos+= size_of_rec_len + get_rec_length(pos); |
| 2010 | return TRUE; |
| 2011 | } |
| 2012 | return FALSE; |
| 2013 | } |
| 2014 | |
| 2015 | |
| 2016 | /* |
| 2017 | Restore the fields of the last record from the join buffer |
| 2018 | |
| 2019 | SYNOPSIS |
| 2020 | restore_last_record() |
| 2021 | |
| 2022 | DESCRIPTION |
    This function restores the values of the fields of the last record put
| 2024 | into join buffer in record buffers. The values most probably have been |
| 2025 | overwritten by the field values from other records when they were read |
| 2026 | from the join buffer into the record buffer in order to check pushdown |
| 2027 | predicates. |
| 2028 | |
| 2029 | RETURN |
| 2030 | none |
| 2031 | */ |
| 2032 | |
| 2033 | void JOIN_CACHE::restore_last_record() |
| 2034 | { |
| 2035 | if (records) |
| 2036 | get_record_by_pos(last_rec_pos); |
| 2037 | } |
| 2038 | |
| 2039 | |
| 2040 | /* |
| 2041 | Join records from the join buffer with records from the next join table |
| 2042 | |
| 2043 | SYNOPSIS |
| 2044 | join_records() |
| 2045 | skip_last do not find matches for the last record from the buffer |
| 2046 | |
| 2047 | DESCRIPTION |
    The function extends all records from the join buffer by the matched
| 2049 | records from join_tab. In the case of outer join operation it also |
| 2050 | adds null complementing extensions for the records from the join buffer |
| 2051 | that have no match. |
| 2052 | No extensions are generated for the last record from the buffer if |
| 2053 | skip_last is true. |
| 2054 | |
| 2055 | NOTES |
| 2056 | The function must make sure that if linked join buffers are used then |
| 2057 | a join buffer cannot be refilled again until all extensions in the |
| 2058 | buffers chained to this one are generated. |
| 2059 | Currently an outer join operation with several inner tables always uses |
| 2060 | at least two linked buffers with the match join flags placed in the |
| 2061 | first buffer. Any record composed of rows of the inner tables that |
| 2062 | matches a record in this buffer must refer to the position of the |
| 2063 | corresponding match flag. |
| 2064 | |
| 2065 | IMPLEMENTATION |
| 2066 | When generating extensions for outer tables of an outer join operation |
| 2067 | first we generate all extensions for those records from the join buffer |
| 2068 | that have matches, after which null complementing extension for all |
| 2069 | unmatched records from the join buffer are generated. |
| 2070 | |
| 2071 | RETURN VALUE |
| 2072 | return one of enum_nested_loop_state, except NESTED_LOOP_NO_MORE_ROWS. |
| 2073 | */ |
| 2074 | |
enum_nested_loop_state JOIN_CACHE::join_records(bool skip_last)
{
  JOIN_TAB *tab;
  enum_nested_loop_state rc= NESTED_LOOP_OK;
  bool outer_join_first_inner= join_tab->is_first_inner_for_outer_join();
  DBUG_ENTER("JOIN_CACHE::join_records" );

  /* Not yet in the null-complementing phase for this outer join */
  if (outer_join_first_inner && !join_tab->first_unmatched)
    join_tab->not_null_compl= TRUE;

  if (!join_tab->first_unmatched)
  {
    /* Find all records from join_tab that match records from join buffer */
    rc= join_matching_records(skip_last);
    if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
      goto finish;
    if (outer_join_first_inner)
    {
      if (next_cache && join_tab != join_tab->last_inner)
      {
        /*
          Ensure that all matches for outer records from join buffer are to be
          found. Now we ensure that all full records are found for records from
          join buffer. Generally this is an overkill.
          TODO: Ensure that only matches of the inner table records have to be
          found for the records from join buffer.
        */
        rc= next_cache->join_records(skip_last);
        if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
          goto finish;
      }
      join_tab->not_null_compl= FALSE;
      /* Prepare for generation of null complementing extensions */
      for (tab= join_tab->first_inner; tab <= join_tab->last_inner; tab++)
        tab->first_unmatched= join_tab->first_inner;
    }
  }
  if (join_tab->first_unmatched)
  {
    if (is_key_access())
      restore_last_record();

    /*
      Generate all null complementing extensions for the records from
      join buffer that don't have any matching rows from the inner tables.
    */
    reset(FALSE);
    rc= join_null_complements(skip_last);
    if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
      goto finish;
  }
  if(next_cache)
  {
    /*
      When using linked caches we must ensure the records in the next caches
      that refer to the records in the join buffer are fully extended.
      Otherwise we could have references to the records that have been
      already erased from the join buffer and replaced for new records.
    */
    rc= next_cache->join_records(skip_last);
    if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
      goto finish;
  }

  if (skip_last)
  {
    DBUG_ASSERT(!is_key_access());
    /*
      Restore the last record from the join buffer to generate
      all extensions for it.
    */
    get_record();
  }

finish:
  if (outer_join_first_inner)
  {
    /*
      All null complemented rows have been already generated for all
      outer records from join buffer. Restore the state of the
      first_unmatched values to 0 to avoid another null complementing.
    */
    for (tab= join_tab->first_inner; tab <= join_tab->last_inner; tab++)
      tab->first_unmatched= 0;
  }
  restore_last_record();
  reset(TRUE);
  DBUG_PRINT("exit" , ("rc: %d" , rc));
  DBUG_RETURN(rc);
}
| 2165 | |
| 2166 | |
| 2167 | /* |
| 2168 | Find matches from the next table for records from the join buffer |
| 2169 | |
| 2170 | SYNOPSIS |
| 2171 | join_matching_records() |
| 2172 | skip_last do not look for matches for the last partial join record |
| 2173 | |
| 2174 | DESCRIPTION |
| 2175 | The function retrieves rows of the join_tab table and checks whether they |
| 2176 | match partial join records from the join buffer. If a match is found |
| 2177 | the function will call the sub_select function trying to look for matches |
| 2178 | for the remaining join operations. |
| 2179 | This function currently is called only from the function join_records. |
| 2180 | If the value of skip_last is true the function writes the partial join |
| 2181 | record from the record buffer into the join buffer to save its value for |
| 2182 | the future processing in the caller function. |
| 2183 | |
| 2184 | NOTES |
| 2185 | If employed by BNL or BNLH join algorithms the function performs a full |
| 2186 | scan of join_tab for each refill of the join buffer. If BKA or BKAH |
| 2187 | algorithms are used then the function iterates only over those records |
| 2188 | from join_tab that can be accessed by keys built over records in the join |
| 2189 | buffer. To apply a proper method of iteration the function just calls |
| 2190 | virtual iterator methods (open, next, close) of the member join_tab_scan. |
    The member can be either of the JOIN_TAB_SCAN or JOIN_TAB_SCAN_MRR type.
| 2192 | The class JOIN_TAB_SCAN provides the iterator methods for BNL/BNLH join |
| 2193 | algorithms. The class JOIN_TAB_SCAN_MRR provides the iterator methods |
| 2194 | for BKA/BKAH join algorithms. |
| 2195 | When the function looks for records from the join buffer that would |
| 2196 | match a record from join_tab it iterates either over all records in |
| 2197 | the buffer or only over selected records. If BNL join operation is |
| 2198 | performed all records are checked for the match. If BNLH or BKAH |
| 2199 | algorithm is employed to join join_tab then the function looks only |
| 2200 | through the records with the same join key as the record from join_tab. |
| 2201 | With the BKA join algorithm only one record from the join buffer is checked |
| 2202 | for a match for any record from join_tab. To iterate over the candidates |
| 2203 | for a match the virtual function get_next_candidate_for_match is used, |
| 2204 | while the virtual function prepare_look_for_matches is called to prepare |
| 2205 | for such iteration process. |
| 2206 | |
| 2207 | NOTES |
| 2208 | The function produces all matching extensions for the records in the |
| 2209 | join buffer following the path of the employed blocked algorithm. |
| 2210 | When an outer join operation is performed all unmatched records from |
| 2211 | the join buffer must be extended by null values. The function |
| 2212 | 'join_null_complements' serves this purpose. |
| 2213 | |
| 2214 | RETURN VALUE |
| 2215 | return one of enum_nested_loop_state |
| 2216 | */ |
| 2217 | |
enum_nested_loop_state JOIN_CACHE::join_matching_records(bool skip_last)
{
  int error;
  enum_nested_loop_state rc= NESTED_LOOP_OK;
  join_tab->table->null_row= 0;
  bool check_only_first_match= join_tab->check_only_first_match();
  bool outer_join_first_inner= join_tab->is_first_inner_for_outer_join();
  DBUG_ENTER("JOIN_CACHE::join_matching_records" );

  /* Return at once if there are no records in the join buffer */
  if (!records)
    DBUG_RETURN(NESTED_LOOP_OK);

  /*
    When joining we read records from the join buffer back into record buffers.
    If matches for the last partial join record are found through a call to
    the sub_select function then this partial join record must be saved in the
    join buffer in order to be restored just before the sub_select call.
  */
  if (skip_last)
    put_record();

  if (join_tab->use_quick == 2 && join_tab->select->quick)
  {
    /* A dynamic range access was used last. Clean up after it */
    delete join_tab->select->quick;
    join_tab->select->quick= 0;
  }

  /* Note: 'finish2' skips the error-to-rc translation done at 'finish' */
  if ((rc= join_tab_execution_startup(join_tab)) < 0)
    goto finish2;

  /* Prepare to retrieve all records of the joined table */
  if (unlikely((error= join_tab_scan->open())))
  {
    /*
      TODO: if we get here, we will assert in net_send_statement(). Add test
      coverage and fix.
    */
    goto finish;
  }

  while (!(error= join_tab_scan->next()))
  {
    if (unlikely(join->thd->check_killed()))
    {
      /* The user has aborted the execution of the query */
      join->thd->send_kill_message();
      rc= NESTED_LOOP_KILLED;
      goto finish;
    }

    /* Remember the position of the current row if a later phase needs it */
    if (join_tab->keep_current_rowid)
      join_tab->table->file->position(join_tab->table->record[0]);

    /* Prepare to read matching candidates from the join buffer */
    if (prepare_look_for_matches(skip_last))
      continue;
    join_tab->jbuf_tracker->r_scans++;

    uchar *rec_ptr;
    /* Read each possible candidate from the buffer and look for matches */
    while ((rec_ptr= get_next_candidate_for_match()))
    {
      join_tab->jbuf_tracker->r_rows++;
      /*
        If only the first match is needed, and, it has been already found for
        the next record read from the join buffer, then the record is skipped.
        Also those records that must be null complemented are not considered
        as candidates for matches.
      */
      if ((!check_only_first_match && !outer_join_first_inner) ||
          !skip_next_candidate_for_match(rec_ptr))
      {
        read_next_candidate_for_match(rec_ptr);
        rc= generate_full_extensions(rec_ptr);
        if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
          goto finish;
      }
    }
  }

finish:
  /* A negative 'error' means "no more rows", a positive one a real error */
  if (error)
    rc= error < 0 ? NESTED_LOOP_NO_MORE_ROWS: NESTED_LOOP_ERROR;
finish2:
  join_tab_scan->close();
  DBUG_RETURN(rc);
}
| 2307 | |
| 2308 | |
| 2309 | /* |
| 2310 | Set match flag for a record in join buffer if it has not been set yet |
| 2311 | |
| 2312 | SYNOPSIS |
| 2313 | set_match_flag_if_none() |
| 2314 | first_inner the join table to which this flag is attached to |
| 2315 | rec_ptr pointer to the record in the join buffer |
| 2316 | |
| 2317 | DESCRIPTION |
| 2318 | If the records of the table are accumulated in a join buffer the function |
| 2319 | sets the match flag for the record in the buffer that is referred to by |
| 2320 | the record from this cache positioned at 'rec_ptr'. |
| 2321 | The function also sets the match flag 'found' of the table first inner |
| 2322 | if it has not been set before. |
| 2323 | |
| 2324 | NOTES |
| 2325 | The function assumes that the match flag for any record in any cache |
| 2326 | is placed in the first byte occupied by the record fields. |
| 2327 | |
| 2328 | RETURN VALUE |
| 2329 | TRUE the match flag is set by this call for the first time |
| 2330 | FALSE the match flag has been set before this call |
| 2331 | */ |
| 2332 | |
| 2333 | bool JOIN_CACHE::set_match_flag_if_none(JOIN_TAB *first_inner, |
| 2334 | uchar *rec_ptr) |
| 2335 | { |
| 2336 | if (!first_inner->cache) |
| 2337 | { |
| 2338 | /* |
| 2339 | Records of the first inner table to which the flag is attached to |
| 2340 | are not accumulated in a join buffer. |
| 2341 | */ |
| 2342 | if (first_inner->found) |
| 2343 | return FALSE; |
| 2344 | else |
| 2345 | { |
| 2346 | first_inner->found= 1; |
| 2347 | return TRUE; |
| 2348 | } |
| 2349 | } |
| 2350 | JOIN_CACHE *cache= this; |
| 2351 | while (cache->join_tab != first_inner) |
| 2352 | { |
| 2353 | cache= cache->prev_cache; |
| 2354 | DBUG_ASSERT(cache); |
| 2355 | rec_ptr= cache->get_rec_ref(rec_ptr); |
| 2356 | } |
| 2357 | if ((Match_flag) rec_ptr[0] != MATCH_FOUND) |
| 2358 | { |
| 2359 | rec_ptr[0]= MATCH_FOUND; |
| 2360 | first_inner->found= 1; |
| 2361 | return TRUE; |
| 2362 | } |
| 2363 | return FALSE; |
| 2364 | } |
| 2365 | |
| 2366 | |
| 2367 | /* |
| 2368 | Generate all full extensions for a partial join record in the buffer |
| 2369 | |
| 2370 | SYNOPSIS |
| 2371 | generate_full_extensions() |
| 2372 | rec_ptr pointer to the record from join buffer to generate extensions |
| 2373 | |
| 2374 | DESCRIPTION |
| 2375 | The function first checks whether the current record of 'join_tab' matches |
| 2376 | the partial join record from join buffer located at 'rec_ptr'. If it is the |
| 2377 | case the function calls the join_tab->next_select method to generate |
| 2378 | all full extension for this partial join match. |
| 2379 | |
| 2380 | RETURN VALUE |
| 2381 | return one of enum_nested_loop_state. |
| 2382 | */ |
| 2383 | |
enum_nested_loop_state JOIN_CACHE::generate_full_extensions(uchar *rec_ptr)
{
  enum_nested_loop_state rc= NESTED_LOOP_OK;
  DBUG_ENTER("JOIN_CACHE::generate_full_extensions" );

  /*
    Check whether the extended partial join record meets
    the pushdown conditions.
  */
  if (check_match(rec_ptr))
  {
    int res= 0;

    /* Apply the semi-join duplicate weed-out check, if one is attached */
    if (!join_tab->check_weed_out_table ||
        !(res= join_tab->check_weed_out_table->sj_weedout_check_row(join->thd)))
    {
      set_curr_rec_link(rec_ptr);
      /* Hand the extended record over to the rest of the join plan */
      rc= (join_tab->next_select)(join, join_tab+1, 0);
      if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
      {
        reset(TRUE);
        DBUG_RETURN(rc);
      }
    }
    /* res == -1 signals an error from the weed-out check */
    if (res == -1)
    {
      rc= NESTED_LOOP_ERROR;
      DBUG_RETURN(rc);
    }
  }
  else if (unlikely(join->thd->is_error()))
    rc= NESTED_LOOP_ERROR;
  DBUG_RETURN(rc);
}
| 2418 | |
| 2419 | |
| 2420 | /* |
| 2421 | Check matching to a partial join record from the join buffer |
| 2422 | |
| 2423 | SYNOPSIS |
| 2424 | check_match() |
| 2425 | rec_ptr pointer to the record from join buffer to check matching to |
| 2426 | |
| 2427 | DESCRIPTION |
| 2428 | The function checks whether the current record of 'join_tab' matches |
| 2429 | the partial join record from join buffer located at 'rec_ptr'. If this is |
| 2430 | the case and 'join_tab' is the last inner table of a semi-join or an outer |
| 2431 | join the function turns on the match flag for the 'rec_ptr' record unless |
| 2432 | it has been already set. |
| 2433 | |
| 2434 | NOTES |
| 2435 | Setting the match flag on can trigger re-evaluation of pushdown conditions |
| 2436 | for the record when join_tab is the last inner table of an outer join. |
| 2437 | |
| 2438 | RETURN VALUE |
| 2439 | TRUE there is a match |
| 2440 | FALSE there is no match |
| 2441 | In this case the caller must also check thd->is_error() to see |
| 2442 | if there was a fatal error for the query. |
| 2443 | */ |
| 2444 | |
inline bool JOIN_CACHE::check_match(uchar *rec_ptr)
{
  /* Check whether pushdown conditions are satisfied */
  DBUG_ENTER("JOIN_CACHE:check_match" );

  if (join_tab->select && join_tab->select->skip_record(join->thd) <= 0)
    DBUG_RETURN(FALSE);

  join_tab->jbuf_tracker->r_rows_after_where++;

  /* Inner tables other than the last one need no match-flag handling here */
  if (!join_tab->is_last_inner_table())
    DBUG_RETURN(TRUE);

  /*
    This is the last inner table of an outer join,
    and maybe of other embedding outer joins, or
    this is the last inner table of a semi-join.
  */
  JOIN_TAB *first_inner= join_tab->get_first_inner_table();
  do
  {
    set_match_flag_if_none(first_inner, rec_ptr);
    if (first_inner->check_only_first_match() &&
        !join_tab->first_inner)
      DBUG_RETURN(TRUE);
    /*
      This is the first match for the outer table row.
      The function set_match_flag_if_none has turned the flag
      first_inner->found on. The pushdown predicates for
      inner tables must be re-evaluated with this flag on.
      Note that, if first_inner is the first inner table
      of a semi-join, but is not an inner table of an outer join
      such that 'not exists' optimization can be applied to it,
      the re-evaluation of the pushdown predicates is not needed.
    */
    for (JOIN_TAB *tab= first_inner; tab <= join_tab; tab++)
    {
      if (tab->select && tab->select->skip_record(join->thd) <= 0)
        DBUG_RETURN(FALSE);
    }
  }
  /* Repeat for each embedding outer join that also ends at join_tab */
  while ((first_inner= first_inner->first_upper) &&
         first_inner->last_inner == join_tab);
  DBUG_RETURN(TRUE);
}
| 2490 | |
| 2491 | |
| 2492 | /* |
| 2493 | Add null complements for unmatched outer records from join buffer |
| 2494 | |
| 2495 | SYNOPSIS |
| 2496 | join_null_complements() |
| 2497 | skip_last do not add null complements for the last record |
| 2498 | |
| 2499 | DESCRIPTION |
| 2500 | This function is called only for inner tables of outer joins. |
| 2501 | The function retrieves all rows from the join buffer and adds null |
| 2502 | complements for those of them that do not have matches for outer |
| 2503 | table records. |
| 2504 | If the 'join_tab' is the last inner table of the embedding outer |
| 2505 | join and the null complemented record satisfies the outer join |
    condition then the corresponding match flag is turned on
| 2507 | unless it has been set earlier. This setting may trigger |
| 2508 | re-evaluation of pushdown conditions for the record. |
| 2509 | |
| 2510 | NOTES |
| 2511 | The same implementation of the virtual method join_null_complements |
    is used for the BNL/BNLH/BKA/BKAH join algorithms.
| 2513 | |
| 2514 | RETURN VALUE |
| 2515 | return one of enum_nested_loop_state. |
| 2516 | */ |
| 2517 | |
enum_nested_loop_state JOIN_CACHE::join_null_complements(bool skip_last)
{
  ulonglong cnt;
  enum_nested_loop_state rc= NESTED_LOOP_OK;
  /* TRUE when join_tab is the first inner table of its outer join nest */
  bool is_first_inner= join_tab == join_tab->first_unmatched;
  DBUG_ENTER("JOIN_CACHE::join_null_complements" );

  /* Return at once if there are no records in the join buffer */
  if (!records)
    DBUG_RETURN(NESTED_LOOP_OK);

  /*
    For key-based access (BKA/BKAH) every buffered record is processed;
    otherwise the last record may be excluded when skip_last is set.
  */
  cnt= records - (is_key_access() ? 0 : MY_TEST(skip_last));

  /* This function may be called only for inner tables of outer joins */
  DBUG_ASSERT(join_tab->first_inner);

  for ( ; cnt; cnt--)
  {
    if (unlikely(join->thd->check_killed()))
    {
      /* The user has aborted the execution of the query */
      join->thd->send_kill_message();
      rc= NESTED_LOOP_KILLED;
      goto finish;
    }
    /* Just skip the whole record if a match for it has been already found */
    if (!is_first_inner || !skip_if_matched())
    {
      get_record();
      /* The outer row is complemented by nulls for each inner table */
      restore_record(join_tab->table, s->default_values);
      mark_as_null_row(join_tab->table);
      /* Pass the null-complemented row on to the rest of the join */
      rc= generate_full_extensions(get_curr_rec());
      if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
        goto finish;
    }
  }

finish:
  DBUG_RETURN(rc);
}
| 2559 | |
| 2560 | |
| 2561 | /* |
| 2562 | Save data on the join algorithm employed by the join cache |
| 2563 | |
| 2564 | SYNOPSIS |
| 2565 | save_explain_data() |
      explain  EXPLAIN_BKA_TYPE structure to fill with the join algorithm data
| 2567 | |
| 2568 | DESCRIPTION |
| 2569 | This function puts info about the type of the used join buffer (flat or |
    incremental) and on the type of the employed join algorithm (BNL,
| 2571 | BNLH, BKA or BKAH) to the data structure |
| 2572 | |
| 2573 | RETURN VALUE |
| 2574 | 0 ok |
| 2575 | 1 error |
| 2576 | */ |
| 2577 | |
| 2578 | bool JOIN_CACHE::save_explain_data(EXPLAIN_BKA_TYPE *explain) |
| 2579 | { |
| 2580 | explain->incremental= MY_TEST(prev_cache); |
| 2581 | |
| 2582 | explain->join_buffer_size= get_join_buffer_size(); |
| 2583 | |
| 2584 | switch (get_join_alg()) { |
| 2585 | case BNL_JOIN_ALG: |
| 2586 | explain->join_alg= "BNL" ; |
| 2587 | break; |
| 2588 | case BNLH_JOIN_ALG: |
| 2589 | explain->join_alg= "BNLH" ; |
| 2590 | break; |
| 2591 | case BKA_JOIN_ALG: |
| 2592 | explain->join_alg= "BKA" ; |
| 2593 | break; |
| 2594 | case BKAH_JOIN_ALG: |
| 2595 | explain->join_alg= "BKAH" ; |
| 2596 | break; |
| 2597 | default: |
| 2598 | DBUG_ASSERT(0); |
| 2599 | } |
| 2600 | return 0; |
| 2601 | } |
| 2602 | |
| 2603 | /** |
| 2604 | get thread handle. |
| 2605 | */ |
| 2606 | |
THD *JOIN_CACHE::thd()
{
  /* The THD of the query this join cache serves */
  return join->thd;
}
| 2611 | |
| 2612 | |
| 2613 | static bool add_mrr_explain_info(String *str, uint mrr_mode, handler *file) |
| 2614 | { |
| 2615 | char mrr_str_buf[128]={0}; |
| 2616 | int len; |
| 2617 | len= file->multi_range_read_explain_info(mrr_mode, mrr_str_buf, |
| 2618 | sizeof(mrr_str_buf)); |
| 2619 | if (len > 0) |
| 2620 | { |
| 2621 | if (str->length()) |
| 2622 | { |
| 2623 | if (str->append(STRING_WITH_LEN("; " ))) |
| 2624 | return 1; |
| 2625 | } |
| 2626 | if (str->append(mrr_str_buf, len)) |
| 2627 | return 1; |
| 2628 | } |
| 2629 | return 0; |
| 2630 | } |
| 2631 | |
| 2632 | |
| 2633 | bool JOIN_CACHE_BKA::save_explain_data(EXPLAIN_BKA_TYPE *explain) |
| 2634 | { |
| 2635 | if (JOIN_CACHE::save_explain_data(explain)) |
| 2636 | return 1; |
| 2637 | return add_mrr_explain_info(&explain->mrr_type, mrr_mode, join_tab->table->file); |
| 2638 | } |
| 2639 | |
| 2640 | |
| 2641 | bool JOIN_CACHE_BKAH::save_explain_data(EXPLAIN_BKA_TYPE *explain) |
| 2642 | { |
| 2643 | if (JOIN_CACHE::save_explain_data(explain)) |
| 2644 | return 1; |
| 2645 | return add_mrr_explain_info(&explain->mrr_type, mrr_mode, join_tab->table->file); |
| 2646 | } |
| 2647 | |
| 2648 | |
| 2649 | /* |
| 2650 | Initialize a hashed join cache |
| 2651 | |
| 2652 | SYNOPSIS |
| 2653 | init() |
| 2654 | for_explain join buffer is initialized for explain only |
| 2655 | |
| 2656 | DESCRIPTION |
| 2657 | The function initializes the cache structure with a hash table in it. |
| 2658 | The hash table will be used to store key values for the records from |
| 2659 | the join buffer. |
| 2660 | The function allocates memory for the join buffer and for descriptors of |
| 2661 | the record fields stored in the buffer. |
| 2662 | The function also initializes a hash table for record keys within the join |
| 2663 | buffer space. |
| 2664 | |
  NOTES
| 2666 | The function is supposed to be called by the init methods of the classes |
| 2667 | derived from JOIN_CACHE_HASHED. |
| 2668 | |
| 2669 | RETURN VALUE |
| 2670 | 0 initialization with buffer allocations has been succeeded |
| 2671 | 1 otherwise |
| 2672 | */ |
| 2673 | |
int JOIN_CACHE_HASHED::init(bool for_explain)
{
  int rc= 0;
  TABLE_REF *ref= &join_tab->ref;

  DBUG_ENTER("JOIN_CACHE_HASHED::init" );

  hash_table= 0;
  key_entries= 0;

  /* Lookup keys have the length of the ref access key */
  key_length= ref->key_length;

  /* Base initialization; for EXPLAIN no buffer structures are needed */
  if ((rc= JOIN_CACHE::init(for_explain)) || for_explain)
    DBUG_RETURN (rc);

  /* Buffer used to build lookup key values from record fields */
  if (!(key_buff= (uchar*) join->thd->alloc(key_length)))
    DBUG_RETURN(1);

  /* Take into account a reference to the next record in the key chain */
  pack_length+= get_size_of_rec_offset();
  pack_length_with_blob_ptrs+= get_size_of_rec_offset();

  ref_key_info= join_tab->get_keyinfo_by_key_no(join_tab->ref.key);
  ref_used_key_parts= join_tab->ref.key_parts;

  /*
    Default to the cheap byte-wise hash/compare functions; switch to the
    collation-aware ones below if any key part cannot be compared as
    a plain byte sequence.
  */
  hash_func= &JOIN_CACHE_HASHED::get_hash_idx_simple;
  hash_cmp_func= &JOIN_CACHE_HASHED::equal_keys_simple;

  KEY_PART_INFO *key_part= ref_key_info->key_part;
  KEY_PART_INFO *key_part_end= key_part+ref_used_key_parts;
  for ( ; key_part < key_part_end; key_part++)
  {
    if (!key_part->field->eq_cmp_as_binary())
    {
      hash_func= &JOIN_CACHE_HASHED::get_hash_idx_complex;
      hash_cmp_func= &JOIN_CACHE_HASHED::equal_keys_complex;
      break;
    }
  }

  init_hash_table();

  /* Offset of the data fields within a record stored in the buffer */
  rec_fields_offset= get_size_of_rec_offset()+get_size_of_rec_length()+
                     (prev_cache ? prev_cache->get_size_of_rec_offset() : 0);

  data_fields_offset= 0;
  if (use_emb_key)
  {
    /* With embedded keys the flag fields precede the data fields */
    CACHE_FIELD *copy= field_descr;
    CACHE_FIELD *copy_end= copy+flag_fields;
    for ( ; copy < copy_end; copy++)
      data_fields_offset+= copy->length;
  }

  DBUG_RETURN(0);
}
| 2730 | |
| 2731 | |
| 2732 | /* |
| 2733 | Initialize the hash table of a hashed join cache |
| 2734 | |
| 2735 | SYNOPSIS |
| 2736 | init_hash_table() |
| 2737 | |
| 2738 | DESCRIPTION |
| 2739 | The function estimates the number of hash table entries in the hash |
| 2740 | table to be used and initializes this hash table within the join buffer |
| 2741 | space. |
| 2742 | |
| 2743 | RETURN VALUE |
| 2744 | Currently the function always returns 0; |
| 2745 | */ |
| 2746 | |
int JOIN_CACHE_HASHED::init_hash_table()
{
  hash_table= 0;
  key_entries= 0;

  /* Calculate the minimal possible value of size_of_key_ofs greater than 1 */
  uint max_size_of_key_ofs= MY_MAX(2, get_size_of_rec_offset());
  for (size_of_key_ofs= 2;
       size_of_key_ofs <= max_size_of_key_ofs;
       size_of_key_ofs+= 2)
  {
    key_entry_length= get_size_of_rec_offset() +  // key chain header
                      size_of_key_ofs +           // reference to the next key
                      (use_emb_key ?  get_size_of_rec_offset() : key_length);

    /* Estimated space one buffered record consumes, including its key entry
       and its share of the hash table */
    size_t space_per_rec= avg_record_length +
                          avg_aux_buffer_incr +
                          key_entry_length+size_of_key_ofs;
    size_t n= buff_size / space_per_rec;

    /*
      TODO: Make a better estimate for this upper bound of
      the number of records in in the join buffer.
    */
    size_t max_n= buff_size / (pack_length-length+
                               key_entry_length+size_of_key_ofs);

    /* Target a hash table load factor of at most 0.7 */
    hash_entries= (uint) (n / 0.7);
    set_if_bigger(hash_entries, 1);

    /* Accept this offset size if it can address the whole key entry area */
    if (offset_size((uint)(max_n*key_entry_length)) <=
        size_of_key_ofs)
      break;
  }

  /* Initialize the hash table: it occupies the tail end of the buffer */
  hash_table= buff + (buff_size-hash_entries*size_of_key_ofs);
  cleanup_hash_table();
  curr_key_entry= hash_table;

  return 0;
}
| 2789 | |
| 2790 | |
| 2791 | /* |
| 2792 | Reallocate the join buffer of a hashed join cache |
| 2793 | |
| 2794 | SYNOPSIS |
| 2795 | realloc_buffer() |
| 2796 | |
  DESCRIPTION
| 2798 | The function reallocates the join buffer of the hashed join cache. |
| 2799 | After this it initializes a hash table within the buffer space and |
| 2800 | resets the join cache for writing. |
| 2801 | |
| 2802 | NOTES |
| 2803 | The function assumes that buff_size contains the new value for the join |
| 2804 | buffer size. |
| 2805 | |
| 2806 | RETURN VALUE |
| 2807 | 0 if the buffer has been successfully reallocated |
| 2808 | 1 otherwise |
| 2809 | */ |
| 2810 | |
| 2811 | int JOIN_CACHE_HASHED::realloc_buffer() |
| 2812 | { |
| 2813 | int rc; |
| 2814 | free(); |
| 2815 | rc= MY_TEST(!(buff= (uchar*) my_malloc(buff_size, MYF(MY_THREAD_SPECIFIC)))); |
| 2816 | init_hash_table(); |
| 2817 | reset(TRUE); |
| 2818 | return rc; |
| 2819 | } |
| 2820 | |
| 2821 | |
| 2822 | /* |
| 2823 | Get maximum size of the additional space per record used for record keys |
| 2824 | |
| 2825 | SYNOPSYS |
| 2826 | get_max_key_addon_space_per_record() |
| 2827 | |
| 2828 | DESCRIPTION |
| 2829 | The function returns the size of the space occupied by one key entry |
| 2830 | and one hash table entry. |
| 2831 | |
| 2832 | RETURN VALUE |
| 2833 | maximum size of the additional space per record that is used to store |
| 2834 | record keys in the hash table |
| 2835 | */ |
| 2836 | |
| 2837 | uint JOIN_CACHE_HASHED::get_max_key_addon_space_per_record() |
| 2838 | { |
| 2839 | ulong len; |
| 2840 | TABLE_REF *ref= &join_tab->ref; |
| 2841 | /* |
| 2842 | The total number of hash entries in the hash tables is bounded by |
| 2843 | ceiling(N/0.7) where N is the maximum number of records in the buffer. |
| 2844 | That's why the multiplier 2 is used in the formula below. |
| 2845 | */ |
| 2846 | len= (use_emb_key ? get_size_of_rec_offset() : ref->key_length) + |
| 2847 | size_of_rec_ofs + // size of the key chain header |
| 2848 | size_of_rec_ofs + // >= size of the reference to the next key |
| 2849 | 2*size_of_rec_ofs; // >= 2*( size of hash table entry) |
| 2850 | return len; |
| 2851 | } |
| 2852 | |
| 2853 | |
| 2854 | /* |
| 2855 | Reset the buffer of a hashed join cache for reading/writing |
| 2856 | |
| 2857 | SYNOPSIS |
| 2858 | reset() |
| 2859 | for_writing if it's TRUE the function reset the buffer for writing |
| 2860 | |
| 2861 | DESCRIPTION |
| 2862 | This implementation of the virtual function reset() resets the join buffer |
| 2863 | of the JOIN_CACHE_HASHED class for reading or writing. |
| 2864 | Additionally to what the default implementation does this function |
| 2865 | cleans up the hash table allocated within the buffer. |
| 2866 | |
| 2867 | RETURN VALUE |
| 2868 | none |
| 2869 | */ |
| 2870 | |
| 2871 | void JOIN_CACHE_HASHED::reset(bool for_writing) |
| 2872 | { |
| 2873 | this->JOIN_CACHE::reset(for_writing); |
| 2874 | if (for_writing && hash_table) |
| 2875 | cleanup_hash_table(); |
| 2876 | curr_key_entry= hash_table; |
| 2877 | } |
| 2878 | |
| 2879 | |
| 2880 | /* |
| 2881 | Add a record into the buffer of a hashed join cache |
| 2882 | |
| 2883 | SYNOPSIS |
| 2884 | put_record() |
| 2885 | |
| 2886 | DESCRIPTION |
| 2887 | This implementation of the virtual function put_record writes the next |
| 2888 | matching record into the join buffer of the JOIN_CACHE_HASHED class. |
| 2889 | Additionally to what the default implementation does this function |
| 2890 | performs the following. |
| 2891 | It extracts from the record the key value used in lookups for matching |
| 2892 | records and searches for this key in the hash tables from the join cache. |
| 2893 | If it finds the key in the hash table it joins the record to the chain |
| 2894 | of records with this key. If the key is not found in the hash table the |
| 2895 | key is placed into it and a chain containing only the newly added record |
| 2896 | is attached to the key entry. The key value is either placed in the hash |
| 2897 | element added for the key or, if the use_emb_key flag is set, remains in |
| 2898 | the record from the partial join. |
| 2899 | If the match flag field of a record contains MATCH_IMPOSSIBLE the key is |
| 2900 | not created for this record. |
| 2901 | |
| 2902 | RETURN VALUE |
| 2903 | TRUE if it has been decided that it should be the last record |
| 2904 | in the join buffer, |
| 2905 | FALSE otherwise |
| 2906 | */ |
| 2907 | |
bool JOIN_CACHE_HASHED::put_record()
{
  bool is_full;
  uchar *key;
  uint key_len= key_length;
  uchar *key_ref_ptr;
  uchar *link= 0;
  TABLE_REF *ref= &join_tab->ref;
  /* Reserve space for the reference chaining records with equal keys */
  uchar *next_ref_ptr= pos;

  pos+= get_size_of_rec_offset();
  /* Write the record into the join buffer */
  if (prev_cache)
    link= prev_cache->get_curr_rec_link();
  write_record_data(link, &is_full);

  /* Null-complemented records never participate in key lookups */
  if (last_written_is_null_compl)
    return is_full;

  if (use_emb_key)
    key= get_curr_emb_key();
  else
  {
    /* Build the key over the fields read into the record buffers */
    cp_buffer_from_ref(join->thd, join_tab->table, ref);
    key= ref->key_buff;
  }

  /* Look for the key in the hash table */
  if (key_search(key, key_len, &key_ref_ptr))
  {
    uchar *last_next_ref_ptr;
    /*
      The key is found in the hash table.
      Add the record to the circular list of the records attached to this key.
      Below 'rec' is the record to be added into the record chain for the found
      key, 'key_ref' points to a flatten representation of the st_key_entry
      structure that contains the key and the head of the record chain.
    */
    last_next_ref_ptr= get_next_rec_ref(key_ref_ptr+get_size_of_key_offset());
    /* rec->next_rec= key_entry->last_rec->next_rec */
    memcpy(next_ref_ptr, last_next_ref_ptr, get_size_of_rec_offset());
    /* key_entry->last_rec->next_rec= rec */
    store_next_rec_ref(last_next_ref_ptr, next_ref_ptr);
    /* key_entry->last_rec= rec */
    store_next_rec_ref(key_ref_ptr+get_size_of_key_offset(), next_ref_ptr);
  }
  else
  {
    /*
      The key is not found in the hash table.
      Put the key into the join buffer linking it with the keys for the
      corresponding hash entry. Create a circular list with one element
      referencing the record and attach the list to the key in the buffer.
      Key entries grow downwards from last_key_entry towards the records.
    */
    uchar *cp= last_key_entry;
    cp-= get_size_of_rec_offset()+get_size_of_key_offset();
    store_next_key_ref(key_ref_ptr, cp);
    store_null_key_ref(cp);
    /* A one-element circular chain: the record references itself */
    store_next_rec_ref(next_ref_ptr, next_ref_ptr);
    store_next_rec_ref(cp+get_size_of_key_offset(), next_ref_ptr);
    if (use_emb_key)
    {
      /* Store only a reference to the key embedded in the record */
      cp-= get_size_of_rec_offset();
      store_emb_key_ref(cp, key);
    }
    else
    {
      /* Store a copy of the key value itself */
      cp-= key_len;
      memcpy(cp, key, key_len);
    }
    last_key_entry= cp;
    /* The key entry area must not run into the record area */
    DBUG_ASSERT(last_key_entry >= end_pos);
    /* Increment the counter of key_entries in the hash table */
    key_entries++;
  }
  return is_full;
}
| 2986 | |
| 2987 | |
| 2988 | /* |
| 2989 | Read the next record from the buffer of a hashed join cache |
| 2990 | |
| 2991 | SYNOPSIS |
| 2992 | get_record() |
| 2993 | |
| 2994 | DESCRIPTION |
| 2995 | Additionally to what the default implementation of the virtual |
| 2996 | function get_record does this implementation skips the link element |
| 2997 | used to connect the records with the same key into a chain. |
| 2998 | |
| 2999 | RETURN VALUE |
| 3000 | TRUE there are no more records to read from the join buffer |
| 3001 | FALSE otherwise |
| 3002 | */ |
| 3003 | |
| 3004 | bool JOIN_CACHE_HASHED::get_record() |
| 3005 | { |
| 3006 | pos+= get_size_of_rec_offset(); |
| 3007 | return this->JOIN_CACHE::get_record(); |
| 3008 | } |
| 3009 | |
| 3010 | |
| 3011 | /* |
| 3012 | Skip record from a hashed join buffer if its match flag is set to MATCH_FOUND |
| 3013 | |
| 3014 | SYNOPSIS |
| 3015 | skip_if_matched() |
| 3016 | |
| 3017 | DESCRIPTION |
| 3018 | This implementation of the virtual function skip_if_matched does |
| 3019 | the same as the default implementation does, but it takes into account |
| 3020 | the link element used to connect the records with the same key into a chain. |
| 3021 | |
| 3022 | RETURN VALUE |
| 3023 | TRUE the match flag is MATCH_FOUND and the record has been skipped |
| 3024 | FALSE otherwise |
| 3025 | */ |
| 3026 | |
| 3027 | bool JOIN_CACHE_HASHED::skip_if_matched() |
| 3028 | { |
| 3029 | uchar *save_pos= pos; |
| 3030 | pos+= get_size_of_rec_offset(); |
| 3031 | if (!this->JOIN_CACHE::skip_if_matched()) |
| 3032 | { |
| 3033 | pos= save_pos; |
| 3034 | return FALSE; |
| 3035 | } |
| 3036 | return TRUE; |
| 3037 | } |
| 3038 | |
| 3039 | |
| 3040 | /* |
| 3041 | Skip record from a hashed join buffer if its match flag dictates to do so |
| 3042 | |
| 3043 | SYNOPSIS |
    skip_if_not_needed_match()
| 3045 | |
| 3046 | DESCRIPTION |
| 3047 | This implementation of the virtual function skip_if_not_needed_match does |
| 3048 | the same as the default implementation does, but it takes into account |
| 3049 | the link element used to connect the records with the same key into a chain. |
| 3050 | |
| 3051 | RETURN VALUE |
| 3052 | TRUE the match flag dictates to skip the record |
| 3053 | FALSE the match flag is off |
| 3054 | */ |
| 3055 | |
| 3056 | bool JOIN_CACHE_HASHED::skip_if_not_needed_match() |
| 3057 | { |
| 3058 | uchar *save_pos= pos; |
| 3059 | pos+= get_size_of_rec_offset(); |
| 3060 | if (!this->JOIN_CACHE::skip_if_not_needed_match()) |
| 3061 | { |
| 3062 | pos= save_pos; |
| 3063 | return FALSE; |
| 3064 | } |
| 3065 | return TRUE; |
| 3066 | } |
| 3067 | |
| 3068 | |
| 3069 | /* |
| 3070 | Search for a key in the hash table of the join buffer |
| 3071 | |
| 3072 | SYNOPSIS |
| 3073 | key_search() |
| 3074 | key pointer to the key value |
| 3075 | key_len key value length |
| 3076 | key_ref_ptr OUT position of the reference to the next key from |
                      the hash element for the found key, or
                      a position where the reference to the hash
                      element for the key is to be added in the
                      case when the key has not been found
| 3081 | |
| 3082 | DESCRIPTION |
| 3083 | The function looks for a key in the hash table of the join buffer. |
    If the key is found the function returns the position of the reference
    to the next key from the hash element for the given key.
| 3086 | Otherwise the function returns the position where the reference to the |
| 3087 | newly created hash element for the given key is to be added. |
| 3088 | |
| 3089 | RETURN VALUE |
| 3090 | TRUE the key is found in the hash table |
| 3091 | FALSE otherwise |
| 3092 | */ |
| 3093 | |
| 3094 | bool JOIN_CACHE_HASHED::key_search(uchar *key, uint key_len, |
| 3095 | uchar **key_ref_ptr) |
| 3096 | { |
| 3097 | bool is_found= FALSE; |
| 3098 | uint idx= (this->*hash_func)(key, key_length); |
| 3099 | uchar *ref_ptr= hash_table+size_of_key_ofs*idx; |
| 3100 | while (!is_null_key_ref(ref_ptr)) |
| 3101 | { |
| 3102 | uchar *next_key; |
| 3103 | ref_ptr= get_next_key_ref(ref_ptr); |
| 3104 | next_key= use_emb_key ? get_emb_key(ref_ptr-get_size_of_rec_offset()) : |
| 3105 | ref_ptr-key_length; |
| 3106 | |
| 3107 | if ((this->*hash_cmp_func)(next_key, key, key_len)) |
| 3108 | { |
| 3109 | is_found= TRUE; |
| 3110 | break; |
| 3111 | } |
| 3112 | } |
| 3113 | *key_ref_ptr= ref_ptr; |
| 3114 | return is_found; |
| 3115 | } |
| 3116 | |
| 3117 | |
| 3118 | /* |
| 3119 | Hash function that considers a key in the hash table as byte array |
| 3120 | |
| 3121 | SYNOPSIS |
| 3122 | get_hash_idx_simple() |
| 3123 | key pointer to the key value |
| 3124 | key_len key value length |
| 3125 | |
| 3126 | DESCRIPTION |
| 3127 | The function calculates an index of the hash entry in the hash table |
| 3128 | of the join buffer for the given key. It considers the key just as |
| 3129 | a sequence of bytes of the length key_len. |
| 3130 | |
| 3131 | RETURN VALUE |
| 3132 | the calculated index of the hash entry for the given key |
| 3133 | */ |
| 3134 | |
| 3135 | inline |
| 3136 | uint JOIN_CACHE_HASHED::get_hash_idx_simple(uchar* key, uint key_len) |
| 3137 | { |
| 3138 | ulong nr= 1; |
| 3139 | ulong nr2= 4; |
| 3140 | uchar *pos= key; |
| 3141 | uchar *end= key+key_len; |
| 3142 | for (; pos < end ; pos++) |
| 3143 | { |
| 3144 | nr^= (ulong) ((((uint) nr & 63)+nr2)*((uint) *pos))+ (nr << 8); |
| 3145 | nr2+= 3; |
| 3146 | } |
| 3147 | return nr % hash_entries; |
| 3148 | } |
| 3149 | |
| 3150 | |
| 3151 | /* |
| 3152 | Hash function that takes into account collations of the components of the key |
| 3153 | |
| 3154 | SYNOPSIS |
| 3155 | get_hash_idx_complex() |
| 3156 | key pointer to the key value |
| 3157 | key_len key value length |
| 3158 | |
| 3159 | DESCRIPTION |
| 3160 | The function calculates an index of the hash entry in the hash table |
| 3161 | of the join buffer for the given key. It takes into account that the |
| 3162 | components of the key may be of a varchar type with different collations. |
    The function guarantees the same hash value for any two equal
| 3164 | keys that may differ as byte sequences. |
| 3165 | The function takes the info about the components of the key, their |
| 3166 | types and used collations from the class member ref_key_info containing |
| 3167 | a pointer to the descriptor of the index that can be used for the join |
| 3168 | operation. |
| 3169 | |
| 3170 | RETURN VALUE |
| 3171 | the calculated index of the hash entry for the given key |
| 3172 | */ |
| 3173 | |
| 3174 | inline |
| 3175 | uint JOIN_CACHE_HASHED::get_hash_idx_complex(uchar *key, uint key_len) |
| 3176 | { |
| 3177 | return |
| 3178 | (uint) (key_hashnr(ref_key_info, ref_used_key_parts, key) % hash_entries); |
| 3179 | } |
| 3180 | |
| 3181 | |
| 3182 | /* |
| 3183 | Compare two key entries in the hash table as sequence of bytes |
| 3184 | |
| 3185 | SYNOPSIS |
| 3186 | equal_keys_simple() |
| 3187 | key1 pointer to the first key entry |
| 3188 | key2 pointer to the second key entry |
| 3189 | key_len the length of the key values |
| 3190 | |
| 3191 | DESCRIPTION |
| 3192 | The function compares two key entries in the hash table key1 and key2 |
| 3193 | as two sequences bytes of the length key_len |
| 3194 | |
| 3195 | RETURN VALUE |
| 3196 | TRUE key1 coincides with key2 |
| 3197 | FALSE otherwise |
| 3198 | */ |
| 3199 | |
| 3200 | inline |
| 3201 | bool JOIN_CACHE_HASHED::equal_keys_simple(uchar *key1, uchar *key2, |
| 3202 | uint key_len) |
| 3203 | { |
| 3204 | return memcmp(key1, key2, key_len) == 0; |
| 3205 | } |
| 3206 | |
| 3207 | |
| 3208 | /* |
| 3209 | Compare two key entries taking into account the used collation |
| 3210 | |
| 3211 | SYNOPSIS |
| 3212 | equal_keys_complex() |
| 3213 | key1 pointer to the first key entry |
| 3214 | key2 pointer to the second key entry |
| 3215 | key_len the length of the key values |
| 3216 | |
| 3217 | DESCRIPTION |
| 3218 | The function checks whether two key entries in the hash table |
| 3219 | key1 and key2 are equal as, possibly, compound keys of a certain |
| 3220 | structure whose components may be of a varchar type and may |
| 3221 | employ different collations. |
| 3222 | The descriptor of the key structure is taken from the class |
| 3223 | member ref_key_info. |
| 3224 | |
| 3225 | RETURN VALUE |
    TRUE    key1 is equal to key2
| 3227 | FALSE otherwise |
| 3228 | */ |
| 3229 | |
| 3230 | inline |
| 3231 | bool JOIN_CACHE_HASHED::equal_keys_complex(uchar *key1, uchar *key2, |
| 3232 | uint key_len) |
| 3233 | { |
| 3234 | return key_buf_cmp(ref_key_info, ref_used_key_parts, key1, key2) == 0; |
| 3235 | } |
| 3236 | |
| 3237 | |
| 3238 | /* |
| 3239 | Clean up the hash table of the join buffer |
| 3240 | |
| 3241 | SYNOPSIS |
    cleanup_hash_table()
| 3245 | |
| 3246 | DESCRIPTION |
| 3247 | The function cleans up the hash table in the join buffer removing all |
| 3248 | hash elements from the table. |
| 3249 | |
| 3250 | RETURN VALUE |
| 3251 | none |
| 3252 | */ |
| 3253 | |
| 3254 | void JOIN_CACHE_HASHED:: cleanup_hash_table() |
| 3255 | { |
| 3256 | last_key_entry= hash_table; |
| 3257 | bzero(hash_table, (buff+buff_size)-hash_table); |
| 3258 | key_entries= 0; |
| 3259 | } |
| 3260 | |
| 3261 | |
| 3262 | /* |
| 3263 | Check whether all records in a key chain have their match flags set on |
| 3264 | |
| 3265 | SYNOPSIS |
| 3266 | check_all_match_flags_for_key() |
| 3267 | key_chain_ptr |
| 3268 | |
| 3269 | DESCRIPTION |
| 3270 | This function retrieves records in the given circular chain and checks |
| 3271 | whether their match flags are set on. The parameter key_chain_ptr shall |
| 3272 | point to the position in the join buffer storing the reference to the |
| 3273 | last element of this chain. |
| 3274 | |
| 3275 | RETURN VALUE |
| 3276 | TRUE if each retrieved record has its match flag set to MATCH_FOUND |
| 3277 | FALSE otherwise |
| 3278 | */ |
| 3279 | |
| 3280 | bool JOIN_CACHE_HASHED::check_all_match_flags_for_key(uchar *key_chain_ptr) |
| 3281 | { |
| 3282 | uchar *last_rec_ref_ptr= get_next_rec_ref(key_chain_ptr); |
| 3283 | uchar *next_rec_ref_ptr= last_rec_ref_ptr; |
| 3284 | do |
| 3285 | { |
| 3286 | next_rec_ref_ptr= get_next_rec_ref(next_rec_ref_ptr); |
| 3287 | uchar *rec_ptr= next_rec_ref_ptr+rec_fields_offset; |
| 3288 | if (get_match_flag_by_pos(rec_ptr) != MATCH_FOUND) |
| 3289 | return FALSE; |
| 3290 | } |
| 3291 | while (next_rec_ref_ptr != last_rec_ref_ptr); |
| 3292 | return TRUE; |
| 3293 | } |
| 3294 | |
| 3295 | |
| 3296 | /* |
| 3297 | Get the next key built for the records from the buffer of a hashed join cache |
| 3298 | |
| 3299 | SYNOPSIS |
| 3300 | get_next_key() |
| 3301 | key pointer to the buffer where the key value is to be placed |
| 3302 | |
| 3303 | DESCRIPTION |
| 3304 | The function reads the next key value stored in the hash table of the |
| 3305 | join buffer. Depending on the value of the use_emb_key flag of the |
| 3306 | join cache the value is read either from the table itself or from |
| 3307 | the record field where it occurs. |
| 3308 | |
| 3309 | RETURN VALUE |
| 3310 | length of the key value - if the starting value of 'cur_key_entry' refers |
    to the position after that referred to by the value of 'last_key_entry',
| 3312 | 0 - otherwise. |
| 3313 | */ |
| 3314 | |
| 3315 | uint JOIN_CACHE_HASHED::get_next_key(uchar ** key) |
| 3316 | { |
| 3317 | if (curr_key_entry == last_key_entry) |
| 3318 | return 0; |
| 3319 | |
| 3320 | curr_key_entry-= key_entry_length; |
| 3321 | |
| 3322 | *key = use_emb_key ? get_emb_key(curr_key_entry) : curr_key_entry; |
| 3323 | |
| 3324 | DBUG_ASSERT(*key >= buff && *key < hash_table); |
| 3325 | |
| 3326 | return key_length; |
| 3327 | } |
| 3328 | |
| 3329 | |
| 3330 | /* |
| 3331 | Initiate an iteration process over records in the joined table |
| 3332 | |
| 3333 | SYNOPSIS |
| 3334 | open() |
| 3335 | |
| 3336 | DESCRIPTION |
| 3337 | The function initiates the process of iteration over records from the |
| 3338 | joined table recurrently performed by the BNL/BKLH join algorithm. |
| 3339 | |
| 3340 | RETURN VALUE |
| 3341 | 0 the initiation is a success |
| 3342 | error code otherwise |
| 3343 | */ |
| 3344 | |
int JOIN_TAB_SCAN::open()
{
  /* Restore the status of the tables joined by preceding caches */
  save_or_restore_used_tabs(join_tab, FALSE);
  is_first_record= TRUE;
  /* Count this scan of the joined table for the EXPLAIN tracker */
  join_tab->tracker->r_scans++;
  return join_init_read_record(join_tab);
}
| 3352 | |
| 3353 | |
| 3354 | /* |
| 3355 | Read the next record that can match while scanning the joined table |
| 3356 | |
| 3357 | SYNOPSIS |
| 3358 | next() |
| 3359 | |
| 3360 | DESCRIPTION |
| 3361 | The function reads the next record from the joined table that can |
| 3362 | match some records in the buffer of the join cache 'cache'. To do |
| 3363 | this the function calls the function that scans table records and |
| 3364 | looks for the next one that meets the condition pushed to the |
| 3365 | joined table join_tab. |
| 3366 | |
| 3367 | NOTES |
| 3368 | The function catches the signal that kills the query. |
| 3369 | |
| 3370 | RETURN VALUE |
| 3371 | 0 the next record exists and has been successfully read |
| 3372 | error code otherwise |
| 3373 | */ |
| 3374 | |
int JOIN_TAB_SCAN::next()
{
  int err= 0;
  int skip_rc;
  READ_RECORD *info= &join_tab->read_record;
  SQL_SELECT *select= join_tab->cache_select;
  THD *thd= join->thd;

  /* The first record was already fetched by open(); do not read past it */
  if (is_first_record)
    is_first_record= FALSE;
  else
    err= info->read_record();

  if (!err)
  {
    /* Count every retrieved row for the EXPLAIN tracker */
    join_tab->tracker->r_rows++;
  }

  /* skip_record() <= 0: the row fails the pushed condition (0) or an
     error occurred (< 0) */
  while (!err && select && (skip_rc= select->skip_record(thd)) <= 0)
  {
    if (unlikely(thd->check_killed()) || skip_rc < 0)
      return 1;
    /*
      Move to the next record if the last retrieved record does not
      meet the condition pushed to the table join_tab.
    */
    err= info->read_record();
    if (!err)
    {
      join_tab->tracker->r_rows++;
    }
  }

  /* The row passed the pushed condition (or there was none) */
  if (!err)
    join_tab->tracker->r_rows_after_where++;
  return err;
}
| 3412 | |
| 3413 | |
| 3414 | /* |
| 3415 | Walk back in join order from join_tab until we encounter a join tab with |
| 3416 | tab->cache!=NULL, and save/restore tab->table->status along the way. |
| 3417 | |
| 3418 | @param save TRUE save |
| 3419 | FALSE restore |
| 3420 | */ |
| 3421 | |
| 3422 | static void save_or_restore_used_tabs(JOIN_TAB *join_tab, bool save) |
| 3423 | { |
| 3424 | JOIN_TAB *first= join_tab->bush_root_tab? |
| 3425 | join_tab->bush_root_tab->bush_children->start : |
| 3426 | join_tab->join->join_tab + join_tab->join->const_tables; |
| 3427 | |
| 3428 | for (JOIN_TAB *tab= join_tab-1; tab != first && !tab->cache; tab--) |
| 3429 | { |
| 3430 | if (tab->bush_children) |
| 3431 | { |
| 3432 | for (JOIN_TAB *child= tab->bush_children->start; |
| 3433 | child != tab->bush_children->end; |
| 3434 | child++) |
| 3435 | { |
| 3436 | if (save) |
| 3437 | child->table->status= child->status; |
| 3438 | else |
| 3439 | { |
| 3440 | tab->status= tab->table->status; |
| 3441 | tab->table->status= 0; |
| 3442 | } |
| 3443 | } |
| 3444 | } |
| 3445 | |
| 3446 | if (save) |
| 3447 | tab->table->status= tab->status; |
| 3448 | else |
| 3449 | { |
| 3450 | tab->status= tab->table->status; |
| 3451 | tab->table->status= 0; |
| 3452 | } |
| 3453 | } |
| 3454 | } |
| 3455 | |
| 3456 | |
| 3457 | /* |
| 3458 | Perform finalizing actions for a scan over the table records |
| 3459 | |
| 3460 | SYNOPSIS |
| 3461 | close() |
| 3462 | |
| 3463 | DESCRIPTION |
| 3464 | The function performs the necessary restoring actions after |
| 3465 | the table scan over the joined table has been finished. |
| 3466 | |
| 3467 | RETURN VALUE |
| 3468 | none |
| 3469 | */ |
| 3470 | |
void JOIN_TAB_SCAN::close()
{
  /*
    Write the status values remembered by open() back into table->status
    of the tables preceding join_tab (save_or_restore_used_tabs with
    save == TRUE performs the write-back).
  */
  save_or_restore_used_tabs(join_tab, TRUE);
}
| 3475 | |
| 3476 | |
| 3477 | /* |
| 3478 | Prepare to iterate over the BNL join cache buffer to look for matches |
| 3479 | |
| 3480 | SYNOPSIS |
| 3481 | prepare_look_for_matches() |
| 3482 | skip_last <-> ignore the last record in the buffer |
| 3483 | |
| 3484 | DESCRIPTION |
| 3485 | The function prepares the join cache for an iteration over the |
| 3486 | records in the join buffer. The iteration is performed when looking |
| 3487 | for matches for the record from the joined table join_tab that |
| 3488 | has been placed into the record buffer of the joined table. |
| 3489 | If the value of the parameter skip_last is TRUE then the last |
| 3490 | record from the join buffer is ignored. |
| 3491 | The function initializes the counter of the records that have been |
| 3492 | not iterated over yet. |
| 3493 | |
| 3494 | RETURN VALUE |
| 3495 | TRUE there are no records in the buffer to iterate over |
| 3496 | FALSE otherwise |
| 3497 | */ |
| 3498 | |
| 3499 | bool JOIN_CACHE_BNL::prepare_look_for_matches(bool skip_last) |
| 3500 | { |
| 3501 | if (!records) |
| 3502 | return TRUE; |
| 3503 | reset(FALSE); |
| 3504 | rem_records= (uint)records - MY_TEST(skip_last); |
| 3505 | return rem_records == 0; |
| 3506 | } |
| 3507 | |
| 3508 | |
| 3509 | /* |
| 3510 | Get next record from the BNL join cache buffer when looking for matches |
| 3511 | |
| 3512 | SYNOPSIS |
| 3513 | get_next_candidate_for_match |
| 3514 | |
| 3515 | DESCRIPTION |
| 3516 | This method is used for iterations over the records from the join |
| 3517 | cache buffer when looking for matches for records from join_tab. |
    The method performs the necessary preparations to read the next record
| 3519 | from the join buffer into the record buffer by the method |
| 3520 | read_next_candidate_for_match, or, to skip the next record from the join |
| 3521 | buffer by the method skip_recurrent_candidate_for_match. |
| 3522 | This implementation of the virtual method get_next_candidate_for_match |
| 3523 | just decrements the counter of the records that are to be iterated over |
| 3524 | and returns the current value of the cursor 'pos' as the position of |
| 3525 | the record to be processed. |
| 3526 | |
| 3527 | RETURN VALUE |
| 3528 | pointer to the position right after the prefix of the current record |
| 3529 | in the join buffer if the there is another record to iterate over, |
| 3530 | 0 - otherwise. |
| 3531 | */ |
| 3532 | |
| 3533 | uchar *JOIN_CACHE_BNL::get_next_candidate_for_match() |
| 3534 | { |
| 3535 | if (!rem_records) |
| 3536 | return 0; |
| 3537 | rem_records--; |
| 3538 | return pos+base_prefix_length; |
| 3539 | } |
| 3540 | |
| 3541 | |
| 3542 | /* |
| 3543 | Check whether the matching record from the BNL cache is to be skipped |
| 3544 | |
| 3545 | SYNOPSIS |
| 3546 | skip_next_candidate_for_match |
| 3547 | rec_ptr pointer to the position in the join buffer right after the prefix |
| 3548 | of the current record |
| 3549 | |
| 3550 | DESCRIPTION |
| 3551 | This implementation of the virtual function just calls the |
| 3552 | method skip_if_not_needed_match to check whether the record referenced by |
| 3553 | ref_ptr has its match flag set either to MATCH_FOUND and join_tab is the |
| 3554 | first inner table of a semi-join, or it's set to MATCH_IMPOSSIBLE and |
| 3555 | join_tab is the first inner table of an outer join. |
| 3556 | If so, the function just skips this record setting the value of the |
| 3557 | cursor 'pos' to the position right after it. |
| 3558 | |
| 3559 | RETURN VALUE |
| 3560 | TRUE the record referenced by rec_ptr has been skipped |
| 3561 | FALSE otherwise |
| 3562 | */ |
| 3563 | |
bool JOIN_CACHE_BNL::skip_next_candidate_for_match(uchar *rec_ptr)
{
  /* Position the cursor at the prefix of the record referenced by rec_ptr */
  pos= rec_ptr-base_prefix_length;
  /* Let skip_if_not_needed_match decide and, if needed, advance 'pos' */
  return skip_if_not_needed_match();
}
| 3569 | |
| 3570 | |
| 3571 | /* |
| 3572 | Read next record from the BNL join cache buffer when looking for matches |
| 3573 | |
| 3574 | SYNOPSIS |
| 3575 | read_next_candidate_for_match |
| 3576 | rec_ptr pointer to the position in the join buffer right after the prefix |
| 3577 | the current record. |
| 3578 | |
| 3579 | DESCRIPTION |
| 3580 | This implementation of the virtual method read_next_candidate_for_match |
| 3581 | calls the method get_record to read the record referenced by rec_ptr from |
| 3582 | the join buffer into the record buffer. If this record refers to the |
| 3583 | fields in the other join buffers the call of get_record ensures that |
| 3584 | these fields are read into the corresponding record buffers as well. |
| 3585 | This function is supposed to be called after a successful call of |
| 3586 | the method get_next_candidate_for_match. |
| 3587 | |
| 3588 | RETURN VALUE |
| 3589 | none |
| 3590 | */ |
| 3591 | |
void JOIN_CACHE_BNL::read_next_candidate_for_match(uchar *rec_ptr)
{
  /* Rewind the cursor to the prefix of the record, then read it */
  pos= rec_ptr-base_prefix_length;
  get_record();
}
| 3597 | |
| 3598 | |
| 3599 | /* |
| 3600 | Initialize the BNL join cache |
| 3601 | |
| 3602 | SYNOPSIS |
| 3603 | init |
| 3604 | for_explain join buffer is initialized for explain only |
| 3605 | |
| 3606 | DESCRIPTION |
| 3607 | The function initializes the cache structure. It is supposed to be called |
| 3608 | right after a constructor for the JOIN_CACHE_BNL. |
| 3609 | |
| 3610 | NOTES |
| 3611 | The function first constructs a companion object of the type JOIN_TAB_SCAN, |
| 3612 | then it calls the init method of the parent class. |
| 3613 | |
| 3614 | RETURN VALUE |
| 3615 | 0 initialization with buffer allocations has been succeeded |
| 3616 | 1 otherwise |
| 3617 | */ |
| 3618 | |
| 3619 | int JOIN_CACHE_BNL::init(bool for_explain) |
| 3620 | { |
| 3621 | DBUG_ENTER("JOIN_CACHE_BNL::init" ); |
| 3622 | |
| 3623 | if (!(join_tab_scan= new JOIN_TAB_SCAN(join, join_tab))) |
| 3624 | DBUG_RETURN(1); |
| 3625 | |
| 3626 | DBUG_RETURN(JOIN_CACHE::init(for_explain)); |
| 3627 | } |
| 3628 | |
| 3629 | |
| 3630 | /* |
| 3631 | Get the chain of records from buffer matching the current candidate for join |
| 3632 | |
| 3633 | SYNOPSIS |
| 3634 | get_matching_chain_by_join_key() |
| 3635 | |
| 3636 | DESCRIPTION |
| 3637 | This function first build a join key for the record of join_tab that |
| 3638 | currently is in the join buffer for this table. Then it looks for |
| 3639 | the key entry with this key in the hash table of the join cache. |
| 3640 | If such a key entry is found the function returns the pointer to |
| 3641 | the head of the chain of records in the join_buffer that match this |
| 3642 | key. |
| 3643 | |
| 3644 | RETURN VALUE |
| 3645 | The pointer to the corresponding circular list of records if |
| 3646 | the key entry with the join key is found, 0 - otherwise. |
| 3647 | */ |
| 3648 | |
| 3649 | uchar *JOIN_CACHE_BNLH::get_matching_chain_by_join_key() |
| 3650 | { |
| 3651 | uchar *key_ref_ptr; |
| 3652 | TABLE *table= join_tab->table; |
| 3653 | TABLE_REF *ref= &join_tab->ref; |
| 3654 | KEY *keyinfo= join_tab->get_keyinfo_by_key_no(ref->key); |
| 3655 | /* Build the join key value out of the record in the record buffer */ |
| 3656 | key_copy(key_buff, table->record[0], keyinfo, key_length, TRUE); |
| 3657 | /* Look for this key in the join buffer */ |
| 3658 | if (!key_search(key_buff, key_length, &key_ref_ptr)) |
| 3659 | return 0; |
| 3660 | return key_ref_ptr+get_size_of_key_offset(); |
| 3661 | } |
| 3662 | |
| 3663 | |
| 3664 | /* |
| 3665 | Prepare to iterate over the BNLH join cache buffer to look for matches |
| 3666 | |
| 3667 | SYNOPSIS |
| 3668 | prepare_look_for_matches() |
| 3669 | skip_last <-> ignore the last record in the buffer |
| 3670 | |
| 3671 | DESCRIPTION |
| 3672 | The function prepares the join cache for an iteration over the |
| 3673 | records in the join buffer. The iteration is performed when looking |
| 3674 | for matches for the record from the joined table join_tab that |
| 3675 | has been placed into the record buffer of the joined table. |
| 3676 | If the value of the parameter skip_last is TRUE then the last |
| 3677 | record from the join buffer is ignored. |
| 3678 | The function builds the hashed key from the join fields of join_tab |
| 3679 | and uses this key to look in the hash table of the join cache for |
    the chain of matching records in the join buffer. If it finds
    such a chain it sets the member last_rec_ref_ptr to point to the
    last link of the chain while setting the member next_rec_ref_ptr to 0.
| 3683 | |
| 3684 | RETURN VALUE |
| 3685 | TRUE there are no matching records in the buffer to iterate over |
| 3686 | FALSE otherwise |
| 3687 | */ |
| 3688 | |
| 3689 | bool JOIN_CACHE_BNLH::prepare_look_for_matches(bool skip_last) |
| 3690 | { |
| 3691 | uchar *curr_matching_chain; |
| 3692 | last_matching_rec_ref_ptr= next_matching_rec_ref_ptr= 0; |
| 3693 | if (!(curr_matching_chain= get_matching_chain_by_join_key())) |
| 3694 | return 1; |
| 3695 | last_matching_rec_ref_ptr= get_next_rec_ref(curr_matching_chain); |
| 3696 | return 0; |
| 3697 | } |
| 3698 | |
| 3699 | |
| 3700 | /* |
| 3701 | Get next record from the BNLH join cache buffer when looking for matches |
| 3702 | |
| 3703 | SYNOPSIS |
| 3704 | get_next_candidate_for_match |
| 3705 | |
| 3706 | DESCRIPTION |
| 3707 | This method is used for iterations over the records from the join |
| 3708 | cache buffer when looking for matches for records from join_tab. |
    The method performs the necessary preparations to read the next record
| 3710 | from the join buffer into the record buffer by the method |
| 3711 | read_next_candidate_for_match, or, to skip the next record from the join |
| 3712 | buffer by the method skip_next_candidate_for_match. |
| 3713 | This implementation of the virtual method moves to the next record |
| 3714 | in the chain of all records from the join buffer that are to be |
| 3715 | equi-joined with the current record from join_tab. |
| 3716 | |
| 3717 | RETURN VALUE |
| 3718 | pointer to the beginning of the record fields in the join buffer |
| 3719 | if the there is another record to iterate over, 0 - otherwise. |
| 3720 | */ |
| 3721 | |
| 3722 | uchar *JOIN_CACHE_BNLH::get_next_candidate_for_match() |
| 3723 | { |
| 3724 | if (next_matching_rec_ref_ptr == last_matching_rec_ref_ptr) |
| 3725 | return 0; |
| 3726 | next_matching_rec_ref_ptr= get_next_rec_ref(next_matching_rec_ref_ptr ? |
| 3727 | next_matching_rec_ref_ptr : |
| 3728 | last_matching_rec_ref_ptr); |
| 3729 | return next_matching_rec_ref_ptr+rec_fields_offset; |
| 3730 | } |
| 3731 | |
| 3732 | |
| 3733 | /* |
| 3734 | Check whether the matching record from the BNLH cache is to be skipped |
| 3735 | |
| 3736 | SYNOPSIS |
| 3737 | skip_next_candidate_for_match |
| 3738 | rec_ptr pointer to the position in the join buffer right after |
| 3739 | the previous record |
| 3740 | |
| 3741 | DESCRIPTION |
| 3742 | This implementation of the virtual function just calls the |
| 3743 | method get_match_flag_by_pos to check whether the record referenced |
| 3744 | by ref_ptr has its match flag set to MATCH_FOUND. |
| 3745 | |
| 3746 | RETURN VALUE |
| 3747 | TRUE the record referenced by rec_ptr has its match flag set to |
| 3748 | MATCH_FOUND |
| 3749 | FALSE otherwise |
| 3750 | */ |
| 3751 | |
| 3752 | bool JOIN_CACHE_BNLH::skip_next_candidate_for_match(uchar *rec_ptr) |
| 3753 | { |
| 3754 | return join_tab->check_only_first_match() && |
| 3755 | (get_match_flag_by_pos(rec_ptr) == MATCH_FOUND); |
| 3756 | } |
| 3757 | |
| 3758 | |
| 3759 | /* |
| 3760 | Read next record from the BNLH join cache buffer when looking for matches |
| 3761 | |
| 3762 | SYNOPSIS |
| 3763 | read_next_candidate_for_match |
| 3764 | rec_ptr pointer to the position in the join buffer right after |
| 3765 | the previous record |
| 3766 | |
| 3767 | DESCRIPTION |
| 3768 | This implementation of the virtual method read_next_candidate_for_match |
| 3769 | calls the method get_record_by_pos to read the record referenced by rec_ptr |
| 3770 | from the join buffer into the record buffer. If this record refers to |
    fields in the other join buffers the call of get_record_by_pos ensures that
| 3772 | these fields are read into the corresponding record buffers as well. |
| 3773 | This function is supposed to be called after a successful call of |
| 3774 | the method get_next_candidate_for_match. |
| 3775 | |
| 3776 | RETURN VALUE |
| 3777 | none |
| 3778 | */ |
| 3779 | |
void JOIN_CACHE_BNLH::read_next_candidate_for_match(uchar *rec_ptr)
{
  /*
    Copy the record referenced by rec_ptr from the join buffer into
    the record buffer(s); to be called after a successful
    get_next_candidate_for_match().
  */
  get_record_by_pos(rec_ptr);
}
| 3784 | |
| 3785 | |
| 3786 | /* |
| 3787 | Initialize the BNLH join cache |
| 3788 | |
| 3789 | SYNOPSIS |
| 3790 | init |
| 3791 | for_explain join buffer is initialized for explain only |
| 3792 | |
| 3793 | DESCRIPTION |
| 3794 | The function initializes the cache structure. It is supposed to be called |
| 3795 | right after a constructor for the JOIN_CACHE_BNLH. |
| 3796 | |
| 3797 | NOTES |
| 3798 | The function first constructs a companion object of the type JOIN_TAB_SCAN, |
| 3799 | then it calls the init method of the parent class. |
| 3800 | |
| 3801 | RETURN VALUE |
| 3802 | 0 initialization with buffer allocations has been succeeded |
| 3803 | 1 otherwise |
| 3804 | */ |
| 3805 | |
| 3806 | int JOIN_CACHE_BNLH::init(bool for_explain) |
| 3807 | { |
| 3808 | DBUG_ENTER("JOIN_CACHE_BNLH::init" ); |
| 3809 | |
| 3810 | if (!(join_tab_scan= new JOIN_TAB_SCAN(join, join_tab))) |
| 3811 | DBUG_RETURN(1); |
| 3812 | |
| 3813 | DBUG_RETURN(JOIN_CACHE_HASHED::init(for_explain)); |
| 3814 | } |
| 3815 | |
| 3816 | |
| 3817 | /* |
| 3818 | Calculate the increment of the MRR buffer for a record write |
| 3819 | |
| 3820 | SYNOPSIS |
| 3821 | aux_buffer_incr() |
| 3822 | |
| 3823 | DESCRIPTION |
| 3824 | This implementation of the virtual function aux_buffer_incr determines |
| 3825 | for how much the size of the MRR buffer should be increased when another |
| 3826 | record is added to the cache. |
| 3827 | |
| 3828 | RETURN VALUE |
| 3829 | the increment of the size of the MRR buffer for the next record |
| 3830 | */ |
| 3831 | |
| 3832 | uint JOIN_TAB_SCAN_MRR::aux_buffer_incr(size_t recno) |
| 3833 | { |
| 3834 | uint incr= 0; |
| 3835 | TABLE_REF *ref= &join_tab->ref; |
| 3836 | TABLE *tab= join_tab->table; |
| 3837 | ha_rows rec_per_key= |
| 3838 | (ha_rows) tab->key_info[ref->key].actual_rec_per_key(ref->key_parts-1); |
| 3839 | set_if_bigger(rec_per_key, 1); |
| 3840 | if (recno == 1) |
| 3841 | incr= ref->key_length + tab->file->ref_length; |
| 3842 | incr+= (uint)(tab->file->stats.mrr_length_per_rec * rec_per_key); |
| 3843 | return incr; |
| 3844 | } |
| 3845 | |
| 3846 | |
| 3847 | /* |
| 3848 | Initiate iteration over records returned by MRR for the current join buffer |
| 3849 | |
| 3850 | SYNOPSIS |
| 3851 | open() |
| 3852 | |
| 3853 | DESCRIPTION |
| 3854 | The function initiates the process of iteration over the records from |
| 3855 | join_tab returned by the MRR interface functions for records from |
| 3856 | the join buffer. Such an iteration is performed by the BKA/BKAH join |
| 3857 | algorithm for each new refill of the join buffer. |
| 3858 | The function calls the MRR handler function multi_range_read_init to |
| 3859 | initiate this process. |
| 3860 | |
| 3861 | RETURN VALUE |
| 3862 | 0 the initiation is a success |
| 3863 | error code otherwise |
| 3864 | */ |
| 3865 | |
int JOIN_TAB_SCAN_MRR::open()
{
  handler *file= join_tab->table->file;

  /* Make sure a stale NULL-complemented row flag does not leak in */
  join_tab->table->null_row= 0;


  /* Dynamic range access is never used with BKA */
  DBUG_ASSERT(join_tab->use_quick != 2);

  join_tab->tracker->r_scans++;
  /* Snapshot and clear table->status of the preceding tables */
  save_or_restore_used_tabs(join_tab, FALSE);

  init_mrr_buff();

  /*
    Prepare to iterate over keys from the join buffer and to get
    matching candidates obtained with MRR handler functions.
  */
  if (!file->inited)
    file->ha_index_init(join_tab->ref.key, 1);
  ranges= cache->get_number_of_ranges_for_mrr();
  /* Without a pushed index condition the skip_index_tuple callback is moot */
  if (!join_tab->cache_idx_cond)
    range_seq_funcs.skip_index_tuple= 0;
  return file->multi_range_read_init(&range_seq_funcs, (void*) cache,
                                     ranges, mrr_mode, &mrr_buff);
}
| 3893 | |
| 3894 | |
| 3895 | /* |
| 3896 | Read the next record returned by MRR for the current join buffer |
| 3897 | |
| 3898 | SYNOPSIS |
| 3899 | next() |
| 3900 | |
| 3901 | DESCRIPTION |
| 3902 | The function reads the next record from the joined table join_tab |
| 3903 | returned by the MRR handler function multi_range_read_next for |
| 3904 | the current refill of the join buffer. The record is read into |
| 3905 | the record buffer used for join_tab records in join operations. |
| 3906 | |
| 3907 | RETURN VALUE |
| 3908 | 0 the next record exists and has been successfully read |
| 3909 | error code otherwise |
| 3910 | */ |
| 3911 | |
int JOIN_TAB_SCAN_MRR::next()
{
  /* The association slot receives the cache position of the matched key */
  char **ptr= (char **) cache->get_curr_association_ptr();

  DBUG_ASSERT(sizeof(range_id_t) == sizeof(*ptr));
  /* Any MRR error is collapsed into -1; 0 means a row was fetched */
  int rc= join_tab->table->file->multi_range_read_next((range_id_t*)ptr) ? -1 : 0;
  if (!rc)
  {
    join_tab->tracker->r_rows++;
    join_tab->tracker->r_rows_after_where++;
    /*
      If a record in an incremental cache contains no fields then the
      association for the last record in cache will be equal to cache->end_pos
    */
    /*
      psergey: this makes no sense where HA_MRR_NO_ASSOC is used.
      DBUG_ASSERT(cache->buff <= (uchar *) (*ptr) &&
                  (uchar *) (*ptr) <= cache->end_pos);
    */
  }
  return rc;
}
| 3934 | |
| 3935 | |
| 3936 | static |
| 3937 | void bka_range_seq_key_info(void *init_params, uint *length, |
| 3938 | key_part_map *map) |
| 3939 | { |
| 3940 | TABLE_REF *ref= &(((JOIN_CACHE*)init_params)->join_tab->ref); |
| 3941 | *length= ref->key_length; |
| 3942 | *map= (key_part_map(1) << ref->key_parts) - 1; |
| 3943 | } |
| 3944 | |
| 3945 | |
| 3946 | /* |
| 3947 | Initialize retrieval of range sequence for BKA join algorithm |
| 3948 | |
| 3949 | SYNOPSIS |
| 3950 | bka_range_seq_init() |
| 3951 | init_params pointer to the BKA join cache object |
| 3952 | n_ranges the number of ranges obtained |
| 3953 | flags combination of MRR flags |
| 3954 | |
| 3955 | DESCRIPTION |
| 3956 | The function interprets init_param as a pointer to a JOIN_CACHE_BKA |
| 3957 | object. The function prepares for an iteration over the join keys |
| 3958 | built for all records from the cache join buffer. |
| 3959 | |
| 3960 | NOTE |
    This function is used only as a callback function.
| 3962 | |
| 3963 | RETURN VALUE |
| 3964 | init_param value that is to be used as a parameter of bka_range_seq_next() |
| 3965 | */ |
| 3966 | |
| 3967 | static |
| 3968 | range_seq_t bka_range_seq_init(void *init_param, uint n_ranges, uint flags) |
| 3969 | { |
| 3970 | DBUG_ENTER("bka_range_seq_init" ); |
| 3971 | JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) init_param; |
| 3972 | cache->reset(0); |
| 3973 | DBUG_RETURN((range_seq_t) init_param); |
| 3974 | } |
| 3975 | |
| 3976 | |
| 3977 | /* |
| 3978 | Get the next range/key over records from the join buffer used by a BKA cache |
| 3979 | |
| 3980 | SYNOPSIS |
| 3981 | bka_range_seq_next() |
| 3982 | seq the value returned by bka_range_seq_init |
| 3983 | range OUT reference to the next range |
| 3984 | |
| 3985 | DESCRIPTION |
| 3986 | The function interprets seq as a pointer to a JOIN_CACHE_BKA |
| 3987 | object. The function returns a pointer to the range descriptor |
| 3988 | for the key built over the next record from the join buffer. |
| 3989 | |
| 3990 | NOTE |
    This function is used only as a callback function.
| 3992 | |
| 3993 | RETURN VALUE |
| 3994 | FALSE ok, the range structure filled with info about the next range/key |
| 3995 | TRUE no more ranges |
| 3996 | */ |
| 3997 | |
| 3998 | static |
| 3999 | bool bka_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range) |
| 4000 | { |
| 4001 | DBUG_ENTER("bka_range_seq_next" ); |
| 4002 | JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq; |
| 4003 | TABLE_REF *ref= &cache->join_tab->ref; |
| 4004 | key_range *start_key= &range->start_key; |
| 4005 | if ((start_key->length= cache->get_next_key((uchar **) &start_key->key))) |
| 4006 | { |
| 4007 | start_key->keypart_map= (1 << ref->key_parts) - 1; |
| 4008 | start_key->flag= HA_READ_KEY_EXACT; |
| 4009 | range->end_key= *start_key; |
| 4010 | range->end_key.flag= HA_READ_AFTER_KEY; |
| 4011 | range->ptr= (char *) cache->get_curr_rec(); |
| 4012 | range->range_flag= EQ_RANGE; |
| 4013 | DBUG_RETURN(0); |
| 4014 | } |
| 4015 | DBUG_RETURN(1); |
| 4016 | } |
| 4017 | |
| 4018 | |
| 4019 | /* |
| 4020 | Check whether range_info orders to skip the next record from BKA buffer |
| 4021 | |
| 4022 | SYNOPSIS |
| 4023 | bka_range_seq_skip_record() |
| 4024 | seq value returned by bka_range_seq_init() |
| 4025 | range_info information about the next range |
| 4026 | rowid [NOT USED] rowid of the record to be checked |
| 4027 | |
| 4028 | |
| 4029 | DESCRIPTION |
| 4030 | The function interprets seq as a pointer to a JOIN_CACHE_BKA object. |
| 4031 | The function returns TRUE if the record with this range_info |
| 4032 | is to be filtered out from the stream of records returned by |
| 4033 | multi_range_read_next(). |
| 4034 | |
| 4035 | NOTE |
    This function is used only as a callback function.
| 4037 | |
| 4038 | RETURN VALUE |
| 4039 | 1 record with this range_info is to be filtered out from the stream |
| 4040 | of records returned by multi_range_read_next() |
| 4041 | 0 the record is to be left in the stream |
| 4042 | */ |
| 4043 | |
| 4044 | static |
| 4045 | bool bka_range_seq_skip_record(range_seq_t rseq, range_id_t range_info, uchar *rowid) |
| 4046 | { |
| 4047 | DBUG_ENTER("bka_range_seq_skip_record" ); |
| 4048 | JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq; |
| 4049 | bool res= cache->get_match_flag_by_pos((uchar *) range_info) == |
| 4050 | JOIN_CACHE::MATCH_FOUND; |
| 4051 | DBUG_RETURN(res); |
| 4052 | } |
| 4053 | |
| 4054 | |
| 4055 | /* |
| 4056 | Check if the record combination from BKA cache matches the index condition |
| 4057 | |
| 4058 | SYNOPSIS |
| 4059 | bka_skip_index_tuple() |
| 4060 | rseq value returned by bka_range_seq_init() |
| 4061 | range_info record chain for the next range/key returned by MRR |
| 4062 | |
| 4063 | DESCRIPTION |
| 4064 | This is wrapper for JOIN_CACHE_BKA::skip_index_tuple method, |
| 4065 | see comments there. |
| 4066 | |
| 4067 | NOTE |
| 4068 | This function is used as a RANGE_SEQ_IF::skip_index_tuple callback. |
| 4069 | |
| 4070 | RETURN VALUE |
| 4071 | 0 The record combination satisfies the index condition |
| 4072 | 1 Otherwise |
| 4073 | */ |
| 4074 | |
| 4075 | static |
| 4076 | bool bka_skip_index_tuple(range_seq_t rseq, range_id_t range_info) |
| 4077 | { |
| 4078 | DBUG_ENTER("bka_skip_index_tuple" ); |
| 4079 | JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq; |
| 4080 | THD *thd= cache->thd(); |
| 4081 | bool res; |
| 4082 | status_var_increment(thd->status_var.ha_icp_attempts); |
| 4083 | if (!(res= cache->skip_index_tuple(range_info))) |
| 4084 | status_var_increment(thd->status_var.ha_icp_match); |
| 4085 | DBUG_RETURN(res); |
| 4086 | } |
| 4087 | |
| 4088 | |
| 4089 | /* |
| 4090 | Prepare to read the record from BKA cache matching the current joined record |
| 4091 | |
| 4092 | SYNOPSIS |
| 4093 | prepare_look_for_matches() |
| 4094 | skip_last <-> ignore the last record in the buffer (always unused here) |
| 4095 | |
| 4096 | DESCRIPTION |
| 4097 | The function prepares to iterate over records in the join cache buffer |
| 4098 | matching the record loaded into the record buffer for join_tab when |
| 4099 | performing join operation by BKA join algorithm. With BKA algorithms the |
| 4100 | record loaded into the record buffer for join_tab always has a direct |
| 4101 | reference to the matching records from the join buffer. When the regular |
| 4102 | BKA join algorithm is employed the record from join_tab can refer to |
| 4103 | only one such record. |
| 4104 | The function sets the counter of the remaining records from the cache |
| 4105 | buffer that would match the current join_tab record to 1. |
| 4106 | |
| 4107 | RETURN VALUE |
| 4108 | TRUE there are no records in the buffer to iterate over |
| 4109 | FALSE otherwise |
| 4110 | */ |
| 4111 | |
| 4112 | bool JOIN_CACHE_BKA::prepare_look_for_matches(bool skip_last) |
| 4113 | { |
| 4114 | if (!records) |
| 4115 | return TRUE; |
| 4116 | rem_records= 1; |
| 4117 | return FALSE; |
| 4118 | } |
| 4119 | |
| 4120 | |
| 4121 | /* |
| 4122 | Get the record from the BKA cache matching the current joined record |
| 4123 | |
| 4124 | SYNOPSIS |
| 4125 | get_next_candidate_for_match |
| 4126 | |
| 4127 | DESCRIPTION |
| 4128 | This method is used for iterations over the records from the join |
| 4129 | cache buffer when looking for matches for records from join_tab. |
| 4130 | The method performs the necessary preparations to read the next record |
| 4131 | from the join buffer into the record buffer by the method |
| 4132 | read_next_candidate_for_match, or, to skip the next record from the join |
| 4133 | buffer by the method skip_if_not_needed_match. |
| 4134 | This implementation of the virtual method get_next_candidate_for_match |
| 4135 | just decrements the counter of the records that are to be iterated over |
| 4136 | and returns the value of curr_association as a reference to the position |
| 4137 | of the beginning of the record fields in the buffer. |
| 4138 | |
| 4139 | RETURN VALUE |
| 4140 | pointer to the start of the record fields in the join buffer |
| 4141 | if the there is another record to iterate over, 0 - otherwise. |
| 4142 | */ |
| 4143 | |
| 4144 | uchar *JOIN_CACHE_BKA::get_next_candidate_for_match() |
| 4145 | { |
| 4146 | if (!rem_records) |
| 4147 | return 0; |
| 4148 | rem_records--; |
| 4149 | return curr_association; |
| 4150 | } |
| 4151 | |
| 4152 | |
| 4153 | /* |
| 4154 | Check whether the matching record from the BKA cache is to be skipped |
| 4155 | |
| 4156 | SYNOPSIS |
| 4157 | skip_next_candidate_for_match |
| 4158 | rec_ptr pointer to the position in the join buffer right after |
| 4159 | the previous record |
| 4160 | |
| 4161 | DESCRIPTION |
| 4162 | This implementation of the virtual function just calls the |
| 4163 | method get_match_flag_by_pos to check whether the record referenced |
| 4164 | by ref_ptr has its match flag set to MATCH_FOUND. |
| 4165 | |
| 4166 | RETURN VALUE |
| 4167 | TRUE the record referenced by rec_ptr has its match flag set to |
| 4168 | MATCH_FOUND |
| 4169 | FALSE otherwise |
| 4170 | */ |
| 4171 | |
| 4172 | bool JOIN_CACHE_BKA::skip_next_candidate_for_match(uchar *rec_ptr) |
| 4173 | { |
| 4174 | return join_tab->check_only_first_match() && |
| 4175 | (get_match_flag_by_pos(rec_ptr) == MATCH_FOUND); |
| 4176 | } |
| 4177 | |
| 4178 | |
| 4179 | /* |
| 4180 | Read the next record from the BKA join cache buffer when looking for matches |
| 4181 | |
| 4182 | SYNOPSIS |
| 4183 | read_next_candidate_for_match |
| 4184 | rec_ptr pointer to the position in the join buffer right after |
| 4185 | the previous record |
| 4186 | |
| 4187 | DESCRIPTION |
| 4188 | This implementation of the virtual method read_next_candidate_for_match |
| 4189 | calls the method get_record_by_pos to read the record referenced by rec_ptr |
| 4190 | from the join buffer into the record buffer. If this record refers to |
    fields in the other join buffers the call of get_record_by_pos ensures that
| 4192 | these fields are read into the corresponding record buffers as well. |
| 4193 | This function is supposed to be called after a successful call of |
| 4194 | the method get_next_candidate_for_match. |
| 4195 | |
| 4196 | RETURN VALUE |
| 4197 | none |
| 4198 | */ |
| 4199 | |
void JOIN_CACHE_BKA::read_next_candidate_for_match(uchar *rec_ptr)
{
  /*
    Copy the record referenced by rec_ptr from the join buffer into
    the record buffer(s); to be called after a successful
    get_next_candidate_for_match().
  */
  get_record_by_pos(rec_ptr);
}
| 4204 | |
| 4205 | |
| 4206 | /* |
| 4207 | Initialize the BKA join cache |
| 4208 | |
| 4209 | SYNOPSIS |
| 4210 | init |
| 4211 | for_explain join buffer is initialized for explain only |
| 4212 | |
| 4213 | |
| 4214 | DESCRIPTION |
| 4215 | The function initializes the cache structure. It is supposed to be called |
| 4216 | right after a constructor for the JOIN_CACHE_BKA. |
| 4217 | |
| 4218 | NOTES |
| 4219 | The function first constructs a companion object of the type |
| 4220 | JOIN_TAB_SCAN_MRR, then it calls the init method of the parent class. |
| 4221 | |
| 4222 | RETURN VALUE |
| 4223 | 0 initialization with buffer allocations has been succeeded |
| 4224 | 1 otherwise |
| 4225 | */ |
| 4226 | |
int JOIN_CACHE_BKA::init(bool for_explain)
{
  int res;
  bool check_only_first_match= join_tab->check_only_first_match();

  /*
    Callback table handed to the MRR interface; the skip_record callback
    is installed only for first-match (semi-join) tables, where
    already-matched records can be filtered out early.
  */
  RANGE_SEQ_IF rs_funcs= { bka_range_seq_key_info,
                           bka_range_seq_init,
                           bka_range_seq_next,
                           check_only_first_match ? bka_range_seq_skip_record : 0,
                           bka_skip_index_tuple };

  DBUG_ENTER("JOIN_CACHE_BKA::init" );

  /* Companion scanner that drives the joined table through MRR */
  JOIN_TAB_SCAN_MRR *jsm;
  if (!(join_tab_scan= jsm= new JOIN_TAB_SCAN_MRR(join, join_tab,
                                                  mrr_mode, rs_funcs)))
    DBUG_RETURN(1);

  if ((res= JOIN_CACHE::init(for_explain)))
    DBUG_RETURN(res);

  /* With embedded keys the MRR layer can use key values in place */
  if (use_emb_key)
    jsm->mrr_mode |= HA_MRR_MATERIALIZED_KEYS;

  DBUG_RETURN(0);
}
| 4253 | |
| 4254 | |
| 4255 | /* |
| 4256 | Get the key built over the next record from BKA join buffer |
| 4257 | |
| 4258 | SYNOPSIS |
| 4259 | get_next_key() |
| 4260 | key pointer to the buffer where the key value is to be placed |
| 4261 | |
| 4262 | DESCRIPTION |
| 4263 | The function reads key fields from the current record in the join buffer. |
| 4264 | and builds the key value out of these fields that will be used to access |
| 4265 | the 'join_tab' table. Some of key fields may belong to previous caches. |
| 4266 | They are accessed via record references to the record parts stored in the |
| 4267 | previous join buffers. The other key fields always are placed right after |
| 4268 | the flag fields of the record. |
| 4269 | If the key is embedded, which means that its value can be read directly |
| 4270 | from the join buffer, then *key is set to the beginning of the key in |
| 4271 | this buffer. Otherwise the key is built in the join_tab->ref->key_buff. |
  The function returns the length of the key if it succeeds to read it.
  It is assumed that the function starts reading at the position of
| 4274 | the record length which is provided for each records in a BKA cache. |
| 4275 | After the key is built the 'pos' value points to the first position after |
| 4276 | the current record. |
| 4277 | The function just skips the records with MATCH_IMPOSSIBLE in the |
| 4278 | match flag field if there is any. |
| 4279 | The function returns 0 if the initial position is after the beginning |
| 4280 | of the record fields for last record from the join buffer. |
| 4281 | |
| 4282 | RETURN VALUE |
| 4283 | length of the key value - if the starting value of 'pos' points to |
| 4284 | the position before the fields for the last record, |
| 4285 | 0 - otherwise. |
| 4286 | */ |
| 4287 | |
| 4288 | uint JOIN_CACHE_BKA::get_next_key(uchar ** key) |
| 4289 | { |
| 4290 | uint len; |
| 4291 | uint32 rec_len; |
| 4292 | uchar *init_pos; |
| 4293 | JOIN_CACHE *cache; |
| 4294 | |
| 4295 | start: |
| 4296 | |
| 4297 | /* Any record in a BKA cache is prepended with its length */ |
| 4298 | DBUG_ASSERT(with_length); |
| 4299 | |
| 4300 | if ((pos+size_of_rec_len) > last_rec_pos || !records) |
| 4301 | return 0; |
| 4302 | |
| 4303 | /* Read the length of the record */ |
| 4304 | rec_len= get_rec_length(pos); |
| 4305 | pos+= size_of_rec_len; |
| 4306 | init_pos= pos; |
| 4307 | |
| 4308 | /* Read a reference to the previous cache if any */ |
| 4309 | if (prev_cache) |
| 4310 | pos+= prev_cache->get_size_of_rec_offset(); |
| 4311 | |
| 4312 | curr_rec_pos= pos; |
| 4313 | |
| 4314 | /* Read all flag fields of the record */ |
| 4315 | read_flag_fields(); |
| 4316 | |
| 4317 | if (with_match_flag && |
| 4318 | (Match_flag) curr_rec_pos[0] == MATCH_IMPOSSIBLE ) |
| 4319 | { |
| 4320 | pos= init_pos+rec_len; |
| 4321 | goto start; |
| 4322 | } |
| 4323 | |
| 4324 | if (use_emb_key) |
| 4325 | { |
| 4326 | /* An embedded key is taken directly from the join buffer */ |
| 4327 | *key= pos; |
| 4328 | len= emb_key_length; |
| 4329 | } |
| 4330 | else |
| 4331 | { |
| 4332 | /* Read key arguments from previous caches if there are any such fields */ |
| 4333 | if (external_key_arg_fields) |
| 4334 | { |
| 4335 | uchar *rec_ptr= curr_rec_pos; |
| 4336 | uint key_arg_count= external_key_arg_fields; |
| 4337 | CACHE_FIELD **copy_ptr= blob_ptr-key_arg_count; |
| 4338 | for (cache= prev_cache; key_arg_count; cache= cache->prev_cache) |
| 4339 | { |
| 4340 | uint len= 0; |
| 4341 | DBUG_ASSERT(cache); |
| 4342 | rec_ptr= cache->get_rec_ref(rec_ptr); |
| 4343 | while (!cache->referenced_fields) |
| 4344 | { |
| 4345 | cache= cache->prev_cache; |
| 4346 | DBUG_ASSERT(cache); |
| 4347 | rec_ptr= cache->get_rec_ref(rec_ptr); |
| 4348 | } |
| 4349 | while (key_arg_count && |
| 4350 | cache->read_referenced_field(*copy_ptr, rec_ptr, &len)) |
| 4351 | { |
| 4352 | copy_ptr++; |
| 4353 | --key_arg_count; |
| 4354 | } |
| 4355 | } |
| 4356 | } |
| 4357 | |
| 4358 | /* |
| 4359 | Read the other key arguments from the current record. The fields for |
| 4360 | these arguments are always first in the sequence of the record's fields. |
| 4361 | */ |
| 4362 | CACHE_FIELD *copy= field_descr+flag_fields; |
| 4363 | CACHE_FIELD *copy_end= copy+local_key_arg_fields; |
| 4364 | bool blob_in_rec_buff= blob_data_is_in_rec_buff(curr_rec_pos); |
| 4365 | for ( ; copy < copy_end; copy++) |
| 4366 | read_record_field(copy, blob_in_rec_buff); |
| 4367 | |
| 4368 | /* Build the key over the fields read into the record buffers */ |
| 4369 | TABLE_REF *ref= &join_tab->ref; |
| 4370 | cp_buffer_from_ref(join->thd, join_tab->table, ref); |
| 4371 | *key= ref->key_buff; |
| 4372 | len= ref->key_length; |
| 4373 | } |
| 4374 | |
| 4375 | pos= init_pos+rec_len; |
| 4376 | |
| 4377 | return len; |
| 4378 | } |
| 4379 | |
| 4380 | |
| 4381 | /* |
| 4382 | Check the index condition of the joined table for a record from the BKA cache |
| 4383 | |
| 4384 | SYNOPSIS |
| 4385 | skip_index_tuple() |
| 4386 | range_info pointer to the record returned by MRR |
| 4387 | |
| 4388 | DESCRIPTION |
| 4389 | This function is invoked from MRR implementation to check if an index |
| 4390 | tuple matches the index condition. It is used in the case where the index |
| 4391 | condition actually depends on both columns of the used index and columns |
| 4392 | from previous tables. |
| 4393 | |
| 4394 | NOTES |
| 4395 | Accessing columns of the previous tables requires special handling with |
| 4396 | BKA. The idea of BKA is to collect record combinations in a buffer and |
| 4397 | then do a batch of ref access lookups, i.e. by the time we're doing a |
| 4398 | lookup its previous-records-combination is not in prev_table->record[0] |
| 4399 | but somewhere in the join buffer. |
| 4400 | We need to get it from there back into prev_table(s)->record[0] before we |
| 4401 | can evaluate the index condition, and that's why we need this function |
| 4402 | instead of regular IndexConditionPushdown. |
| 4403 | |
| 4404 | NOTES |
| 4405 | Possible optimization: |
| 4406 | Before we unpack the record from a previous table |
| 4407 | check if this table is used in the condition. |
| 4408 | If so then unpack the record otherwise skip the unpacking. |
| 4409 | This should be done by a special virtual method |
| 4410 | get_partial_record_by_pos(). |
| 4411 | |
| 4412 | RETURN VALUE |
    1 the record combination does not satisfy the index condition
| 4414 | 0 otherwise |
| 4415 | */ |
| 4416 | |
| 4417 | bool JOIN_CACHE_BKA::skip_index_tuple(range_id_t range_info) |
| 4418 | { |
| 4419 | DBUG_ENTER("JOIN_CACHE_BKA::skip_index_tuple" ); |
| 4420 | get_record_by_pos((uchar*)range_info); |
| 4421 | DBUG_RETURN(!join_tab->cache_idx_cond->val_int()); |
| 4422 | } |
| 4423 | |
| 4424 | |
| 4425 | |
| 4426 | /* |
| 4427 | Initialize retrieval of range sequence for the BKAH join algorithm |
| 4428 | |
| 4429 | SYNOPSIS |
| 4430 | bkah_range_seq_init() |
| 4431 | init_params pointer to the BKAH join cache object |
| 4432 | n_ranges the number of ranges obtained |
| 4433 | flags combination of MRR flags |
| 4434 | |
| 4435 | DESCRIPTION |
| 4436 | The function interprets init_param as a pointer to a JOIN_CACHE_BKAH |
| 4437 | object. The function prepares for an iteration over distinct join keys |
| 4438 | built over the records from the cache join buffer. |
| 4439 | |
| 4440 | NOTE |
  This function is used only as a callback function.
| 4442 | |
| 4443 | RETURN VALUE |
| 4444 | init_param value that is to be used as a parameter of |
| 4445 | bkah_range_seq_next() |
| 4446 | */ |
| 4447 | |
| 4448 | static |
| 4449 | range_seq_t bkah_range_seq_init(void *init_param, uint n_ranges, uint flags) |
| 4450 | { |
| 4451 | DBUG_ENTER("bkah_range_seq_init" ); |
| 4452 | JOIN_CACHE_BKAH *cache= (JOIN_CACHE_BKAH *) init_param; |
| 4453 | cache->reset(0); |
| 4454 | DBUG_RETURN((range_seq_t) init_param); |
| 4455 | } |
| 4456 | |
| 4457 | |
| 4458 | /* |
| 4459 | Get the next range/key over records from the join buffer of a BKAH cache |
| 4460 | |
| 4461 | SYNOPSIS |
| 4462 | bkah_range_seq_next() |
| 4463 | seq value returned by bkah_range_seq_init() |
| 4464 | range OUT reference to the next range |
| 4465 | |
| 4466 | DESCRIPTION |
| 4467 | The function interprets seq as a pointer to a JOIN_CACHE_BKAH |
| 4468 | object. The function returns a pointer to the range descriptor |
| 4469 | for the next unique key built over records from the join buffer. |
| 4470 | |
| 4471 | NOTE |
  This function is used only as a callback function.
| 4473 | |
| 4474 | RETURN VALUE |
| 4475 | FALSE ok, the range structure filled with info about the next range/key |
| 4476 | TRUE no more ranges |
| 4477 | */ |
| 4478 | |
| 4479 | static |
| 4480 | bool bkah_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range) |
| 4481 | { |
| 4482 | DBUG_ENTER("bkah_range_seq_next" ); |
| 4483 | JOIN_CACHE_BKAH *cache= (JOIN_CACHE_BKAH *) rseq; |
| 4484 | TABLE_REF *ref= &cache->join_tab->ref; |
| 4485 | key_range *start_key= &range->start_key; |
| 4486 | if ((start_key->length= cache->get_next_key((uchar **) &start_key->key))) |
| 4487 | { |
| 4488 | start_key->keypart_map= (1 << ref->key_parts) - 1; |
| 4489 | start_key->flag= HA_READ_KEY_EXACT; |
| 4490 | range->end_key= *start_key; |
| 4491 | range->end_key.flag= HA_READ_AFTER_KEY; |
| 4492 | range->ptr= (char *) cache->get_curr_key_chain(); |
| 4493 | range->range_flag= EQ_RANGE; |
| 4494 | DBUG_RETURN(0); |
| 4495 | } |
| 4496 | DBUG_RETURN(1); |
| 4497 | } |
| 4498 | |
| 4499 | |
| 4500 | /* |
| 4501 | Check whether range_info orders to skip the next record from BKAH join buffer |
| 4502 | |
| 4503 | SYNOPSIS |
| 4504 | bkah_range_seq_skip_record() |
| 4505 | seq value returned by bkah_range_seq_init() |
| 4506 | range_info information about the next range/key returned by MRR |
| 4507 | rowid [NOT USED] rowid of the record to be checked (not used) |
| 4508 | |
| 4509 | DESCRIPTION |
| 4510 | The function interprets seq as a pointer to a JOIN_CACHE_BKAH |
| 4511 | object. The function returns TRUE if the record with this range_info |
| 4512 | is to be filtered out from the stream of records returned by |
| 4513 | multi_range_read_next(). |
| 4514 | |
| 4515 | NOTE |
  This function is used only as a callback function.
| 4517 | |
| 4518 | RETURN VALUE |
| 4519 | 1 record with this range_info is to be filtered out from the stream |
| 4520 | of records returned by multi_range_read_next() |
| 4521 | 0 the record is to be left in the stream |
| 4522 | */ |
| 4523 | |
| 4524 | static |
| 4525 | bool bkah_range_seq_skip_record(range_seq_t rseq, range_id_t range_info, uchar *rowid) |
| 4526 | { |
| 4527 | DBUG_ENTER("bkah_range_seq_skip_record" ); |
| 4528 | JOIN_CACHE_BKAH *cache= (JOIN_CACHE_BKAH *) rseq; |
| 4529 | bool res= cache->check_all_match_flags_for_key((uchar *) range_info); |
| 4530 | DBUG_RETURN(res); |
| 4531 | } |
| 4532 | |
| 4533 | |
| 4534 | /* |
| 4535 | Check if the record combination from BKAH cache matches the index condition |
| 4536 | |
| 4537 | SYNOPSIS |
| 4538 | bkah_skip_index_tuple() |
| 4539 | rseq value returned by bka_range_seq_init() |
| 4540 | range_info record chain for the next range/key returned by MRR |
| 4541 | |
| 4542 | DESCRIPTION |
| 4543 | This is wrapper for JOIN_CACHE_BKA_UNIQUE::skip_index_tuple method, |
| 4544 | see comments there. |
| 4545 | |
| 4546 | NOTE |
| 4547 | This function is used as a RANGE_SEQ_IF::skip_index_tuple callback. |
| 4548 | |
| 4549 | RETURN VALUE |
| 4550 | 0 some records from the chain satisfy the index condition |
| 4551 | 1 otherwise |
| 4552 | */ |
| 4553 | |
| 4554 | static |
| 4555 | bool bkah_skip_index_tuple(range_seq_t rseq, range_id_t range_info) |
| 4556 | { |
| 4557 | DBUG_ENTER("bka_unique_skip_index_tuple" ); |
| 4558 | JOIN_CACHE_BKAH *cache= (JOIN_CACHE_BKAH *) rseq; |
| 4559 | THD *thd= cache->thd(); |
| 4560 | bool res; |
| 4561 | status_var_increment(thd->status_var.ha_icp_attempts); |
| 4562 | if (!(res= cache->skip_index_tuple(range_info))) |
| 4563 | status_var_increment(thd->status_var.ha_icp_match); |
| 4564 | DBUG_RETURN(res); |
| 4565 | } |
| 4566 | |
| 4567 | |
| 4568 | /* |
| 4569 | Prepare to read record from BKAH cache matching the current joined record |
| 4570 | |
| 4571 | SYNOPSIS |
| 4572 | prepare_look_for_matches() |
| 4573 | skip_last <-> ignore the last record in the buffer (always unused here) |
| 4574 | |
| 4575 | DESCRIPTION |
| 4576 | The function prepares to iterate over records in the join cache buffer |
| 4577 | matching the record loaded into the record buffer for join_tab when |
| 4578 | performing join operation by BKAH join algorithm. With BKAH algorithm, if |
| 4579 | association labels are used, then record loaded into the record buffer |
  for join_tab always has a direct reference to the chain of the matching
  records from the join buffer. If association labels are not used then
  the chain of the matching records is obtained by the call of the
| 4583 | get_key_chain_by_join_key function. |
| 4584 | |
| 4585 | RETURN VALUE |
| 4586 | TRUE there are no records in the buffer to iterate over |
| 4587 | FALSE otherwise |
| 4588 | */ |
| 4589 | |
| 4590 | bool JOIN_CACHE_BKAH::prepare_look_for_matches(bool skip_last) |
| 4591 | { |
| 4592 | last_matching_rec_ref_ptr= next_matching_rec_ref_ptr= 0; |
| 4593 | if (no_association && |
| 4594 | !(curr_matching_chain= get_matching_chain_by_join_key())) //psergey: added '!' |
| 4595 | return 1; |
| 4596 | last_matching_rec_ref_ptr= get_next_rec_ref(curr_matching_chain); |
| 4597 | return 0; |
| 4598 | } |
| 4599 | |
| 4600 | /* |
| 4601 | Initialize the BKAH join cache |
| 4602 | |
| 4603 | SYNOPSIS |
| 4604 | init |
| 4605 | for_explain join buffer is initialized for explain only |
| 4606 | |
| 4607 | DESCRIPTION |
| 4608 | The function initializes the cache structure. It is supposed to be called |
| 4609 | right after a constructor for the JOIN_CACHE_BKAH. |
| 4610 | |
| 4611 | NOTES |
| 4612 | The function first constructs a companion object of the type |
| 4613 | JOIN_TAB_SCAN_MRR, then it calls the init method of the parent class. |
| 4614 | |
| 4615 | RETURN VALUE |
| 4616 | 0 initialization with buffer allocations has been succeeded |
| 4617 | 1 otherwise |
| 4618 | */ |
| 4619 | |
| 4620 | int JOIN_CACHE_BKAH::init(bool for_explain) |
| 4621 | { |
| 4622 | bool check_only_first_match= join_tab->check_only_first_match(); |
| 4623 | |
| 4624 | no_association= MY_TEST(mrr_mode & HA_MRR_NO_ASSOCIATION); |
| 4625 | |
| 4626 | RANGE_SEQ_IF rs_funcs= { bka_range_seq_key_info, |
| 4627 | bkah_range_seq_init, |
| 4628 | bkah_range_seq_next, |
| 4629 | check_only_first_match && !no_association ? |
| 4630 | bkah_range_seq_skip_record : 0, |
| 4631 | bkah_skip_index_tuple }; |
| 4632 | |
| 4633 | DBUG_ENTER("JOIN_CACHE_BKAH::init" ); |
| 4634 | |
| 4635 | if (!(join_tab_scan= new JOIN_TAB_SCAN_MRR(join, join_tab, |
| 4636 | mrr_mode, rs_funcs))) |
| 4637 | DBUG_RETURN(1); |
| 4638 | |
| 4639 | DBUG_RETURN(JOIN_CACHE_HASHED::init(for_explain)); |
| 4640 | } |
| 4641 | |
| 4642 | |
| 4643 | /* |
| 4644 | Check the index condition of the joined table for a record from the BKA cache |
| 4645 | |
| 4646 | SYNOPSIS |
| 4647 | skip_index_tuple() |
| 4648 | range_info record chain returned by MRR |
| 4649 | |
| 4650 | DESCRIPTION |
| 4651 | See JOIN_CACHE_BKA::skip_index_tuple(). |
  This function is the variant for use with the class JOIN_CACHE_BKAH.
| 4653 | The difference from JOIN_CACHE_BKA case is that there may be multiple |
| 4654 | previous table record combinations that share the same key(MRR range). |
| 4655 | As a consequence, we need to loop through the chain of all table record |
| 4656 | combinations that match the given MRR range key range_info until we find |
| 4657 | one that satisfies the index condition. |
| 4658 | |
| 4659 | NOTE |
| 4660 | Possible optimization: |
| 4661 | Before we unpack the record from a previous table |
| 4662 | check if this table is used in the condition. |
| 4663 | If so then unpack the record otherwise skip the unpacking. |
| 4664 | This should be done by a special virtual method |
| 4665 | get_partial_record_by_pos(). |
| 4666 | |
| 4667 | RETURN VALUE |
| 4668 | 1 any record combination from the chain referred by range_info |
| 4669 | does not satisfy the index condition |
| 4670 | 0 otherwise |
| 4671 | |
| 4672 | |
| 4673 | */ |
| 4674 | |
| 4675 | bool JOIN_CACHE_BKAH::skip_index_tuple(range_id_t range_info) |
| 4676 | { |
| 4677 | uchar *last_rec_ref_ptr= get_next_rec_ref((uchar*) range_info); |
| 4678 | uchar *next_rec_ref_ptr= last_rec_ref_ptr; |
| 4679 | DBUG_ENTER("JOIN_CACHE_BKAH::skip_index_tuple" ); |
| 4680 | do |
| 4681 | { |
| 4682 | next_rec_ref_ptr= get_next_rec_ref(next_rec_ref_ptr); |
| 4683 | uchar *rec_ptr= next_rec_ref_ptr + rec_fields_offset; |
| 4684 | get_record_by_pos(rec_ptr); |
| 4685 | if (join_tab->cache_idx_cond->val_int()) |
| 4686 | DBUG_RETURN(FALSE); |
| 4687 | } while(next_rec_ref_ptr != last_rec_ref_ptr); |
| 4688 | DBUG_RETURN(TRUE); |
| 4689 | } |
| 4690 | |