1 | /* Copyright (C) 2000-2006 MySQL AB |
2 | |
3 | This program is free software; you can redistribute it and/or modify |
4 | it under the terms of the GNU General Public License as published by |
5 | the Free Software Foundation; version 2 of the License. |
6 | |
7 | This program is distributed in the hope that it will be useful, |
8 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
10 | GNU General Public License for more details. |
11 | |
12 | You should have received a copy of the GNU General Public License |
13 | along with this program; if not, write to the Free Software |
14 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ |
15 | |
16 | /** |
17 | @file |
18 | |
19 | @brief |
20 | join cache optimizations |
21 | |
22 | @defgroup Query_Optimizer Query Optimizer |
23 | @{ |
24 | */ |
25 | |
26 | #ifdef USE_PRAGMA_IMPLEMENTATION |
27 | #pragma implementation // gcc: Class implementation |
28 | #endif |
29 | |
30 | #include "mariadb.h" |
31 | #include "key.h" |
32 | #include "sql_base.h" |
33 | #include "sql_select.h" |
34 | #include "opt_subselect.h" |
35 | |
36 | #define NO_MORE_RECORDS_IN_BUFFER (uint)(-1) |
37 | |
38 | static void save_or_restore_used_tabs(JOIN_TAB *join_tab, bool save); |
39 | |
40 | /***************************************************************************** |
41 | * Join cache module |
42 | ******************************************************************************/ |
43 | |
44 | /* |
45 | Fill in the descriptor of a flag field associated with a join cache |
46 | |
47 | SYNOPSIS |
    add_flag_field_to_join_cache()
49 | str position in a record buffer to copy the field from/to |
50 | length length of the field |
51 | field IN/OUT pointer to the field descriptor to fill in |
52 | |
53 | DESCRIPTION |
    The function fills in the descriptor of a cache flag field to which
55 | the parameter 'field' points to. The function uses the first two |
56 | parameters to set the position in the record buffer from/to which |
57 | the field value is to be copied and the length of the copied fragment. |
58 | Before returning the result the function increments the value of |
59 | *field by 1. |
    The function ignores the fields 'blob_length' and 'offset' of the
61 | descriptor. |
62 | |
63 | RETURN VALUE |
64 | the length of the field |
65 | */ |
66 | |
67 | static |
68 | uint add_flag_field_to_join_cache(uchar *str, uint length, CACHE_FIELD **field) |
69 | { |
70 | CACHE_FIELD *copy= *field; |
71 | copy->str= str; |
72 | copy->length= length; |
73 | copy->type= 0; |
74 | copy->field= 0; |
75 | copy->referenced_field_no= 0; |
76 | (*field)++; |
77 | return length; |
78 | } |
79 | |
80 | |
81 | /* |
82 | Fill in the descriptors of table data fields associated with a join cache |
83 | |
84 | SYNOPSIS |
85 | add_table_data_fields_to_join_cache() |
86 | tab descriptors of fields from this table are to be filled |
87 | field_set descriptors for only these fields are to be created |
88 | field_cnt IN/OUT counter of data fields |
89 | descr IN/OUT pointer to the first descriptor to be filled |
90 | field_ptr_cnt IN/OUT counter of pointers to the data fields |
91 | descr_ptr IN/OUT pointer to the first pointer to blob descriptors |
92 | |
93 | DESCRIPTION |
94 | The function fills in the descriptors of cache data fields from the table |
95 | 'tab'. The descriptors are filled only for the fields marked in the |
96 | bitmap 'field_set'. |
97 | The function fills the descriptors starting from the position pointed |
    by 'descr'. If an added field is of a BLOB type then a pointer to
    its descriptor is added to the array descr_ptr.
100 | At the return 'descr' points to the position after the last added |
101 | descriptor while 'descr_ptr' points to the position right after the |
102 | last added pointer. |
103 | |
104 | RETURN VALUE |
105 | the total length of the added fields |
106 | */ |
107 | |
108 | static |
109 | uint add_table_data_fields_to_join_cache(JOIN_TAB *tab, |
110 | MY_BITMAP *field_set, |
111 | uint *field_cnt, |
112 | CACHE_FIELD **descr, |
113 | uint *field_ptr_cnt, |
114 | CACHE_FIELD ***descr_ptr) |
115 | { |
116 | Field **fld_ptr; |
117 | uint len= 0; |
118 | CACHE_FIELD *copy= *descr; |
119 | CACHE_FIELD **copy_ptr= *descr_ptr; |
120 | uint used_fields= bitmap_bits_set(field_set); |
121 | for (fld_ptr= tab->table->field; used_fields; fld_ptr++) |
122 | { |
123 | if (bitmap_is_set(field_set, (*fld_ptr)->field_index)) |
124 | { |
125 | len+= (*fld_ptr)->fill_cache_field(copy); |
126 | if (copy->type == CACHE_BLOB) |
127 | { |
128 | *copy_ptr= copy; |
129 | copy_ptr++; |
130 | (*field_ptr_cnt)++; |
131 | } |
132 | copy->field= *fld_ptr; |
133 | copy->referenced_field_no= 0; |
134 | copy++; |
135 | (*field_cnt)++; |
136 | used_fields--; |
137 | } |
138 | } |
139 | *descr= copy; |
140 | *descr_ptr= copy_ptr; |
141 | return len; |
142 | } |
143 | |
144 | /* |
145 | Determine different counters of fields associated with a record in the cache |
146 | |
147 | SYNOPSIS |
148 | calc_record_fields() |
149 | |
150 | DESCRIPTION |
151 | The function counts the number of total fields stored in a record |
152 | of the cache and saves this number in the 'fields' member. It also |
153 | determines the number of flag fields and the number of blobs. |
154 | The function sets 'with_match_flag' on if 'join_tab' needs a match flag |
155 | i.e. if it is the first inner table of an outer join or a semi-join. |
156 | |
157 | RETURN VALUE |
158 | none |
159 | */ |
160 | |
void JOIN_CACHE::calc_record_fields()
{
  JOIN_TAB *tab;

  /*
    Find the first table whose columns are stored in this cache: either
    the table right after those served by the previous cache in the chain,
    or (for the first cache) the first non-const table of the join.
  */
  if (prev_cache)
    tab= prev_cache->join_tab;
  else
  {
    if (join_tab->bush_root_tab)
    {
      /*
        --ot1--SJM1--------------ot2--...
                |
                |
                +-it1--...--itN
                       ^____ this->join_tab is somewhere here,
                             inside an sjm nest.

        The join buffer should store the values of it1.*, it2.*, ..
        It should not store values of ot1.*.
      */
      tab= join_tab->bush_root_tab->bush_children->start;
    }
    else
    {
      /*
        -ot1--ot2--SJM1--SJM2--------------ot3--...--otN
                    |     |                 ^
                    |     +-it21--...--it2N |
                    |                       \-- we're somewhere here,
                    +-it11--...--it1N           at the top level

        The join buffer should store the values of

          ot1.*, ot2.*, it1{i}, it2{j}.*, ot3.*, ...

        that is, we should start from the first non-const top-level table.

        We will need to store columns of SJ-inner tables (it_X_Y.*), but we're
        not interested in storing the columns of materialization tables
        themselves. Because of that, if the first non-const top-level table is
        a materialized table, we move to its bush_children:
      */
      tab= join->join_tab + join->const_tables;
      if (tab->bush_children)
        tab= tab->bush_children->start;
    }
  }
  DBUG_ASSERT(!tab->bush_children);

  /* Remember the first cached table and reset all field counters */
  start_tab= tab;
  fields= 0;
  blobs= 0;
  flag_fields= 0;
  data_field_count= 0;
  data_field_ptr_count= 0;
  referenced_fields= 0;

  /*
    The following loop will get inside SJM nests, because data may be unpacked
    to sjm-inner tables.
  */
  for (; tab != join_tab ; tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
  {
    tab->calc_used_field_length(FALSE);
    /* One flag field for the null bitmap, one for the null-row flag */
    flag_fields+= MY_TEST(tab->used_null_fields || tab->used_uneven_bit_fields);
    flag_fields+= MY_TEST(tab->table->maybe_null);
    fields+= tab->used_fields;
    blobs+= tab->used_blobs;
  }
  /* A match flag is needed when join_tab uses one (outer join / semi-join) */
  if ((with_match_flag= join_tab->use_match_flag()))
    flag_fields++;
  /* 'fields' counts both flag fields and data fields */
  fields+= flag_fields;
}
235 | |
236 | |
237 | /* |
238 | Collect information on join key arguments |
239 | |
240 | SYNOPSIS |
241 | collect_info_on_key_args() |
242 | |
243 | DESCRIPTION |
244 | The function traverses the ref expressions that are used to access the |
245 | joined table join_tab. For each table 'tab' whose fields are to be stored |
246 | in the join buffer of the cache the function finds the fields from 'tab' |
247 | that occur in the ref expressions and marks these fields in the bitmap |
248 | tab->table->tmp_set. The function counts the number of them stored |
249 | in this cache and the total number of them stored in the previous caches |
250 | and saves the results of the counting in 'local_key_arg_fields' and |
251 | 'external_key_arg_fields' respectively. |
252 | |
253 | NOTES |
254 | The function does not do anything if no key is used to join the records |
255 | from join_tab. |
256 | |
257 | RETURN VALUE |
258 | none |
259 | */ |
260 | |
void JOIN_CACHE::collect_info_on_key_args()
{
  JOIN_TAB *tab;
  JOIN_CACHE *cache;
  local_key_arg_fields= 0;
  external_key_arg_fields= 0;

  /* Nothing to collect unless the joined table is accessed through a key */
  if (!is_key_access())
    return;

  TABLE_REF *ref= &join_tab->ref;
  cache= this;
  /* Examine this cache first, then every previous cache in the chain */
  do
  {
    for (tab= cache->start_tab; tab != cache->join_tab;
         tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
    {
      uint key_args;
      bitmap_clear_all(&tab->table->tmp_set);
      /* Mark in tmp_set every field of 'tab' occurring in a ref expression */
      for (uint i= 0; i < ref->key_parts; i++)
      {
        Item *ref_item= ref->items[i];
        if (!(tab->table->map & ref_item->used_tables()))
          continue;
        ref_item->walk(&Item::add_field_to_set_processor, 1, tab->table);
      }
      if ((key_args= bitmap_bits_set(&tab->table->tmp_set)))
      {
        /* Attribute the count to this cache or to the previous ones */
        if (cache == this)
          local_key_arg_fields+= key_args;
        else
          external_key_arg_fields+= key_args;
      }
    }
    cache= cache->prev_cache;
  }
  while (cache);

  return;
}
301 | |
302 | |
303 | /* |
304 | Allocate memory for descriptors and pointers to them associated with the cache |
305 | |
306 | SYNOPSIS |
307 | alloc_fields() |
308 | |
309 | DESCRIPTION |
310 | The function allocates memory for the array of fields descriptors |
311 | and the array of pointers to the field descriptors used to copy |
312 | join record data from record buffers into the join buffer and |
313 | backward. Some pointers refer to the field descriptor associated |
314 | with previous caches. They are placed at the beginning of the array |
315 | of pointers and its total number is stored in external_key_arg_fields. |
316 | The pointer of the first array is assigned to field_descr and the number |
317 | of the elements in it is precalculated by the function calc_record_fields. |
318 | The allocated arrays are adjacent. |
319 | |
320 | NOTES |
321 | The memory is allocated in join->thd->mem_root |
322 | |
323 | RETURN VALUE |
    0 if the memory has been successfully allocated, 1 otherwise
325 | */ |
326 | |
327 | int JOIN_CACHE::alloc_fields() |
328 | { |
329 | uint ptr_cnt= external_key_arg_fields+blobs+1; |
330 | uint fields_size= sizeof(CACHE_FIELD)*fields; |
331 | field_descr= (CACHE_FIELD*) join->thd->alloc(fields_size + |
332 | sizeof(CACHE_FIELD*)*ptr_cnt); |
333 | blob_ptr= (CACHE_FIELD **) ((uchar *) field_descr + fields_size); |
334 | return (field_descr == NULL); |
335 | } |
336 | |
337 | |
338 | /* |
339 | Create descriptors of the record flag fields stored in the join buffer |
340 | |
341 | SYNOPSIS |
342 | create_flag_fields() |
343 | |
344 | DESCRIPTION |
345 | The function creates descriptors of the record flag fields stored |
346 | in the join buffer. These are descriptors for: |
347 | - an optional match flag field, |
348 | - table null bitmap fields, |
349 | - table null row fields. |
350 | The match flag field is created when 'join_tab' is the first inner |
    table of an outer join or a semi-join. A null bitmap field is
352 | created for any table whose fields are to be stored in the join |
353 | buffer if at least one of these fields is nullable or is a BIT field |
354 | whose bits are partially stored with null bits. A null row flag |
355 | is created for any table assigned to the cache if it is an inner |
356 | table of an outer join. |
357 | The descriptor for flag fields are placed one after another at the |
358 | beginning of the array of field descriptors 'field_descr' that |
359 | contains 'fields' elements. If there is a match flag field the |
360 | descriptor for it is always first in the sequence of flag fields. |
361 | The descriptors for other flag fields can follow in an arbitrary |
362 | order. |
363 | The flag field values follow in a record stored in the join buffer |
364 | in the same order as field descriptors, with the match flag always |
365 | following first. |
366 | The function sets the value of 'flag_fields' to the total number |
367 | of the descriptors created for the flag fields. |
368 | The function sets the value of 'length' to the total length of the |
369 | flag fields. |
370 | |
371 | RETURN VALUE |
372 | none |
373 | */ |
374 | |
375 | void JOIN_CACHE::create_flag_fields() |
376 | { |
377 | CACHE_FIELD *copy; |
378 | JOIN_TAB *tab; |
379 | |
380 | copy= field_descr; |
381 | |
382 | length=0; |
383 | |
384 | /* If there is a match flag the first field is always used for this flag */ |
385 | if (with_match_flag) |
386 | length+= add_flag_field_to_join_cache((uchar*) &join_tab->found, |
387 | sizeof(join_tab->found), |
388 | ©); |
389 | |
390 | /* Create fields for all null bitmaps and null row flags that are needed */ |
391 | for (tab= start_tab; tab != join_tab; |
392 | tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS)) |
393 | { |
394 | TABLE *table= tab->table; |
395 | |
396 | /* Create a field for the null bitmap from table if needed */ |
397 | if (tab->used_null_fields || tab->used_uneven_bit_fields) |
398 | length+= add_flag_field_to_join_cache(table->null_flags, |
399 | table->s->null_bytes, |
400 | ©); |
401 | |
402 | /* Create table for the null row flag if needed */ |
403 | if (table->maybe_null) |
404 | length+= add_flag_field_to_join_cache((uchar*) &table->null_row, |
405 | sizeof(table->null_row), |
406 | ©); |
407 | } |
408 | |
409 | /* Theoretically the new value of flag_fields can be less than the old one */ |
410 | flag_fields= (uint)(copy-field_descr); |
411 | } |
412 | |
413 | |
414 | /* |
415 | Create descriptors of the fields used to build access keys to the joined table |
416 | |
417 | SYNOPSIS |
418 | create_key_arg_fields() |
419 | |
420 | DESCRIPTION |
421 | The function creates descriptors of the record fields stored in the join |
422 | buffer that are used to build access keys to the joined table. These |
423 | fields are put into the buffer ahead of other records fields stored in |
424 | the buffer. Such placement helps to optimize construction of access keys. |
425 | For each field that is used to build access keys to the joined table but |
426 | is stored in some other join cache buffer the function saves a pointer |
    to the field descriptor. The array of such pointers is placed in the
    join cache structure just before the array of pointers to the
429 | blob fields blob_ptr. |
430 | Any field stored in a join cache buffer that is used to construct keys |
431 | to access tables associated with other join caches is called a referenced |
432 | field. It receives a unique number that is saved by the function in the |
433 | member 'referenced_field_no' of the CACHE_FIELD descriptor for the field. |
434 | This number is used as index to the array of offsets to the referenced |
435 | fields that are saved and put in the join cache buffer after all record |
436 | fields. |
437 | The function also finds out whether that the keys to access join_tab |
438 | can be considered as embedded and, if so, sets the flag 'use_emb_key' in |
439 | this join cache appropriately. |
440 | |
441 | NOTES. |
442 | When a key to access the joined table 'join_tab' is constructed the array |
443 | of pointers to the field descriptors for the external fields is looked |
    through. For each of these pointers we find out in what previous key cache
    the referenced field is stored. The value of 'referenced_field_no'
    provides us with the index into the array of offsets for referenced
    fields stored in the join cache. The offset read by the index allows
    us to read the field without reading all other fields of the record
    stored in the join cache buffer. This optimizes the construction of keys
450 | to access 'join_tab' when some key arguments are stored in the previous |
451 | join caches. |
452 | |
453 | NOTES |
454 | The function does not do anything if no key is used to join the records |
455 | from join_tab. |
456 | |
457 | RETURN VALUE |
458 | none |
459 | */ |
void JOIN_CACHE::create_key_arg_fields()
{
  JOIN_TAB *tab;
  JOIN_CACHE *cache;

  /* Nothing to do unless the joined table is accessed through a key */
  if (!is_key_access())
    return;

  /*
    Phase 1:
    Save pointers to the cache fields in previous caches
    that are used to build keys for this key access.
  */
  cache= this;
  uint ext_key_arg_cnt= external_key_arg_fields;
  CACHE_FIELD *copy;
  CACHE_FIELD **copy_ptr= blob_ptr;
  /* Loop until all external key argument fields have been located */
  while (ext_key_arg_cnt)
  {
    cache= cache->prev_cache;
    for (tab= cache->start_tab; tab != cache->join_tab;
         tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
    {
      CACHE_FIELD *copy_end;
      MY_BITMAP *key_read_set= &tab->table->tmp_set;
      /* key_read_set contains the bitmap of tab's fields referenced by ref */
      if (bitmap_is_clear_all(key_read_set))
        continue;
      /* Scan the data field descriptors (those after the flag fields) */
      copy_end= cache->field_descr+cache->fields;
      for (copy= cache->field_descr+cache->flag_fields; copy < copy_end; copy++)
      {
        /*
          (1) - when we store rowids for DuplicateWeedout, they have
                copy->field==NULL
        */
        if (copy->field &&  // (1)
            copy->field->table == tab->table &&
            bitmap_is_set(key_read_set, copy->field->field_index))
        {
          *copy_ptr++= copy;
          ext_key_arg_cnt--;
          if (!copy->referenced_field_no)
          {
            /*
              Register the referenced field 'copy':
              - set the offset number in copy->referenced_field_no,
              - adjust the value of the flag 'with_length',
              - adjust the values of 'pack_length' and
                of 'pack_length_with_blob_ptrs'.
            */
            copy->referenced_field_no= ++cache->referenced_fields;
            if (!cache->with_length)
            {
              /* First referenced field: record lengths are now required */
              cache->with_length= TRUE;
              uint sz= cache->get_size_of_rec_length();
              cache->base_prefix_length+= sz;
              cache->pack_length+= sz;
              cache->pack_length_with_blob_ptrs+= sz;
            }
            /* Account for the field offset saved at the end of each record */
            cache->pack_length+= cache->get_size_of_fld_offset();
            cache->pack_length_with_blob_ptrs+= cache->get_size_of_fld_offset();
          }
        }
      }
    }
  }
  /* After this 'blob_ptr' shall not be changed */
  blob_ptr= copy_ptr;

  /*
    Phase 2:
    Now create local fields that are used to build ref for this key access.
    They go right after the flag fields, ahead of all other data fields.
  */
  copy= field_descr+flag_fields;
  for (tab= start_tab; tab != join_tab;
       tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
  {
    length+= add_table_data_fields_to_join_cache(tab, &tab->table->tmp_set,
                                                 &data_field_count, &copy,
                                                 &data_field_ptr_count,
                                                 &copy_ptr);
  }

  /* Check whether the key can be built directly from the cached bytes */
  use_emb_key= check_emb_key_usage();

  return;
}
543 | |
544 | |
545 | /* |
546 | Create descriptors of all remaining data fields stored in the join buffer |
547 | |
548 | SYNOPSIS |
549 | create_remaining_fields() |
550 | |
551 | DESCRIPTION |
552 | The function creates descriptors for all remaining data fields of a |
553 | record from the join buffer. If the value returned by is_key_access() is |
554 | false the function creates fields for all read record fields that |
555 | comprise the partial join record joined with join_tab. Otherwise, |
556 | for each table tab, the set of the read fields for which the descriptors |
557 | have to be added is determined as the difference between all read fields |
    and those for which the descriptors have been already created.
559 | The latter are supposed to be marked in the bitmap tab->table->tmp_set. |
    The function increases the value of 'length' to the total length of
561 | the added fields. |
562 | |
563 | NOTES |
564 | If is_key_access() returns true the function modifies the value of |
565 | tab->table->tmp_set for a each table whose fields are stored in the cache. |
566 | The function calls the method Field::fill_cache_field to figure out |
567 | the type of the cache field and the maximal length of its representation |
568 | in the join buffer. If this is a blob field then additionally a pointer |
569 | to this field is added as an element of the array blob_ptr. For a blob |
570 | field only the size of the length of the blob data is taken into account. |
571 | It is assumed that 'data_field_count' contains the number of descriptors |
572 | for data fields that have been already created and 'data_field_ptr_count' |
573 | contains the number of the pointers to such descriptors having been |
574 | stored up to the moment. |
575 | |
576 | RETURN VALUE |
577 | none |
578 | */ |
579 | |
void JOIN_CACHE::create_remaining_fields()
{
  JOIN_TAB *tab;
  /* With no key access, no descriptors have been created yet: take all */
  bool all_read_fields= !is_key_access();
  /* Continue filling right after the descriptors created so far */
  CACHE_FIELD *copy= field_descr+flag_fields+data_field_count;
  CACHE_FIELD **copy_ptr= blob_ptr+data_field_ptr_count;

  for (tab= start_tab; tab != join_tab;
       tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
  {
    MY_BITMAP *rem_field_set;
    TABLE *table= tab->table;

    if (all_read_fields)
      rem_field_set= table->read_set;
    else
    {
      /*
        tmp_set marks the fields already handled by create_key_arg_fields();
        turn it into "read fields not yet copied" (this modifies tmp_set).
      */
      bitmap_invert(&table->tmp_set);
      bitmap_intersect(&table->tmp_set, table->read_set);
      rem_field_set= &table->tmp_set;
    }

    length+= add_table_data_fields_to_join_cache(tab, rem_field_set,
                                                 &data_field_count, &copy,
                                                 &data_field_ptr_count,
                                                 &copy_ptr);

    /* SemiJoinDuplicateElimination: allocate space for rowid if needed */
    if (tab->keep_current_rowid)
    {
      copy->str= table->file->ref;
      if (copy->str)
        copy->length= table->file->ref_length;
      else
      {
        /* This may happen only for materialized derived tables and views */
        copy->length= 0;
        copy->str= (uchar *) table;
      }
      copy->type= CACHE_ROWID;
      copy->field= 0;
      copy->referenced_field_no= 0;
      /*
        Note: this may seem odd, but at this point we have
        table->file->ref==NULL while table->file->ref_length is already set
        to correct value.
      */
      length += table->file->ref_length;
      data_field_count++;
      copy++;
    }
  }
}
633 | |
634 | |
635 | |
636 | /* |
637 | Calculate and set all cache constants |
638 | |
639 | SYNOPSIS |
640 | set_constants() |
641 | |
642 | DESCRIPTION |
643 | The function calculates and set all precomputed constants that are used |
644 | when writing records into the join buffer and reading them from it. |
645 | It calculates the size of offsets of a record within the join buffer |
646 | and of a field within a record. It also calculates the number of bytes |
647 | used to store record lengths. |
648 | The function also calculates the maximal length of the representation |
649 | of record in the cache excluding blob_data. This value is used when |
    making a decision whether more records should be added into the join
651 | buffer or not. |
652 | |
653 | RETURN VALUE |
654 | none |
655 | */ |
656 | |
void JOIN_CACHE::set_constants()
{
  /*
    Any record from a BKA cache is prepended with the record length.
    We use the record length when reading the buffer and building key values
    for each record. The length allows us not to read the fields that are
    not needed for keys.
    If a record has match flag it also may be skipped when the match flag
    is on. It happens if the cache is used for a semi-join operation or
    for outer join when the 'not exist' optimization can be applied.
    If some of the fields are referenced from other caches then
    the record length allows us to easily reach the saved offsets for
    these fields since the offsets are stored at the very end of the record.
    However at this moment we don't know whether we have referenced fields for
    the cache or not. Later when a referenced field is registered for the cache
    we adjust the value of the flag 'with_length'.
  */
  with_length= is_key_access() ||
               join_tab->is_inner_table_of_semi_join_with_first_match() ||
               join_tab->is_inner_table_of_outer_join();
  /*
    At this moment we don't know yet the value of 'referenced_fields',
    but in any case it can't be greater than the value of 'fields'.
  */
  uint len= length + fields*sizeof(uint)+blobs*sizeof(uchar *) +
            (prev_cache ? prev_cache->get_size_of_rec_offset() : 0) +
            sizeof(ulong);
  /*
    The values of size_of_rec_ofs, size_of_rec_len, size_of_fld_ofs,
    base_prefix_length, pack_length, pack_length_with_blob_ptrs
    will be recalculated later in this function when we get the estimate
    for the actual value of the join buffer size.
  */
  size_of_rec_ofs= size_of_rec_len= size_of_fld_ofs= 4;
  base_prefix_length= (with_length ? size_of_rec_len : 0) +
                      (prev_cache ? prev_cache->get_size_of_rec_offset() : 0);
  pack_length= (with_length ? size_of_rec_len : 0) +
               (prev_cache ? prev_cache->get_size_of_rec_offset() : 0) +
               length + fields*sizeof(uint);
  pack_length_with_blob_ptrs= pack_length + blobs*sizeof(uchar *);
  min_buff_size= 0;
  min_records= 1;
  /* The buffer must be large enough for at least the minimal record set */
  buff_size= (size_t)MY_MAX(join->thd->variables.join_buff_size,
                            get_min_join_buffer_size());
  /* Recompute offset sizes now that the buffer size estimate is known */
  size_of_rec_ofs= offset_size(buff_size);
  size_of_rec_len= blobs ? size_of_rec_ofs : offset_size(len);
  size_of_fld_ofs= size_of_rec_len;
  base_prefix_length= (with_length ? size_of_rec_len : 0) +
                      (prev_cache ? prev_cache->get_size_of_rec_offset() : 0);
  /*
    The size of the offsets for referenced fields will be added later.
    The values of 'pack_length' and 'pack_length_with_blob_ptrs' are adjusted
    every time when the first reference to the referenced field is registered.
  */
  pack_length= (with_length ? size_of_rec_len : 0) +
               (prev_cache ? prev_cache->get_size_of_rec_offset() : 0) +
               length;
  pack_length_with_blob_ptrs= pack_length + blobs*sizeof(uchar *);
}
716 | |
717 | |
718 | /* |
719 | Get maximum total length of all affixes of a record in the join cache buffer |
720 | |
721 | SYNOPSIS |
722 | get_record_max_affix_length() |
723 | |
724 | DESCRIPTION |
725 | The function calculates the maximum possible total length of all affixes |
726 | of a record in the join cache buffer, that is made of: |
727 | - the length of all prefixes used in this cache, |
728 | - the length of the match flag if it's needed |
729 | - the total length of the maximum possible offsets to the fields of |
730 | a record in the buffer. |
731 | |
732 | RETURN VALUE |
733 | The maximum total length of all affixes of a record in the join buffer |
734 | */ |
735 | |
736 | uint JOIN_CACHE::get_record_max_affix_length() |
737 | { |
738 | uint len= get_prefix_length() + |
739 | MY_TEST(with_match_flag) + |
740 | size_of_fld_ofs * data_field_count; |
741 | return len; |
742 | } |
743 | |
744 | |
745 | /* |
746 | Get the minimum possible size of the cache join buffer |
747 | |
748 | SYNOPSIS |
749 | get_min_join_buffer_size() |
750 | |
751 | DESCRIPTION |
752 | At the first its invocation for the cache the function calculates the |
753 | minimum possible size of the join buffer of the cache. This value depends |
754 | on the minimal number of records 'min_records' to be stored in the join |
755 | buffer. The number is supposed to be determined by the procedure that |
756 | chooses the best access path to the joined table join_tab in the execution |
757 | plan. After the calculation of the interesting size the function saves it |
758 | in the field 'min_buff_size' in order to use it directly at the next |
759 | invocations of the function. |
760 | |
761 | NOTES |
762 | Currently the number of minimal records is just set to 1. |
763 | |
764 | RETURN VALUE |
765 | The minimal possible size of the join buffer of this cache |
766 | */ |
767 | |
size_t JOIN_CACHE::get_min_join_buffer_size()
{
  /* Computed once; subsequent calls return the memoized value */
  if (!min_buff_size)
  {
    size_t len= 0;       /* maximum possible length of one record          */
    size_t len_last= 0;  /* expected length, used for the last record only */
    for (JOIN_TAB *tab= start_tab; tab != join_tab;
         tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
    {
      len+= tab->get_max_used_fieldlength();
      len_last+= tab->get_used_fieldlength();
    }
    /* Per-record overhead: affixes plus key-related addon space */
    size_t len_addon= get_record_max_affix_length() +
                      get_max_key_addon_space_per_record();
    len+= len_addon;
    len_last+= len_addon;
    /* Worst case for all records except the last, expected for the last */
    size_t min_sz= len*(min_records-1) + len_last;
    min_sz+= pack_length_with_blob_ptrs;
    size_t add_sz= 0;
    /* Auxiliary buffer space required as records are added one by one */
    for (uint i=0; i < min_records; i++)
      add_sz+= join_tab_scan->aux_buffer_incr(i+1);
    avg_aux_buffer_incr= add_sz/min_records;
    min_sz+= add_sz;
    /* Never report a zero-sized buffer */
    set_if_bigger(min_sz, 1);
    min_buff_size= min_sz;
  }
  return min_buff_size;
}
796 | |
797 | |
798 | /* |
799 | Get the maximum possible size of the cache join buffer |
800 | |
801 | SYNOPSIS |
802 | get_max_join_buffer_size() |
803 | |
804 | optimize_buff_size FALSE <-> do not take more memory than needed for |
805 | the estimated number of records in the partial join |
806 | |
807 | DESCRIPTION |
808 | At the first its invocation for the cache the function calculates the |
809 | maximum possible size of join buffer for the cache. If the parameter |
810 | optimize_buff_size true then this value does not exceed the size of the |
811 | space needed for the estimated number of records 'max_records' in the |
812 | partial join that joins tables from the first one through join_tab. This |
813 | value is also capped off by the value of join_tab->join_buffer_size_limit, |
814 | if it has been set a to non-zero value, and by the value of the system |
815 | parameter join_buffer_size - otherwise. After the calculation of the |
816 | interesting size the function saves the value in the field 'max_buff_size' |
817 | in order to use it directly at the next invocations of the function. |
818 | |
819 | NOTES |
820 | Currently the value of join_tab->join_buffer_size_limit is initialized |
821 | to 0 and is never reset. |
822 | |
823 | RETURN VALUE |
824 | The maximum possible size of the join buffer of this cache |
825 | */ |
826 | |
size_t JOIN_CACHE::get_max_join_buffer_size(bool optimize_buff_size)
{
  /* Computed once; subsequent calls return the memoized value */
  if (!max_buff_size)
  {
    size_t max_sz;
    size_t min_sz= get_min_join_buffer_size();
    size_t len= 0;
    /* Expected (average) length of one record of the partial join */
    for (JOIN_TAB *tab= start_tab; tab != join_tab;
         tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
    {
      len+= tab->get_used_fieldlength();
    }
    len+= get_record_max_affix_length();
    avg_record_length= len;
    len+= get_max_key_addon_space_per_record() + avg_aux_buffer_incr;
    space_per_record= len;

    /* Cap by join_buffer_size, or by the per-table limit if one is set */
    size_t limit_sz= (size_t)join->thd->variables.join_buff_size;
    if (join_tab->join_buffer_size_limit)
      set_if_smaller(limit_sz, join_tab->join_buffer_size_limit);
    if (!optimize_buff_size)
      max_sz= limit_sz;
    else
    {
      /*
        Take only the space the estimated 'max_records' records need.
        NOTE(review): the division assumes max_records > 0 here — confirm
        it is always set by the optimizer before this method is called.
      */
      if (limit_sz / max_records > space_per_record)
        max_sz= space_per_record * max_records;
      else
        max_sz= limit_sz;
      max_sz+= pack_length_with_blob_ptrs;
      set_if_smaller(max_sz, limit_sz);
    }
    /* The buffer must still fit the minimal number of records */
    set_if_bigger(max_sz, min_sz);
    max_buff_size= max_sz;
  }
  return max_buff_size;
}
863 | |
864 | |
865 | /* |
866 | Allocate memory for a join buffer |
867 | |
868 | SYNOPSIS |
869 | alloc_buffer() |
870 | |
871 | DESCRIPTION |
872 | The function allocates a lump of memory for the cache join buffer. |
873 | Initially the function sets the size of the buffer buff_size equal to |
874 | the value returned by get_max_join_buffer_size(). If the total size of |
875 | the space intended to be used for the join buffers employed by the |
876 | tables from the first one through join_tab exceeds the value of the |
877 | system parameter join_buff_space_limit, then the function first tries |
878 | to shrink the used buffers to make the occupied space fit the maximum |
879 | memory allowed to be used for all join buffers in total. After |
880 | this the function tries to allocate a join buffer for join_tab. |
881 | If it fails to do so, it decrements the requested size of the join |
882 | buffer, shrinks proportionally the join buffers used for the previous |
883 | tables and tries to allocate a buffer for join_tab. In the case of a |
884 | failure the function repeats its attempts with smaller and smaller |
885 | requested sizes of the buffer, but not more than 4 times. |
886 | |
887 | RETURN VALUE |
888 | 0 if the memory has been successfully allocated |
889 | 1 otherwise |
890 | */ |
891 | |
int JOIN_CACHE::alloc_buffer()
{
  JOIN_TAB *tab;
  JOIN_CACHE *cache;
  ulonglong curr_buff_space_sz= 0;
  ulonglong curr_min_buff_space_sz= 0;
  ulonglong join_buff_space_limit=
    join->thd->variables.join_buff_space_limit;
  bool optimize_buff_size=
         optimizer_flag(join->thd, OPTIMIZER_SWITCH_OPTIMIZE_JOIN_BUFFER_SIZE);
  double partial_join_cardinality= (join_tab-1)->get_partial_join_cardinality();
  buff= NULL;
  min_buff_size= 0;
  max_buff_size= 0;
  min_records= 1;
  /*
    Cap the record count estimate by join_buff_space_limit so that the later
    size computations based on max_records cannot blow past the total space
    allowed for all join buffers.
  */
  max_records= (size_t) (partial_join_cardinality <= join_buff_space_limit ?
                 (ulonglong) partial_join_cardinality : join_buff_space_limit);
  set_if_bigger(max_records, 10);
  min_buff_size= get_min_join_buffer_size();
  buff_size= get_max_join_buffer_size(optimize_buff_size);

  /*
    Sum up the minimum and the currently used buffer space of all join
    caches employed for the tables preceding join_tab.
  */
  for (tab= start_tab; tab!= join_tab;
       tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
  {
    cache= tab->cache;
    if (cache)
    {
      curr_min_buff_space_sz+= cache->get_min_join_buffer_size();
      curr_buff_space_sz+= cache->get_join_buffer_size();
    }
  }
  curr_min_buff_space_sz+= min_buff_size;
  curr_buff_space_sz+= buff_size;

  /*
    Fail outright if even the minimum buffers do not fit into the space
    limit. If only the requested (non-minimum) total exceeds the limit, try
    to shrink the buffers of the preceding tables first - unless buffer size
    optimization is disabled, in which case this is also a failure.
  */
  if (curr_min_buff_space_sz > join_buff_space_limit ||
      (curr_buff_space_sz > join_buff_space_limit &&
       (!optimize_buff_size ||
        join->shrink_join_buffers(join_tab, curr_buff_space_sz,
                                  join_buff_space_limit)))) 
    goto fail;

  /* For EXPLAIN only the size bookkeeping matters; no real allocation */
  if (for_explain_only)
    return 0;
                               
  /*
    Allocation retry loop: on each malloc failure shrink the requested size
    by one quarter of the (max - min) span and shrink the preceding buffers
    proportionally; give up once the size would drop below the minimum.
    Note: at most 4 such decrements are possible before falling below min.
  */
  for (size_t buff_size_decr= (buff_size-min_buff_size)/4 + 1; ; )
  {
    size_t next_buff_size;

    if ((buff= (uchar*) my_malloc(buff_size, MYF(MY_THREAD_SPECIFIC))))
      break;

    next_buff_size= buff_size > buff_size_decr ? buff_size-buff_size_decr : 0;
    if (next_buff_size < min_buff_size ||
        join->shrink_join_buffers(join_tab, curr_buff_space_sz,
                                  curr_buff_space_sz-buff_size_decr))
      goto fail;
    buff_size= next_buff_size;

    /* Recompute the total space used after the shrink for the next attempt */
    curr_buff_space_sz= 0;
    for (tab= join->join_tab+join->const_tables; tab <= join_tab; tab++)
    {
      cache= tab->cache;
      if (cache)
        curr_buff_space_sz+= cache->get_join_buffer_size();
    }
  }
  return 0;

fail:
  buff_size= 0;
  return 1;
}
964 | |
965 | |
966 | /* |
  Shrink the size of the cache join buffer in a given ratio
968 | |
969 | SYNOPSIS |
970 | shrink_join_buffer_in_ratio() |
    n                numerator of the ratio to shrink the buffer in
    d                denominator of the ratio
973 | |
974 | DESCRIPTION |
975 | The function first deallocates the join buffer of the cache. Then |
976 | it allocates a buffer that is (n/d) times smaller. |
977 | |
978 | RETURN VALUE |
979 | FALSE on success with allocation of the smaller join buffer |
980 | TRUE otherwise |
981 | */ |
982 | |
983 | bool JOIN_CACHE::shrink_join_buffer_in_ratio(ulonglong n, ulonglong d) |
984 | { |
985 | size_t next_buff_size; |
986 | if (n < d) |
987 | return FALSE; |
988 | next_buff_size= (size_t) ((double) buff_size / n * d); |
989 | set_if_bigger(next_buff_size, min_buff_size); |
990 | buff_size= next_buff_size; |
991 | return realloc_buffer(); |
992 | } |
993 | |
994 | |
995 | /* |
996 | Reallocate the join buffer of a join cache |
997 | |
998 | SYNOPSIS |
999 | realloc_buffer() |
1000 | |
  DESCRIPTION
1002 | The function reallocates the join buffer of the join cache. After this |
1003 | it resets the buffer for writing. |
1004 | |
1005 | NOTES |
1006 | The function assumes that buff_size contains the new value for the join |
1007 | buffer size. |
1008 | |
1009 | RETURN VALUE |
1010 | 0 if the buffer has been successfully reallocated |
1011 | 1 otherwise |
1012 | */ |
1013 | |
1014 | int JOIN_CACHE::realloc_buffer() |
1015 | { |
1016 | int rc; |
1017 | free(); |
1018 | rc= MY_TEST(!(buff= (uchar*) my_malloc(buff_size, MYF(MY_THREAD_SPECIFIC)))); |
1019 | reset(TRUE); |
1020 | return rc; |
1021 | } |
1022 | |
1023 | |
1024 | /* |
1025 | Initialize a join cache |
1026 | |
1027 | SYNOPSIS |
1028 | init() |
1029 | for_explain join buffer is initialized for explain only |
1030 | |
1031 | DESCRIPTION |
1032 | The function initializes the join cache structure. It supposed to be called |
1033 | by init methods for classes derived from the JOIN_CACHE. |
1034 | The function allocates memory for the join buffer and for descriptors of |
1035 | the record fields stored in the buffer. |
1036 | |
1037 | NOTES |
1038 | The code of this function should have been included into the constructor |
1039 | code itself. However the new operator for the class JOIN_CACHE would |
1040 | never fail while memory allocation for the join buffer is not absolutely |
1041 | unlikely to fail. That's why this memory allocation has to be placed in a |
1042 | separate function that is called in a couple with a cache constructor. |
1043 | It is quite natural to put almost all other constructor actions into |
1044 | this function. |
1045 | |
1046 | RETURN VALUE |
1047 | 0 initialization with buffer allocations has been succeeded |
1048 | 1 otherwise |
1049 | */ |
1050 | |
1051 | int JOIN_CACHE::init(bool for_explain) |
1052 | { |
1053 | DBUG_ENTER("JOIN_CACHE::init" ); |
1054 | |
1055 | for_explain_only= for_explain; |
1056 | |
1057 | calc_record_fields(); |
1058 | |
1059 | collect_info_on_key_args(); |
1060 | |
1061 | if (alloc_fields()) |
1062 | DBUG_RETURN(1); |
1063 | |
1064 | create_flag_fields(); |
1065 | |
1066 | create_key_arg_fields(); |
1067 | |
1068 | create_remaining_fields(); |
1069 | |
1070 | set_constants(); |
1071 | |
1072 | if (alloc_buffer()) |
1073 | DBUG_RETURN(1); |
1074 | |
1075 | reset(TRUE); |
1076 | |
1077 | DBUG_RETURN(0); |
1078 | } |
1079 | |
1080 | |
1081 | /* |
1082 | Check the possibility to read the access keys directly from the join buffer |
1083 | SYNOPSIS |
1084 | check_emb_key_usage() |
1085 | |
1086 | DESCRIPTION |
1087 | The function checks some conditions at which the key values can be read |
1088 | directly from the join buffer. This is possible when the key values can be |
1089 | composed by concatenation of the record fields stored in the join buffer. |
1090 | Sometimes when the access key is multi-component the function has to re-order |
1091 | the fields written into the join buffer to make keys embedded. If key |
1092 | values for the key access are detected as embedded then 'use_emb_key' |
1093 | is set to TRUE. |
1094 | |
1095 | EXAMPLE |
1096 | Let table t2 has an index defined on the columns a,b . Let's assume also |
1097 | that the columns t2.a, t2.b as well as the columns t1.a, t1.b are all |
1098 | of the integer type. Then if the query |
1099 | SELECT COUNT(*) FROM t1, t2 WHERE t1.a=t2.a and t1.b=t2.b |
1100 | is executed with a join cache in such a way that t1 is the driving |
1101 | table then the key values to access table t2 can be read directly |
1102 | from the join buffer. |
1103 | |
1104 | NOTES |
1105 | In some cases key values could be read directly from the join buffer but |
    we still do not consider them embedded. In the future we'll expand the
    class of keys which we identify as embedded.
1108 | |
1109 | NOTES |
1110 | The function returns FALSE if no key is used to join the records |
1111 | from join_tab. |
1112 | |
1113 | RETURN VALUE |
1114 | TRUE key values will be considered as embedded, |
1115 | FALSE otherwise. |
1116 | */ |
1117 | |
bool JOIN_CACHE::check_emb_key_usage()
{

  /* Embedded keys make sense only for key-based access to join_tab */
  if (!is_key_access())
    return FALSE;

  uint i;
  Item *item; 
  KEY_PART_INFO *key_part;
  CACHE_FIELD *copy;
  CACHE_FIELD *copy_end;
  uint len= 0;
  TABLE_REF *ref= &join_tab->ref;
  KEY *keyinfo= join_tab->get_keyinfo_by_key_no(ref->key);

  /* 
    If some of the key arguments are not from the local cache the key
    is not considered as embedded.
    TODO:
    Expand it to the case when ref->key_parts=1 and local_key_arg_fields=0.
  */  
  if (external_key_arg_fields != 0)
    return FALSE;
  /* 
    If the number of the local key arguments is not equal to the number
    of key parts the key value cannot be read directly from the join buffer.   
  */
  if (local_key_arg_fields != ref->key_parts)
    return FALSE;

  /* 
    A key is not considered embedded if one of the following is true:
    - one of its key parts is not equal to a field
    - it is a partial key
    - definition of the argument field does not coincide with the
      definition of the corresponding key component
    - some of the key components are nullable
  */  
  for (i=0; i < ref->key_parts; i++)
  {
    item= ref->items[i]->real_item();
    if (item->type() != Item::FIELD_ITEM)
      return FALSE;
    key_part= keyinfo->key_part+i;
    if (key_part->key_part_flag & HA_PART_KEY_SEG)
      return FALSE;
    if (!key_part->field->eq_def(((Item_field *) item)->field))
      return FALSE;
    if (key_part->field->maybe_null())
      return FALSE;
  }

  /* Compute the total length of the would-be embedded key */
  copy= field_descr+flag_fields;
  copy_end= copy+local_key_arg_fields;
  for ( ; copy < copy_end; copy++)
  {
    /* 
      If some of the key arguments are of variable length the key
      is not considered as embedded.
    */
    if (copy->type != 0)
      return FALSE;
    /* 
      If some of the key arguments are bit fields whose bits are partially
      stored with null bits the key is not considered as embedded.
    */
    if (copy->field->type() == MYSQL_TYPE_BIT &&
	 ((Field_bit*) (copy->field))->bit_len)
      return FALSE;
    len+= copy->length;
  }

  emb_key_length= len;

  /*
    Make sure that key fields follow the order of the corresponding
    key components these fields are equal to. For this the descriptors
    of the fields that comprise the key might be re-ordered.
    NOTE(review): the inner loop below advances the OUTER index 'i' while
    'j' keeps the position the scan started from - confirm against upstream
    before touching this loop; the reordering relies on this exact pattern.
  */
  for (i= 0; i < ref->key_parts; i++)
  {
    uint j;
    Item *item= ref->items[i]->real_item();
    Field *fld= ((Item_field *) item)->field;
    CACHE_FIELD *init_copy= field_descr+flag_fields+i;
    for (j= i, copy= init_copy; i < local_key_arg_fields;  i++, copy++)
    {
      if (fld->eq(copy->field))
      {
        if (j != i)
        {
          /* Swap the found descriptor into the key-part position */
          CACHE_FIELD key_part_copy= *copy;
          *copy= *init_copy;
          *init_copy= key_part_copy;
        }
        break;
      }
    }
  }

  return TRUE;
}
1220 | |
1221 | |
1222 | /* |
1223 | Write record fields and their required offsets into the join cache buffer |
1224 | |
1225 | SYNOPSIS |
1226 | write_record_data() |
1227 | link a reference to the associated info in the previous cache |
1228 | is_full OUT true if it has been decided that no more records will be |
1229 | added to the join buffer |
1230 | |
1231 | DESCRIPTION |
    This function puts into the cache buffer the following info that it reads
1233 | from the join record buffers or computes somehow: |
1234 | (1) the length of all fields written for the record (optional) |
1235 | (2) an offset to the associated info in the previous cache (if there is any) |
1236 | determined by the link parameter |
1237 | (3) all flag fields of the tables whose data field are put into the cache: |
1238 | - match flag (optional), |
1239 | - null bitmaps for all tables, |
1240 | - null row flags for all tables |
1241 | (4) values of all data fields including |
      - full images of those fixed length data fields that cannot have 
1243 | trailing spaces |
1244 | - significant part of fixed length fields that can have trailing spaces |
        with the prepended length 
      - data of non-blob variable length fields with the prepended data length  
      - blob data from blob fields with the prepended data length
1248 | (5) record offset values for the data fields that are referred to from |
1249 | other caches |
1250 | |
1251 | The record is written at the current position stored in the field 'pos'. |
1252 | At the end of the function 'pos' points at the position right after the |
1253 | written record data. |
1254 | The function increments the number of records in the cache that is stored |
1255 | in the 'records' field by 1. The function also modifies the values of |
1256 | 'curr_rec_pos' and 'last_rec_pos' to point to the written record. |
1257 | The 'end_pos' cursor is modified accordingly. |
1258 | The 'last_rec_blob_data_is_in_rec_buff' is set on if the blob data |
1259 | remains in the record buffers and not copied to the join buffer. It may |
1260 | happen only to the blob data from the last record added into the cache. |
1261 | If on_precond is attached to join_tab and it is not evaluated to TRUE |
1262 | then MATCH_IMPOSSIBLE is placed in the match flag field of the record |
1263 | written into the join buffer. |
1264 | |
1265 | RETURN VALUE |
1266 | length of the written record data |
1267 | */ |
1268 | |
1269 | uint JOIN_CACHE::write_record_data(uchar * link, bool *is_full) |
1270 | { |
1271 | uint len; |
1272 | bool last_record; |
1273 | CACHE_FIELD *copy; |
1274 | CACHE_FIELD *copy_end; |
1275 | uchar *flags_pos; |
1276 | uchar *cp= pos; |
1277 | uchar *init_pos= cp; |
1278 | uchar *rec_len_ptr= 0; |
1279 | uint = extra_key_length(); |
1280 | |
1281 | records++; /* Increment the counter of records in the cache */ |
1282 | |
1283 | len= pack_length + key_extra; |
1284 | |
1285 | /* Make an adjustment for the size of the auxiliary buffer if there is any */ |
1286 | uint incr= aux_buffer_incr(records); |
1287 | size_t rem= rem_space(); |
1288 | aux_buff_size+= len+incr < rem ? incr : rem; |
1289 | |
1290 | /* |
1291 | For each blob to be put into cache save its length and a pointer |
1292 | to the value in the corresponding element of the blob_ptr array. |
1293 | Blobs with null values are skipped. |
1294 | Increment 'len' by the total length of all these blobs. |
1295 | */ |
1296 | if (blobs) |
1297 | { |
1298 | CACHE_FIELD **copy_ptr= blob_ptr; |
1299 | CACHE_FIELD **copy_ptr_end= copy_ptr+blobs; |
1300 | for ( ; copy_ptr < copy_ptr_end; copy_ptr++) |
1301 | { |
1302 | Field_blob *blob_field= (Field_blob *) (*copy_ptr)->field; |
1303 | if (!blob_field->is_null()) |
1304 | { |
1305 | uint blob_len= blob_field->get_length(); |
1306 | (*copy_ptr)->blob_length= blob_len; |
1307 | len+= blob_len; |
1308 | (*copy_ptr)->str= blob_field->get_ptr(); |
1309 | } |
1310 | } |
1311 | } |
1312 | |
1313 | /* |
1314 | Check whether we won't be able to add any new record into the cache after |
1315 | this one because the cache will be full. Set last_record to TRUE if it's so. |
1316 | The assume that the cache will be full after the record has been written |
1317 | into it if either the remaining space of the cache is not big enough for the |
1318 | record's blob values or if there is a chance that not all non-blob fields |
1319 | of the next record can be placed there. |
1320 | This function is called only in the case when there is enough space left in |
1321 | the cache to store at least non-blob parts of the current record. |
1322 | */ |
1323 | last_record= (len+pack_length_with_blob_ptrs+key_extra) > rem_space(); |
1324 | |
1325 | /* |
1326 | Save the position for the length of the record in the cache if it's needed. |
1327 | The length of the record will be inserted here when all fields of the record |
1328 | are put into the cache. |
1329 | */ |
1330 | if (with_length) |
1331 | { |
1332 | rec_len_ptr= cp; |
1333 | DBUG_ASSERT(cp + size_of_rec_len <= buff + buff_size); |
1334 | cp+= size_of_rec_len; |
1335 | } |
1336 | |
1337 | /* |
1338 | Put a reference to the fields of the record that are stored in the previous |
1339 | cache if there is any. This reference is passed by the 'link' parameter. |
1340 | */ |
1341 | if (prev_cache) |
1342 | { |
1343 | DBUG_ASSERT(cp + prev_cache->get_size_of_rec_offset() <= buff + buff_size); |
1344 | cp+= prev_cache->get_size_of_rec_offset(); |
1345 | prev_cache->store_rec_ref(cp, link); |
1346 | } |
1347 | |
1348 | curr_rec_pos= cp; |
1349 | |
1350 | /* If the there is a match flag set its value to 0 */ |
1351 | copy= field_descr; |
1352 | if (with_match_flag) |
1353 | *copy[0].str= 0; |
1354 | |
1355 | /* First put into the cache the values of all flag fields */ |
1356 | copy_end= field_descr+flag_fields; |
1357 | flags_pos= cp; |
1358 | for ( ; copy < copy_end; copy++) |
1359 | { |
1360 | DBUG_ASSERT(cp + copy->length <= buff + buff_size); |
1361 | memcpy(cp, copy->str, copy->length); |
1362 | cp+= copy->length; |
1363 | } |
1364 | |
1365 | /* Now put the values of the remaining fields as soon as they are not nulls */ |
1366 | copy_end= field_descr+fields; |
1367 | for ( ; copy < copy_end; copy++) |
1368 | { |
1369 | Field *field= copy->field; |
1370 | if (field && field->maybe_null() && field->is_null()) |
1371 | { |
1372 | if (copy->referenced_field_no) |
1373 | copy->offset= 0; |
1374 | continue; |
1375 | } |
1376 | /* Save the offset of the field to put it later at the end of the record */ |
1377 | if (copy->referenced_field_no) |
1378 | copy->offset= (uint)(cp-curr_rec_pos); |
1379 | |
1380 | switch (copy->type) { |
1381 | case CACHE_BLOB: |
1382 | { |
1383 | Field_blob *blob_field= (Field_blob *) copy->field; |
1384 | if (last_record) |
1385 | { |
1386 | last_rec_blob_data_is_in_rec_buff= 1; |
1387 | /* Put down the length of the blob and the pointer to the data */ |
1388 | DBUG_ASSERT(cp + copy->length + sizeof(char*) <= buff + buff_size); |
1389 | blob_field->get_image(cp, copy->length+sizeof(char*), |
1390 | blob_field->charset()); |
1391 | cp+= copy->length+sizeof(char*); |
1392 | } |
1393 | else |
1394 | { |
1395 | /* First put down the length of the blob and then copy the data */ |
1396 | blob_field->get_image(cp, copy->length, |
1397 | blob_field->charset()); |
1398 | DBUG_ASSERT(cp + copy->length + copy->blob_length <= buff + buff_size); |
1399 | memcpy(cp+copy->length, copy->str, copy->blob_length); |
1400 | cp+= copy->length+copy->blob_length; |
1401 | } |
1402 | break; |
1403 | } |
1404 | case CACHE_VARSTR1: |
1405 | /* Copy the significant part of the short varstring field */ |
1406 | len= (uint) copy->str[0] + 1; |
1407 | DBUG_ASSERT(cp + len <= buff + buff_size); |
1408 | memcpy(cp, copy->str, len); |
1409 | cp+= len; |
1410 | break; |
1411 | case CACHE_VARSTR2: |
1412 | /* Copy the significant part of the long varstring field */ |
1413 | len= uint2korr(copy->str) + 2; |
1414 | DBUG_ASSERT(cp + len <= buff + buff_size); |
1415 | memcpy(cp, copy->str, len); |
1416 | cp+= len; |
1417 | break; |
1418 | case CACHE_STRIPPED: |
1419 | { |
1420 | /* |
1421 | Put down the field value stripping all trailing spaces off. |
1422 | After this insert the length of the written sequence of bytes. |
1423 | */ |
1424 | uchar *str, *end; |
1425 | for (str= copy->str, end= str+copy->length; |
1426 | end > str && end[-1] == ' '; |
1427 | end--) ; |
1428 | len=(uint) (end-str); |
1429 | DBUG_ASSERT(cp + len + 2 <= buff + buff_size); |
1430 | int2store(cp, len); |
1431 | memcpy(cp+2, str, len); |
1432 | cp+= len+2; |
1433 | break; |
1434 | } |
1435 | case CACHE_ROWID: |
1436 | if (!copy->length) |
1437 | { |
1438 | /* |
1439 | This may happen only for ROWID fields of materialized |
1440 | derived tables and views. |
1441 | */ |
1442 | TABLE *table= (TABLE *) copy->str; |
1443 | copy->str= table->file->ref; |
1444 | copy->length= table->file->ref_length; |
1445 | if (!copy->str) |
1446 | { |
1447 | /* |
1448 | If table is an empty inner table of an outer join and it is |
1449 | a materialized derived table then table->file->ref == NULL. |
1450 | */ |
1451 | cp+= copy->length; |
1452 | break; |
1453 | } |
1454 | } |
1455 | /* fall through */ |
1456 | default: |
1457 | /* Copy the entire image of the field from the record buffer */ |
1458 | DBUG_ASSERT(cp + copy->length <= buff + buff_size); |
1459 | if (copy->str) |
1460 | memcpy(cp, copy->str, copy->length); |
1461 | cp+= copy->length; |
1462 | } |
1463 | } |
1464 | |
1465 | /* Add the offsets of the fields that are referenced from other caches */ |
1466 | if (referenced_fields) |
1467 | { |
1468 | uint cnt= 0; |
1469 | for (copy= field_descr+flag_fields; copy < copy_end ; copy++) |
1470 | { |
1471 | if (copy->referenced_field_no) |
1472 | { |
1473 | store_fld_offset(cp+size_of_fld_ofs*(copy->referenced_field_no-1), |
1474 | copy->offset); |
1475 | cnt++; |
1476 | } |
1477 | } |
1478 | DBUG_ASSERT(cp + size_of_fld_ofs*cnt <= buff + buff_size); |
1479 | cp+= size_of_fld_ofs*cnt; |
1480 | } |
1481 | |
1482 | if (rec_len_ptr) |
1483 | store_rec_length(rec_len_ptr, (ulong) (cp-rec_len_ptr-size_of_rec_len)); |
1484 | last_rec_pos= curr_rec_pos; |
1485 | end_pos= pos= cp; |
1486 | *is_full= last_record; |
1487 | |
1488 | last_written_is_null_compl= 0; |
1489 | if (!join_tab->first_unmatched && join_tab->on_precond) |
1490 | { |
1491 | join_tab->found= 0; |
1492 | join_tab->not_null_compl= 1; |
1493 | if (!join_tab->on_precond->val_int()) |
1494 | { |
1495 | flags_pos[0]= MATCH_IMPOSSIBLE; |
1496 | last_written_is_null_compl= 1; |
1497 | } |
1498 | } |
1499 | |
1500 | return (uint) (cp-init_pos); |
1501 | } |
1502 | |
1503 | |
1504 | /* |
1505 | Reset the join buffer for reading/writing: default implementation |
1506 | |
1507 | SYNOPSIS |
1508 | reset() |
1509 | for_writing if it's TRUE the function reset the buffer for writing |
1510 | |
1511 | DESCRIPTION |
1512 | This default implementation of the virtual function reset() resets |
1513 | the join buffer for reading or writing. |
1514 | If the buffer is reset for reading only the 'pos' value is reset |
1515 | to point to the very beginning of the join buffer. If the buffer is |
1516 | reset for writing additionally: |
1517 | - the counter of the records in the buffer is set to 0, |
    - the value of 'last_rec_pos' gets pointing at the position just
1519 | before the buffer, |
1520 | - 'end_pos' is set to point to the beginning of the join buffer, |
1521 | - the size of the auxiliary buffer is reset to 0, |
1522 | - the flag 'last_rec_blob_data_is_in_rec_buff' is set to 0. |
1523 | |
1524 | RETURN VALUE |
1525 | none |
1526 | */ |
1527 | void JOIN_CACHE::reset(bool for_writing) |
1528 | { |
1529 | pos= buff; |
1530 | curr_rec_link= 0; |
1531 | if (for_writing) |
1532 | { |
1533 | records= 0; |
1534 | last_rec_pos= buff; |
1535 | aux_buff_size= 0; |
1536 | end_pos= pos; |
1537 | last_rec_blob_data_is_in_rec_buff= 0; |
1538 | } |
1539 | } |
1540 | |
1541 | |
1542 | /* |
1543 | Add a record into the join buffer: the default implementation |
1544 | |
1545 | SYNOPSIS |
1546 | put_record() |
1547 | |
1548 | DESCRIPTION |
1549 | This default implementation of the virtual function put_record writes |
1550 | the next matching record into the join buffer. |
1551 | It also links the record having been written into the join buffer with |
1552 | the matched record in the previous cache if there is any. |
1553 | The implementation assumes that the function get_curr_link() |
1554 | will return exactly the pointer to this matched record. |
1555 | |
1556 | RETURN VALUE |
1557 | TRUE if it has been decided that it should be the last record |
1558 | in the join buffer, |
1559 | FALSE otherwise |
1560 | */ |
1561 | |
1562 | bool JOIN_CACHE::put_record() |
1563 | { |
1564 | bool is_full; |
1565 | uchar *link= 0; |
1566 | if (prev_cache) |
1567 | link= prev_cache->get_curr_rec_link(); |
1568 | write_record_data(link, &is_full); |
1569 | return is_full; |
1570 | } |
1571 | |
1572 | |
1573 | /* |
1574 | Read the next record from the join buffer: the default implementation |
1575 | |
1576 | SYNOPSIS |
1577 | get_record() |
1578 | |
1579 | DESCRIPTION |
1580 | This default implementation of the virtual function get_record |
1581 | reads fields of the next record from the join buffer of this cache. |
1582 | The function also reads all other fields associated with this record |
    from the join buffers of the previous caches. The fields are read
1584 | into the corresponding record buffers. |
1585 | It is supposed that 'pos' points to the position in the buffer |
1586 | right after the previous record when the function is called. |
1587 | When the function returns the 'pos' values is updated to point |
1588 | to the position after the read record. |
1589 | The value of 'curr_rec_pos' is also updated by the function to |
1590 | point to the beginning of the first field of the record in the |
1591 | join buffer. |
1592 | |
1593 | RETURN VALUE |
1594 | TRUE there are no more records to read from the join buffer |
1595 | FALSE otherwise |
1596 | */ |
1597 | |
1598 | bool JOIN_CACHE::get_record() |
1599 | { |
1600 | bool res; |
1601 | uchar *prev_rec_ptr= 0; |
1602 | if (with_length) |
1603 | pos+= size_of_rec_len; |
1604 | if (prev_cache) |
1605 | { |
1606 | pos+= prev_cache->get_size_of_rec_offset(); |
1607 | prev_rec_ptr= prev_cache->get_rec_ref(pos); |
1608 | } |
1609 | curr_rec_pos= pos; |
1610 | if (!(res= read_all_record_fields() == NO_MORE_RECORDS_IN_BUFFER)) |
1611 | { |
1612 | pos+= referenced_fields*size_of_fld_ofs; |
1613 | if (prev_cache) |
1614 | prev_cache->get_record_by_pos(prev_rec_ptr); |
1615 | } |
1616 | return res; |
1617 | } |
1618 | |
1619 | |
1620 | /* |
1621 | Read a positioned record from the join buffer: the default implementation |
1622 | |
1623 | SYNOPSIS |
1624 | get_record_by_pos() |
1625 | rec_ptr position of the first field of the record in the join buffer |
1626 | |
1627 | DESCRIPTION |
1628 | This default implementation of the virtual function get_record_pos |
1629 | reads the fields of the record positioned at 'rec_ptr' from the join buffer. |
1630 | The function also reads all other fields associated with this record |
    from the join buffers of the previous caches. The fields are read
1632 | into the corresponding record buffers. |
1633 | |
1634 | RETURN VALUE |
1635 | none |
1636 | */ |
1637 | |
1638 | void JOIN_CACHE::get_record_by_pos(uchar *rec_ptr) |
1639 | { |
1640 | uchar *save_pos= pos; |
1641 | pos= rec_ptr; |
1642 | read_all_record_fields(); |
1643 | pos= save_pos; |
1644 | if (prev_cache) |
1645 | { |
1646 | uchar *prev_rec_ptr= prev_cache->get_rec_ref(rec_ptr); |
1647 | prev_cache->get_record_by_pos(prev_rec_ptr); |
1648 | } |
1649 | } |
1650 | |
1651 | |
1652 | /* |
1653 | Get the match flag from the referenced record: the default implementation |
1654 | |
1655 | SYNOPSIS |
1656 | get_match_flag_by_pos() |
1657 | rec_ptr position of the first field of the record in the join buffer |
1658 | |
1659 | DESCRIPTION |
1660 | This default implementation of the virtual function get_match_flag_by_pos |
1661 | get the match flag for the record pointed by the reference at the position |
1662 | rec_ptr. If the match flag is placed in one of the previous buffers the |
1663 | function first reaches the linked record fields in this buffer. |
1664 | |
1665 | RETURN VALUE |
1666 | match flag for the record at the position rec_ptr |
1667 | */ |
1668 | |
1669 | enum JOIN_CACHE::Match_flag JOIN_CACHE::get_match_flag_by_pos(uchar *rec_ptr) |
1670 | { |
1671 | Match_flag match_fl= MATCH_NOT_FOUND; |
1672 | if (with_match_flag) |
1673 | { |
1674 | match_fl= (enum Match_flag) rec_ptr[0]; |
1675 | return match_fl; |
1676 | } |
1677 | if (prev_cache) |
1678 | { |
1679 | uchar *prev_rec_ptr= prev_cache->get_rec_ref(rec_ptr); |
1680 | return prev_cache->get_match_flag_by_pos(prev_rec_ptr); |
1681 | } |
1682 | DBUG_ASSERT(0); |
1683 | return match_fl; |
1684 | } |
1685 | |
1686 | |
1687 | /* |
1688 | Calculate the increment of the auxiliary buffer for a record write |
1689 | |
1690 | SYNOPSIS |
1691 | aux_buffer_incr() |
1692 | recno the number of the record the increment to be calculated for |
1693 | |
1694 | DESCRIPTION |
    This function calls the aux_buffer_incr method of the
1696 | companion member join_tab_scan to calculate the growth of the |
1697 | auxiliary buffer when the recno-th record is added to the |
1698 | join_buffer of this cache. |
1699 | |
1700 | RETURN VALUE |
1701 | the number of bytes in the increment |
1702 | */ |
1703 | |
1704 | uint JOIN_CACHE::aux_buffer_incr(size_t recno) |
1705 | { |
1706 | return join_tab_scan->aux_buffer_incr(recno); |
1707 | } |
1708 | |
1709 | /* |
1710 | Read all flag and data fields of a record from the join buffer |
1711 | |
1712 | SYNOPSIS |
1713 | read_all_record_fields() |
1714 | |
1715 | DESCRIPTION |
1716 | The function reads all flag and data fields of a record from the join |
1717 | buffer into the corresponding record buffers. |
1718 | The fields are read starting from the position 'pos' which is |
1719 | supposed to point to the beginning of the first record field. |
1720 | The function increments the value of 'pos' by the length of the |
1721 | read data. |
1722 | |
1723 | RETURN VALUE |
    (-1)   if there are no more records in the join buffer
1725 | length of the data read from the join buffer - otherwise |
1726 | */ |
1727 | |
1728 | uint JOIN_CACHE::read_all_record_fields() |
1729 | { |
1730 | uchar *init_pos= pos; |
1731 | |
1732 | if (pos > last_rec_pos || !records) |
1733 | return NO_MORE_RECORDS_IN_BUFFER; |
1734 | |
1735 | /* First match flag, read null bitmaps and null_row flag for each table */ |
1736 | read_flag_fields(); |
1737 | |
1738 | /* Now read the remaining table fields if needed */ |
1739 | CACHE_FIELD *copy= field_descr+flag_fields; |
1740 | CACHE_FIELD *copy_end= field_descr+fields; |
1741 | bool blob_in_rec_buff= blob_data_is_in_rec_buff(init_pos); |
1742 | for ( ; copy < copy_end; copy++) |
1743 | read_record_field(copy, blob_in_rec_buff); |
1744 | |
1745 | return (uint) (pos-init_pos); |
1746 | } |
1747 | |
1748 | |
1749 | /* |
1750 | Read all flag fields of a record from the join buffer |
1751 | |
1752 | SYNOPSIS |
1753 | read_flag_fields() |
1754 | |
1755 | DESCRIPTION |
1756 | The function reads all flag fields of a record from the join |
1757 | buffer into the corresponding record buffers. |
1758 | The fields are read starting from the position 'pos'. |
1759 | The function increments the value of 'pos' by the length of the |
1760 | read data. |
1761 | |
1762 | RETURN VALUE |
1763 | length of the data read from the join buffer |
1764 | */ |
1765 | |
1766 | uint JOIN_CACHE::read_flag_fields() |
1767 | { |
1768 | uchar *init_pos= pos; |
1769 | CACHE_FIELD *copy= field_descr; |
1770 | CACHE_FIELD *copy_end= copy+flag_fields; |
1771 | if (with_match_flag) |
1772 | { |
1773 | copy->str[0]= MY_TEST((Match_flag) pos[0] == MATCH_FOUND); |
1774 | pos+= copy->length; |
1775 | copy++; |
1776 | } |
1777 | for ( ; copy < copy_end; copy++) |
1778 | { |
1779 | memcpy(copy->str, pos, copy->length); |
1780 | pos+= copy->length; |
1781 | } |
1782 | return (uint)(pos-init_pos); |
1783 | } |
1784 | |
1785 | |
1786 | /* |
1787 | Read a data record field from the join buffer |
1788 | |
1789 | SYNOPSIS |
1790 | read_record_field() |
1791 | copy the descriptor of the data field to be read |
1792 | blob_in_rec_buff indicates whether this is the field from the record |
1793 | whose blob data are in record buffers |
1794 | |
1795 | DESCRIPTION |
1796 | The function reads the data field specified by the parameter copy |
1797 | from the join buffer into the corresponding record buffer. |
1798 | The field is read starting from the position 'pos'. |
1799 | The data of blob values is not copied from the join buffer. |
1800 | The function increments the value of 'pos' by the length of the |
1801 | read data. |
1802 | |
1803 | RETURN VALUE |
1804 | length of the data read from the join buffer |
1805 | */ |
1806 | |
1807 | uint JOIN_CACHE::read_record_field(CACHE_FIELD *copy, bool blob_in_rec_buff) |
1808 | { |
1809 | uint len; |
1810 | /* Do not copy the field if its value is null */ |
1811 | if (copy->field && copy->field->maybe_null() && copy->field->is_null()) |
1812 | return 0; |
1813 | switch (copy->type) { |
1814 | case CACHE_BLOB: |
1815 | { |
1816 | Field_blob *blob_field= (Field_blob *) copy->field; |
1817 | /* |
1818 | Copy the length and the pointer to data but not the blob data |
1819 | itself to the record buffer |
1820 | */ |
1821 | if (blob_in_rec_buff) |
1822 | { |
1823 | blob_field->set_image(pos, copy->length + sizeof(char*), |
1824 | blob_field->charset()); |
1825 | len= copy->length + sizeof(char*); |
1826 | } |
1827 | else |
1828 | { |
1829 | blob_field->set_ptr(pos, pos+copy->length); |
1830 | len= copy->length + blob_field->get_length(); |
1831 | } |
1832 | } |
1833 | break; |
1834 | case CACHE_VARSTR1: |
1835 | /* Copy the significant part of the short varstring field */ |
1836 | len= (uint) pos[0] + 1; |
1837 | memcpy(copy->str, pos, len); |
1838 | break; |
1839 | case CACHE_VARSTR2: |
1840 | /* Copy the significant part of the long varstring field */ |
1841 | len= uint2korr(pos) + 2; |
1842 | memcpy(copy->str, pos, len); |
1843 | break; |
1844 | case CACHE_STRIPPED: |
1845 | /* Pad the value by spaces that has been stripped off */ |
1846 | len= uint2korr(pos); |
1847 | memcpy(copy->str, pos+2, len); |
1848 | memset(copy->str+len, ' ', copy->length-len); |
1849 | len+= 2; |
1850 | break; |
1851 | case CACHE_ROWID: |
1852 | if (!copy->str) |
1853 | { |
1854 | len= copy->length; |
1855 | break; |
1856 | } |
1857 | /* fall through */ |
1858 | default: |
1859 | /* Copy the entire image of the field from the record buffer */ |
1860 | len= copy->length; |
1861 | memcpy(copy->str, pos, len); |
1862 | } |
1863 | pos+= len; |
1864 | return len; |
1865 | } |
1866 | |
1867 | |
1868 | /* |
1869 | Read a referenced field from the join buffer |
1870 | |
1871 | SYNOPSIS |
1872 | read_referenced_field() |
1873 | copy pointer to the descriptor of the referenced field |
1874 | rec_ptr pointer to the record that may contain this field |
1875 | len IN/OUT total length of the record fields |
1876 | |
1877 | DESCRIPTION |
1878 | The function checks whether copy points to a data field descriptor |
1879 | for this cache object. If it does not then the function returns |
1880 | FALSE. Otherwise the function reads the field of the record in |
1881 | the join buffer pointed by 'rec_ptr' into the corresponding record |
1882 | buffer and returns TRUE. |
1883 | If the value of *len is 0 then the function sets it to the total |
1884 | length of the record fields including possible trailing offset |
1885 | values. Otherwise *len is supposed to provide this value that |
1886 | has been obtained earlier. |
1887 | |
1888 | NOTE |
1889 | If the value of the referenced field is null then the offset |
1890 | for the value is set to 0. If the value of a field can be null |
1891 | then the value of flag_fields is always positive. So the offset |
1892 | for any non-null value cannot be 0 in this case. |
1893 | |
1894 | RETURN VALUE |
1895 | TRUE 'copy' points to a data descriptor of this join cache |
1896 | FALSE otherwise |
1897 | */ |
1898 | |
bool JOIN_CACHE::read_referenced_field(CACHE_FIELD *copy,
                                       uchar *rec_ptr,
                                       uint *len)
{
  uchar *ptr;
  uint offset;
  /* 'copy' may belong to another cache of the chain: then it is not ours */
  if (copy < field_descr || copy >= field_descr+fields)
    return FALSE;
  if (!*len)
  {
    /* Get the total length of the record fields */
    uchar *len_ptr= rec_ptr;
    if (prev_cache)
      len_ptr-= prev_cache->get_size_of_rec_offset();
    *len= get_rec_length(len_ptr-size_of_rec_len);
  }

  /*
    Offsets of referenced fields are stored as a trailer after the record
    fields; pick the one for this field by its referenced_field_no.
  */
  ptr= rec_ptr-(prev_cache ? prev_cache->get_size_of_rec_offset() : 0);
  offset= get_fld_offset(ptr+ *len -
                         size_of_fld_ofs*
                         (referenced_fields+1-copy->referenced_field_no));
  bool is_null= FALSE;
  Field *field= copy->field;
  /* A zero offset encodes a null value (see the NOTE in the header comment) */
  if (offset == 0 && flag_fields)
    is_null= TRUE;
  if (is_null)
  {
    field->set_null();
    if (!field->real_maybe_null())
      field->table->null_row= 1;
  }
  else
  {
    /* Temporarily redirect 'pos' to the field image, then restore it */
    uchar *save_pos= pos;
    field->set_notnull();
    if (!field->real_maybe_null())
      field->table->null_row= 0;
    pos= rec_ptr+offset;
    read_record_field(copy, blob_data_is_in_rec_buff(rec_ptr));
    pos= save_pos;
  }
  return TRUE;
}
1942 | |
1943 | |
1944 | /* |
  Skip record from join buffer if it's already matched: default implementation
1946 | |
1947 | SYNOPSIS |
1948 | skip_if_matched() |
1949 | |
1950 | DESCRIPTION |
1951 | This default implementation of the virtual function skip_if_matched |
1952 | skips the next record from the join buffer if its match flag is set to |
1953 | MATCH_FOUND. |
1954 | If the record is skipped the value of 'pos' is set to point to the position |
1955 | right after the record. |
1956 | |
1957 | RETURN VALUE |
1958 | TRUE the match flag is set to MATCH_FOUND and the record has been skipped |
1959 | FALSE otherwise |
1960 | */ |
1961 | |
1962 | bool JOIN_CACHE::skip_if_matched() |
1963 | { |
1964 | DBUG_ASSERT(with_length); |
1965 | uint offset= size_of_rec_len; |
1966 | if (prev_cache) |
1967 | offset+= prev_cache->get_size_of_rec_offset(); |
1968 | /* Check whether the match flag is MATCH_FOUND */ |
1969 | if (get_match_flag_by_pos(pos+offset) == MATCH_FOUND) |
1970 | { |
1971 | pos+= size_of_rec_len + get_rec_length(pos); |
1972 | return TRUE; |
1973 | } |
1974 | return FALSE; |
1975 | } |
1976 | |
1977 | |
1978 | /* |
1979 | Skip record from join buffer if the match isn't needed: default implementation |
1980 | |
1981 | SYNOPSIS |
1982 | skip_if_not_needed_match() |
1983 | |
1984 | DESCRIPTION |
1985 | This default implementation of the virtual function skip_if_not_needed_match |
1986 | skips the next record from the join buffer if its match flag is not |
1987 | MATCH_NOT_FOUND, and, either its value is MATCH_FOUND and join_tab is the |
1988 | first inner table of an inner join, or, its value is MATCH_IMPOSSIBLE |
1989 | and join_tab is the first inner table of an outer join. |
1990 | If the record is skipped the value of 'pos' is set to point to the position |
1991 | right after the record. |
1992 | |
1993 | RETURN VALUE |
1994 | TRUE the record has to be skipped |
1995 | FALSE otherwise |
1996 | */ |
1997 | |
1998 | bool JOIN_CACHE::skip_if_not_needed_match() |
1999 | { |
2000 | DBUG_ASSERT(with_length); |
2001 | enum Match_flag match_fl; |
2002 | uint offset= size_of_rec_len; |
2003 | if (prev_cache) |
2004 | offset+= prev_cache->get_size_of_rec_offset(); |
2005 | |
2006 | if ((match_fl= get_match_flag_by_pos(pos+offset)) != MATCH_NOT_FOUND && |
2007 | (join_tab->check_only_first_match() == (match_fl == MATCH_FOUND)) ) |
2008 | { |
2009 | pos+= size_of_rec_len + get_rec_length(pos); |
2010 | return TRUE; |
2011 | } |
2012 | return FALSE; |
2013 | } |
2014 | |
2015 | |
2016 | /* |
2017 | Restore the fields of the last record from the join buffer |
2018 | |
2019 | SYNOPSIS |
2020 | restore_last_record() |
2021 | |
2022 | DESCRIPTION |
    This function restores the values of the fields of the last record put
2024 | into join buffer in record buffers. The values most probably have been |
2025 | overwritten by the field values from other records when they were read |
2026 | from the join buffer into the record buffer in order to check pushdown |
2027 | predicates. |
2028 | |
2029 | RETURN |
2030 | none |
2031 | */ |
2032 | |
2033 | void JOIN_CACHE::restore_last_record() |
2034 | { |
2035 | if (records) |
2036 | get_record_by_pos(last_rec_pos); |
2037 | } |
2038 | |
2039 | |
2040 | /* |
2041 | Join records from the join buffer with records from the next join table |
2042 | |
2043 | SYNOPSIS |
2044 | join_records() |
2045 | skip_last do not find matches for the last record from the buffer |
2046 | |
2047 | DESCRIPTION |
    The function extends all records from the join buffer by the matched
2049 | records from join_tab. In the case of outer join operation it also |
2050 | adds null complementing extensions for the records from the join buffer |
2051 | that have no match. |
2052 | No extensions are generated for the last record from the buffer if |
2053 | skip_last is true. |
2054 | |
2055 | NOTES |
2056 | The function must make sure that if linked join buffers are used then |
2057 | a join buffer cannot be refilled again until all extensions in the |
2058 | buffers chained to this one are generated. |
2059 | Currently an outer join operation with several inner tables always uses |
2060 | at least two linked buffers with the match join flags placed in the |
2061 | first buffer. Any record composed of rows of the inner tables that |
2062 | matches a record in this buffer must refer to the position of the |
2063 | corresponding match flag. |
2064 | |
2065 | IMPLEMENTATION |
2066 | When generating extensions for outer tables of an outer join operation |
2067 | first we generate all extensions for those records from the join buffer |
2068 | that have matches, after which null complementing extension for all |
2069 | unmatched records from the join buffer are generated. |
2070 | |
2071 | RETURN VALUE |
2072 | return one of enum_nested_loop_state, except NESTED_LOOP_NO_MORE_ROWS. |
2073 | */ |
2074 | |
enum_nested_loop_state JOIN_CACHE::join_records(bool skip_last)
{
  JOIN_TAB *tab;
  enum_nested_loop_state rc= NESTED_LOOP_OK;
  bool outer_join_first_inner= join_tab->is_first_inner_for_outer_join();
  DBUG_ENTER("JOIN_CACHE::join_records" );

  /* First (matching) pass: no null complementing has happened yet */
  if (outer_join_first_inner && !join_tab->first_unmatched)
    join_tab->not_null_compl= TRUE;

  if (!join_tab->first_unmatched)
  {
    /* Find all records from join_tab that match records from join buffer */
    rc= join_matching_records(skip_last);
    if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
      goto finish;
    if (outer_join_first_inner)
    {
      if (next_cache && join_tab != join_tab->last_inner)
      {
        /*
          Ensure that all matches for outer records from join buffer are to be
          found. Now we ensure that all full records are found for records from
          join buffer. Generally this is an overkill.
          TODO: Ensure that only matches of the inner table records have to be
          found for the records from join buffer.
        */
        rc= next_cache->join_records(skip_last);
        if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
          goto finish;
      }
      join_tab->not_null_compl= FALSE;
      /* Prepare for generation of null complementing extensions */
      for (tab= join_tab->first_inner; tab <= join_tab->last_inner; tab++)
        tab->first_unmatched= join_tab->first_inner;
    }
  }
  if (join_tab->first_unmatched)
  {
    if (is_key_access())
      restore_last_record();

    /*
      Generate all null complementing extensions for the records from
      join buffer that don't have any matching rows from the inner tables.
    */
    reset(FALSE);
    rc= join_null_complements(skip_last);
    if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
      goto finish;
  }
  if(next_cache)
  {
    /*
      When using linked caches we must ensure the records in the next caches
      that refer to the records in the join buffer are fully extended.
      Otherwise we could have references to the records that have been
      already erased from the join buffer and replaced for new records.
    */
    rc= next_cache->join_records(skip_last);
    if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
      goto finish;
  }

  if (skip_last)
  {
    DBUG_ASSERT(!is_key_access());
    /*
      Restore the last record from the join buffer to generate
      all extensions for it.
    */
    get_record();
  }

finish:
  if (outer_join_first_inner)
  {
    /*
      All null complemented rows have been already generated for all
      outer records from join buffer. Restore the state of the
      first_unmatched values to 0 to avoid another null complementing.
    */
    for (tab= join_tab->first_inner; tab <= join_tab->last_inner; tab++)
      tab->first_unmatched= 0;
  }
  restore_last_record();
  reset(TRUE);
  DBUG_PRINT("exit" , ("rc: %d" , rc));
  DBUG_RETURN(rc);
}
2165 | |
2166 | |
2167 | /* |
2168 | Find matches from the next table for records from the join buffer |
2169 | |
2170 | SYNOPSIS |
2171 | join_matching_records() |
2172 | skip_last do not look for matches for the last partial join record |
2173 | |
2174 | DESCRIPTION |
2175 | The function retrieves rows of the join_tab table and checks whether they |
2176 | match partial join records from the join buffer. If a match is found |
2177 | the function will call the sub_select function trying to look for matches |
2178 | for the remaining join operations. |
2179 | This function currently is called only from the function join_records. |
2180 | If the value of skip_last is true the function writes the partial join |
2181 | record from the record buffer into the join buffer to save its value for |
2182 | the future processing in the caller function. |
2183 | |
2184 | NOTES |
2185 | If employed by BNL or BNLH join algorithms the function performs a full |
2186 | scan of join_tab for each refill of the join buffer. If BKA or BKAH |
2187 | algorithms are used then the function iterates only over those records |
2188 | from join_tab that can be accessed by keys built over records in the join |
2189 | buffer. To apply a proper method of iteration the function just calls |
2190 | virtual iterator methods (open, next, close) of the member join_tab_scan. |
    The member can be either of the JOIN_TAB_SCAN or JOIN_TAB_SCAN_MRR type.
2192 | The class JOIN_TAB_SCAN provides the iterator methods for BNL/BNLH join |
2193 | algorithms. The class JOIN_TAB_SCAN_MRR provides the iterator methods |
2194 | for BKA/BKAH join algorithms. |
2195 | When the function looks for records from the join buffer that would |
2196 | match a record from join_tab it iterates either over all records in |
2197 | the buffer or only over selected records. If BNL join operation is |
2198 | performed all records are checked for the match. If BNLH or BKAH |
2199 | algorithm is employed to join join_tab then the function looks only |
2200 | through the records with the same join key as the record from join_tab. |
2201 | With the BKA join algorithm only one record from the join buffer is checked |
2202 | for a match for any record from join_tab. To iterate over the candidates |
2203 | for a match the virtual function get_next_candidate_for_match is used, |
2204 | while the virtual function prepare_look_for_matches is called to prepare |
2205 | for such iteration process. |
2206 | |
2207 | NOTES |
2208 | The function produces all matching extensions for the records in the |
2209 | join buffer following the path of the employed blocked algorithm. |
2210 | When an outer join operation is performed all unmatched records from |
2211 | the join buffer must be extended by null values. The function |
2212 | 'join_null_complements' serves this purpose. |
2213 | |
2214 | RETURN VALUE |
2215 | return one of enum_nested_loop_state |
2216 | */ |
2217 | |
enum_nested_loop_state JOIN_CACHE::join_matching_records(bool skip_last)
{
  int error;
  enum_nested_loop_state rc= NESTED_LOOP_OK;
  join_tab->table->null_row= 0;
  bool check_only_first_match= join_tab->check_only_first_match();
  bool outer_join_first_inner= join_tab->is_first_inner_for_outer_join();
  DBUG_ENTER("JOIN_CACHE::join_matching_records" );

  /* Return at once if there are no records in the join buffer */
  if (!records)
    DBUG_RETURN(NESTED_LOOP_OK);

  /*
    When joining we read records from the join buffer back into record buffers.
    If matches for the last partial join record are found through a call to
    the sub_select function then this partial join record must be saved in the
    join buffer in order to be restored just before the sub_select call.
  */
  if (skip_last)
    put_record();

  if (join_tab->use_quick == 2 && join_tab->select->quick)
  {
    /* A dynamic range access was used last. Clean up after it */
    delete join_tab->select->quick;
    join_tab->select->quick= 0;
  }

  /* A negative result of the startup call aborts the whole operation */
  if ((rc= join_tab_execution_startup(join_tab)) < 0)
    goto finish2;

  /* Prepare to retrieve all records of the joined table */
  if (unlikely((error= join_tab_scan->open())))
  {
    /*
      TODO: if we get here, we will assert in net_send_statement(). Add test
      coverage and fix.
    */
    goto finish;
  }

  /* Iterate over the rows of join_tab delivered by the scan object */
  while (!(error= join_tab_scan->next()))
  {
    if (unlikely(join->thd->check_killed()))
    {
      /* The user has aborted the execution of the query */
      join->thd->send_kill_message();
      rc= NESTED_LOOP_KILLED;
      goto finish;
    }

    /* Remember the rowid of the current row if it is needed later */
    if (join_tab->keep_current_rowid)
      join_tab->table->file->position(join_tab->table->record[0]);

    /* Prepare to read matching candidates from the join buffer */
    if (prepare_look_for_matches(skip_last))
      continue;
    join_tab->jbuf_tracker->r_scans++;

    uchar *rec_ptr;
    /* Read each possible candidate from the buffer and look for matches */
    while ((rec_ptr= get_next_candidate_for_match()))
    {
      join_tab->jbuf_tracker->r_rows++;
      /*
        If only the first match is needed, and, it has been already found for
        the next record read from the join buffer, then the record is skipped.
        Also those records that must be null complemented are not considered
        as candidates for matches.
      */
      if ((!check_only_first_match && !outer_join_first_inner) ||
          !skip_next_candidate_for_match(rec_ptr))
      {
        read_next_candidate_for_match(rec_ptr);
        rc= generate_full_extensions(rec_ptr);
        if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
          goto finish;
      }
    }
  }

finish:
  /* A negative scan error means 'end of rows', not a failure */
  if (error)
    rc= error < 0 ? NESTED_LOOP_NO_MORE_ROWS: NESTED_LOOP_ERROR;
finish2:
  join_tab_scan->close();
  DBUG_RETURN(rc);
}
2307 | |
2308 | |
2309 | /* |
2310 | Set match flag for a record in join buffer if it has not been set yet |
2311 | |
2312 | SYNOPSIS |
2313 | set_match_flag_if_none() |
2314 | first_inner the join table to which this flag is attached to |
2315 | rec_ptr pointer to the record in the join buffer |
2316 | |
2317 | DESCRIPTION |
2318 | If the records of the table are accumulated in a join buffer the function |
2319 | sets the match flag for the record in the buffer that is referred to by |
2320 | the record from this cache positioned at 'rec_ptr'. |
2321 | The function also sets the match flag 'found' of the table first inner |
2322 | if it has not been set before. |
2323 | |
2324 | NOTES |
2325 | The function assumes that the match flag for any record in any cache |
2326 | is placed in the first byte occupied by the record fields. |
2327 | |
2328 | RETURN VALUE |
2329 | TRUE the match flag is set by this call for the first time |
2330 | FALSE the match flag has been set before this call |
2331 | */ |
2332 | |
2333 | bool JOIN_CACHE::set_match_flag_if_none(JOIN_TAB *first_inner, |
2334 | uchar *rec_ptr) |
2335 | { |
2336 | if (!first_inner->cache) |
2337 | { |
2338 | /* |
2339 | Records of the first inner table to which the flag is attached to |
2340 | are not accumulated in a join buffer. |
2341 | */ |
2342 | if (first_inner->found) |
2343 | return FALSE; |
2344 | else |
2345 | { |
2346 | first_inner->found= 1; |
2347 | return TRUE; |
2348 | } |
2349 | } |
2350 | JOIN_CACHE *cache= this; |
2351 | while (cache->join_tab != first_inner) |
2352 | { |
2353 | cache= cache->prev_cache; |
2354 | DBUG_ASSERT(cache); |
2355 | rec_ptr= cache->get_rec_ref(rec_ptr); |
2356 | } |
2357 | if ((Match_flag) rec_ptr[0] != MATCH_FOUND) |
2358 | { |
2359 | rec_ptr[0]= MATCH_FOUND; |
2360 | first_inner->found= 1; |
2361 | return TRUE; |
2362 | } |
2363 | return FALSE; |
2364 | } |
2365 | |
2366 | |
2367 | /* |
2368 | Generate all full extensions for a partial join record in the buffer |
2369 | |
2370 | SYNOPSIS |
2371 | generate_full_extensions() |
2372 | rec_ptr pointer to the record from join buffer to generate extensions |
2373 | |
2374 | DESCRIPTION |
2375 | The function first checks whether the current record of 'join_tab' matches |
2376 | the partial join record from join buffer located at 'rec_ptr'. If it is the |
2377 | case the function calls the join_tab->next_select method to generate |
2378 | all full extension for this partial join match. |
2379 | |
2380 | RETURN VALUE |
2381 | return one of enum_nested_loop_state. |
2382 | */ |
2383 | |
enum_nested_loop_state JOIN_CACHE::generate_full_extensions(uchar *rec_ptr)
{
  enum_nested_loop_state rc= NESTED_LOOP_OK;
  DBUG_ENTER("JOIN_CACHE::generate_full_extensions" );

  /*
    Check whether the extended partial join record meets
    the pushdown conditions.
  */
  if (check_match(rec_ptr))
  {
    int res= 0;

    /* Skip rows rejected by the semi-join duplicate weedout, if any */
    if (!join_tab->check_weed_out_table ||
        !(res= join_tab->check_weed_out_table->sj_weedout_check_row(join->thd)))
    {
      /* Make rec_ptr the current record before descending into next_select */
      set_curr_rec_link(rec_ptr);
      rc= (join_tab->next_select)(join, join_tab+1, 0);
      if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
      {
        reset(TRUE);
        DBUG_RETURN(rc);
      }
    }
    /* res == -1 from the weedout check signals an error */
    if (res == -1)
    {
      rc= NESTED_LOOP_ERROR;
      DBUG_RETURN(rc);
    }
  }
  else if (unlikely(join->thd->is_error()))
    rc= NESTED_LOOP_ERROR;
  DBUG_RETURN(rc);
}
2418 | |
2419 | |
2420 | /* |
2421 | Check matching to a partial join record from the join buffer |
2422 | |
2423 | SYNOPSIS |
2424 | check_match() |
2425 | rec_ptr pointer to the record from join buffer to check matching to |
2426 | |
2427 | DESCRIPTION |
2428 | The function checks whether the current record of 'join_tab' matches |
2429 | the partial join record from join buffer located at 'rec_ptr'. If this is |
2430 | the case and 'join_tab' is the last inner table of a semi-join or an outer |
2431 | join the function turns on the match flag for the 'rec_ptr' record unless |
2432 | it has been already set. |
2433 | |
2434 | NOTES |
2435 | Setting the match flag on can trigger re-evaluation of pushdown conditions |
2436 | for the record when join_tab is the last inner table of an outer join. |
2437 | |
2438 | RETURN VALUE |
2439 | TRUE there is a match |
2440 | FALSE there is no match |
2441 | In this case the caller must also check thd->is_error() to see |
2442 | if there was a fatal error for the query. |
2443 | */ |
2444 | |
inline bool JOIN_CACHE::check_match(uchar *rec_ptr)
{
  /* Check whether pushdown conditions are satisfied */
  DBUG_ENTER("JOIN_CACHE:check_match" );

  if (join_tab->select && join_tab->select->skip_record(join->thd) <= 0)
    DBUG_RETURN(FALSE);

  join_tab->jbuf_tracker->r_rows_after_where++;

  /* For an intermediate inner table the pushdown check above is enough */
  if (!join_tab->is_last_inner_table())
    DBUG_RETURN(TRUE);

  /*
    This is the last inner table of an outer join,
    and maybe of other embedding outer joins, or
    this is the last inner table of a semi-join.
  */
  JOIN_TAB *first_inner= join_tab->get_first_inner_table();
  do
  {
    set_match_flag_if_none(first_inner, rec_ptr);
    if (first_inner->check_only_first_match() &&
        !join_tab->first_inner)
      DBUG_RETURN(TRUE);
    /*
      This is the first match for the outer table row.
      The function set_match_flag_if_none has turned the flag
      first_inner->found on. The pushdown predicates for
      inner tables must be re-evaluated with this flag on.
      Note that, if first_inner is the first inner table
      of a semi-join, but is not an inner table of an outer join
      such that 'not exists' optimization can be applied to it,
      the re-evaluation of the pushdown predicates is not needed.
    */
    for (JOIN_TAB *tab= first_inner; tab <= join_tab; tab++)
    {
      if (tab->select && tab->select->skip_record(join->thd) <= 0)
        DBUG_RETURN(FALSE);
    }
  }
  /* Repeat for every embedding outer join that also ends at join_tab */
  while ((first_inner= first_inner->first_upper) &&
         first_inner->last_inner == join_tab);
  DBUG_RETURN(TRUE);
}
2490 | |
2491 | |
2492 | /* |
2493 | Add null complements for unmatched outer records from join buffer |
2494 | |
2495 | SYNOPSIS |
2496 | join_null_complements() |
2497 | skip_last do not add null complements for the last record |
2498 | |
2499 | DESCRIPTION |
2500 | This function is called only for inner tables of outer joins. |
2501 | The function retrieves all rows from the join buffer and adds null |
2502 | complements for those of them that do not have matches for outer |
2503 | table records. |
2504 | If the 'join_tab' is the last inner table of the embedding outer |
2505 | join and the null complemented record satisfies the outer join |
    condition then the corresponding match flag is turned on
2507 | unless it has been set earlier. This setting may trigger |
2508 | re-evaluation of pushdown conditions for the record. |
2509 | |
2510 | NOTES |
2511 | The same implementation of the virtual method join_null_complements |
    is used for the BNL/BNLH/BKA/BKAH join algorithms.
2513 | |
2514 | RETURN VALUE |
2515 | return one of enum_nested_loop_state. |
2516 | */ |
2517 | |
enum_nested_loop_state JOIN_CACHE::join_null_complements(bool skip_last)
{
  ulonglong cnt;
  enum_nested_loop_state rc= NESTED_LOOP_OK;
  bool is_first_inner= join_tab == join_tab->first_unmatched;
  DBUG_ENTER("JOIN_CACHE::join_null_complements" );

  /* Return at once if there are no records in the join buffer */
  if (!records)
    DBUG_RETURN(NESTED_LOOP_OK);

  /* The last record is excluded when skip_last is set (non-key access only) */
  cnt= records - (is_key_access() ? 0 : MY_TEST(skip_last));

  /* This function may be called only for inner tables of outer joins */
  DBUG_ASSERT(join_tab->first_inner);

  for ( ; cnt; cnt--)
  {
    if (unlikely(join->thd->check_killed()))
    {
      /* The user has aborted the execution of the query */
      join->thd->send_kill_message();
      rc= NESTED_LOOP_KILLED;
      goto finish;
    }
    /* Just skip the whole record if a match for it has been already found */
    if (!is_first_inner || !skip_if_matched())
    {
      get_record();
      /* The outer row is complemented by nulls for each inner table */
      restore_record(join_tab->table, s->default_values);
      mark_as_null_row(join_tab->table);
      rc= generate_full_extensions(get_curr_rec());
      if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
        goto finish;
    }
  }

finish:
  DBUG_RETURN(rc);
}
2559 | |
2560 | |
2561 | /* |
2562 | Save data on the join algorithm employed by the join cache |
2563 | |
2564 | SYNOPSIS |
2565 | save_explain_data() |
2566 | str string to add the comment on the employed join algorithm to |
2567 | |
2568 | DESCRIPTION |
2569 | This function puts info about the type of the used join buffer (flat or |
    incremental) and on the type of the employed join algorithm (BNL,
2571 | BNLH, BKA or BKAH) to the data structure |
2572 | |
2573 | RETURN VALUE |
2574 | 0 ok |
2575 | 1 error |
2576 | */ |
2577 | |
2578 | bool JOIN_CACHE::save_explain_data(EXPLAIN_BKA_TYPE *explain) |
2579 | { |
2580 | explain->incremental= MY_TEST(prev_cache); |
2581 | |
2582 | explain->join_buffer_size= get_join_buffer_size(); |
2583 | |
2584 | switch (get_join_alg()) { |
2585 | case BNL_JOIN_ALG: |
2586 | explain->join_alg= "BNL" ; |
2587 | break; |
2588 | case BNLH_JOIN_ALG: |
2589 | explain->join_alg= "BNLH" ; |
2590 | break; |
2591 | case BKA_JOIN_ALG: |
2592 | explain->join_alg= "BKA" ; |
2593 | break; |
2594 | case BKAH_JOIN_ALG: |
2595 | explain->join_alg= "BKAH" ; |
2596 | break; |
2597 | default: |
2598 | DBUG_ASSERT(0); |
2599 | } |
2600 | return 0; |
2601 | } |
2602 | |
2603 | /** |
2604 | get thread handle. |
2605 | */ |
2606 | |
2607 | THD *JOIN_CACHE::thd() |
2608 | { |
2609 | return join->thd; |
2610 | } |
2611 | |
2612 | |
2613 | static bool add_mrr_explain_info(String *str, uint mrr_mode, handler *file) |
2614 | { |
2615 | char mrr_str_buf[128]={0}; |
2616 | int len; |
2617 | len= file->multi_range_read_explain_info(mrr_mode, mrr_str_buf, |
2618 | sizeof(mrr_str_buf)); |
2619 | if (len > 0) |
2620 | { |
2621 | if (str->length()) |
2622 | { |
2623 | if (str->append(STRING_WITH_LEN("; " ))) |
2624 | return 1; |
2625 | } |
2626 | if (str->append(mrr_str_buf, len)) |
2627 | return 1; |
2628 | } |
2629 | return 0; |
2630 | } |
2631 | |
2632 | |
2633 | bool JOIN_CACHE_BKA::save_explain_data(EXPLAIN_BKA_TYPE *explain) |
2634 | { |
2635 | if (JOIN_CACHE::save_explain_data(explain)) |
2636 | return 1; |
2637 | return add_mrr_explain_info(&explain->mrr_type, mrr_mode, join_tab->table->file); |
2638 | } |
2639 | |
2640 | |
2641 | bool JOIN_CACHE_BKAH::save_explain_data(EXPLAIN_BKA_TYPE *explain) |
2642 | { |
2643 | if (JOIN_CACHE::save_explain_data(explain)) |
2644 | return 1; |
2645 | return add_mrr_explain_info(&explain->mrr_type, mrr_mode, join_tab->table->file); |
2646 | } |
2647 | |
2648 | |
2649 | /* |
2650 | Initialize a hashed join cache |
2651 | |
2652 | SYNOPSIS |
2653 | init() |
2654 | for_explain join buffer is initialized for explain only |
2655 | |
2656 | DESCRIPTION |
2657 | The function initializes the cache structure with a hash table in it. |
2658 | The hash table will be used to store key values for the records from |
2659 | the join buffer. |
2660 | The function allocates memory for the join buffer and for descriptors of |
2661 | the record fields stored in the buffer. |
2662 | The function also initializes a hash table for record keys within the join |
2663 | buffer space. |
2664 | |
  NOTES
2666 | The function is supposed to be called by the init methods of the classes |
2667 | derived from JOIN_CACHE_HASHED. |
2668 | |
2669 | RETURN VALUE |
2670 | 0 initialization with buffer allocations has been succeeded |
2671 | 1 otherwise |
2672 | */ |
2673 | |
int JOIN_CACHE_HASHED::init(bool for_explain)
{
  int rc= 0;
  TABLE_REF *ref= &join_tab->ref;

  DBUG_ENTER("JOIN_CACHE_HASHED::init" );

  hash_table= 0;
  key_entries= 0;

  /* Lookup keys have the length of the ref access key */
  key_length= ref->key_length;

  /* For EXPLAIN no buffer is allocated; stop after the base-class init */
  if ((rc= JOIN_CACHE::init(for_explain)) || for_explain)
    DBUG_RETURN (rc);

  /* Scratch buffer used to hold one key value during lookups */
  if (!(key_buff= (uchar*) join->thd->alloc(key_length)))
    DBUG_RETURN(1);

  /* Take into account a reference to the next record in the key chain */
  pack_length+= get_size_of_rec_offset();
  pack_length_with_blob_ptrs+= get_size_of_rec_offset();

  ref_key_info= join_tab->get_keyinfo_by_key_no(join_tab->ref.key);
  ref_used_key_parts= join_tab->ref.key_parts;

  /*
    Default to byte-wise hashing/comparison; switch to the collation-aware
    variants below if any key part cannot be compared as plain binary.
  */
  hash_func= &JOIN_CACHE_HASHED::get_hash_idx_simple;
  hash_cmp_func= &JOIN_CACHE_HASHED::equal_keys_simple;

  KEY_PART_INFO *key_part= ref_key_info->key_part;
  KEY_PART_INFO *key_part_end= key_part+ref_used_key_parts;
  for ( ; key_part < key_part_end; key_part++)
  {
    if (!key_part->field->eq_cmp_as_binary())
    {
      hash_func= &JOIN_CACHE_HASHED::get_hash_idx_complex;
      hash_cmp_func= &JOIN_CACHE_HASHED::equal_keys_complex;
      break;
    }
  }

  /* Lay out the hash table at the end of the join buffer */
  init_hash_table();

  /* Offset from a record's chain link to its first field */
  rec_fields_offset= get_size_of_rec_offset()+get_size_of_rec_length()+
                     (prev_cache ? prev_cache->get_size_of_rec_offset() : 0);

  /* With embedded keys the data fields start after the flag fields */
  data_fields_offset= 0;
  if (use_emb_key)
  {
    CACHE_FIELD *copy= field_descr;
    CACHE_FIELD *copy_end= copy+flag_fields;
    for ( ; copy < copy_end; copy++)
      data_fields_offset+= copy->length;
  }

  DBUG_RETURN(0);
}
2730 | |
2731 | |
2732 | /* |
2733 | Initialize the hash table of a hashed join cache |
2734 | |
2735 | SYNOPSIS |
2736 | init_hash_table() |
2737 | |
2738 | DESCRIPTION |
2739 | The function estimates the number of hash table entries in the hash |
2740 | table to be used and initializes this hash table within the join buffer |
2741 | space. |
2742 | |
2743 | RETURN VALUE |
2744 | Currently the function always returns 0; |
2745 | */ |
2746 | |
int JOIN_CACHE_HASHED::init_hash_table()
{
  hash_table= 0;
  key_entries= 0;

  /* Calculate the minimal possible value of size_of_key_ofs greater than 1 */
  uint max_size_of_key_ofs= MY_MAX(2, get_size_of_rec_offset());
  /*
    Try offset widths of 2, 4, ... bytes until the chosen width is large
    enough to address any key entry the buffer could possibly hold.
  */
  for (size_of_key_ofs= 2;
       size_of_key_ofs <= max_size_of_key_ofs;
       size_of_key_ofs+= 2)
  {
    key_entry_length= get_size_of_rec_offset() + // key chain header
                      size_of_key_ofs +          // reference to the next key
                      (use_emb_key ?  get_size_of_rec_offset() : key_length);

    /* Expected total footprint of one record, key entry included */
    size_t space_per_rec= avg_record_length +
                          avg_aux_buffer_incr +
                          key_entry_length+size_of_key_ofs;
    size_t n= buff_size / space_per_rec;

    /*
      TODO: Make a better estimate for this upper bound of
      the number of records in in the join buffer.
    */
    size_t max_n= buff_size / (pack_length-length+
                               key_entry_length+size_of_key_ofs);

    /* Target a load factor of 0.7 for the hash table */
    hash_entries= (uint) (n / 0.7);
    set_if_bigger(hash_entries, 1);

    if (offset_size((uint)(max_n*key_entry_length)) <=
        size_of_key_ofs)
      break;
  }

  /* Initialize the hash table */
  hash_table= buff + (buff_size-hash_entries*size_of_key_ofs);
  cleanup_hash_table();
  curr_key_entry= hash_table;

  return 0;
}
2789 | |
2790 | |
2791 | /* |
2792 | Reallocate the join buffer of a hashed join cache |
2793 | |
2794 | SYNOPSIS |
2795 | realloc_buffer() |
2796 | |
  DESCRIPTION
2798 | The function reallocates the join buffer of the hashed join cache. |
2799 | After this it initializes a hash table within the buffer space and |
2800 | resets the join cache for writing. |
2801 | |
2802 | NOTES |
2803 | The function assumes that buff_size contains the new value for the join |
2804 | buffer size. |
2805 | |
2806 | RETURN VALUE |
2807 | 0 if the buffer has been successfully reallocated |
2808 | 1 otherwise |
2809 | */ |
2810 | |
int JOIN_CACHE_HASHED::realloc_buffer()
{
  int rc;
  free();
  /* rc is 1 (failure) when the allocation returned NULL */
  rc= MY_TEST(!(buff= (uchar*) my_malloc(buff_size, MYF(MY_THREAD_SPECIFIC))));
  /*
    NOTE(review): init_hash_table() and reset() are invoked even when the
    allocation above failed (buff == NULL), and they derive pointers from
    buff. Presumably callers abandon the cache on rc != 0 before any of
    those pointers are dereferenced -- confirm against the call sites.
  */
  init_hash_table();
  reset(TRUE);
  return rc;
}
2820 | |
2821 | |
2822 | /* |
2823 | Get maximum size of the additional space per record used for record keys |
2824 | |
  SYNOPSIS
2826 | get_max_key_addon_space_per_record() |
2827 | |
2828 | DESCRIPTION |
2829 | The function returns the size of the space occupied by one key entry |
2830 | and one hash table entry. |
2831 | |
2832 | RETURN VALUE |
2833 | maximum size of the additional space per record that is used to store |
2834 | record keys in the hash table |
2835 | */ |
2836 | |
2837 | uint JOIN_CACHE_HASHED::get_max_key_addon_space_per_record() |
2838 | { |
2839 | ulong len; |
2840 | TABLE_REF *ref= &join_tab->ref; |
2841 | /* |
2842 | The total number of hash entries in the hash tables is bounded by |
2843 | ceiling(N/0.7) where N is the maximum number of records in the buffer. |
2844 | That's why the multiplier 2 is used in the formula below. |
2845 | */ |
2846 | len= (use_emb_key ? get_size_of_rec_offset() : ref->key_length) + |
2847 | size_of_rec_ofs + // size of the key chain header |
2848 | size_of_rec_ofs + // >= size of the reference to the next key |
2849 | 2*size_of_rec_ofs; // >= 2*( size of hash table entry) |
2850 | return len; |
2851 | } |
2852 | |
2853 | |
2854 | /* |
2855 | Reset the buffer of a hashed join cache for reading/writing |
2856 | |
2857 | SYNOPSIS |
2858 | reset() |
2859 | for_writing if it's TRUE the function reset the buffer for writing |
2860 | |
2861 | DESCRIPTION |
2862 | This implementation of the virtual function reset() resets the join buffer |
2863 | of the JOIN_CACHE_HASHED class for reading or writing. |
2864 | Additionally to what the default implementation does this function |
2865 | cleans up the hash table allocated within the buffer. |
2866 | |
2867 | RETURN VALUE |
2868 | none |
2869 | */ |
2870 | |
2871 | void JOIN_CACHE_HASHED::reset(bool for_writing) |
2872 | { |
2873 | this->JOIN_CACHE::reset(for_writing); |
2874 | if (for_writing && hash_table) |
2875 | cleanup_hash_table(); |
2876 | curr_key_entry= hash_table; |
2877 | } |
2878 | |
2879 | |
2880 | /* |
2881 | Add a record into the buffer of a hashed join cache |
2882 | |
2883 | SYNOPSIS |
2884 | put_record() |
2885 | |
2886 | DESCRIPTION |
2887 | This implementation of the virtual function put_record writes the next |
2888 | matching record into the join buffer of the JOIN_CACHE_HASHED class. |
2889 | Additionally to what the default implementation does this function |
2890 | performs the following. |
2891 | It extracts from the record the key value used in lookups for matching |
2892 | records and searches for this key in the hash tables from the join cache. |
2893 | If it finds the key in the hash table it joins the record to the chain |
2894 | of records with this key. If the key is not found in the hash table the |
2895 | key is placed into it and a chain containing only the newly added record |
2896 | is attached to the key entry. The key value is either placed in the hash |
2897 | element added for the key or, if the use_emb_key flag is set, remains in |
2898 | the record from the partial join. |
2899 | If the match flag field of a record contains MATCH_IMPOSSIBLE the key is |
2900 | not created for this record. |
2901 | |
2902 | RETURN VALUE |
2903 | TRUE if it has been decided that it should be the last record |
2904 | in the join buffer, |
2905 | FALSE otherwise |
2906 | */ |
2907 | |
bool JOIN_CACHE_HASHED::put_record()
{
  bool is_full;
  uchar *key;
  uint key_len= key_length;
  uchar *key_ref_ptr;
  uchar *link= 0;
  TABLE_REF *ref= &join_tab->ref;
  /* Space for this record's link in its key chain is reserved at pos */
  uchar *next_ref_ptr= pos;

  pos+= get_size_of_rec_offset();
  /* Write the record into the join buffer */
  if (prev_cache)
    link= prev_cache->get_curr_rec_link();
  write_record_data(link, &is_full);

  /* A null-complemented record never matches: do not index it */
  if (last_written_is_null_compl)
    return is_full;

  if (use_emb_key)
    key= get_curr_emb_key();
  else
  {
    /* Build the key over the fields read into the record buffers */
    cp_buffer_from_ref(join->thd, join_tab->table, ref);
    key= ref->key_buff;
  }

  /* Look for the key in the hash table */
  if (key_search(key, key_len, &key_ref_ptr))
  {
    uchar *last_next_ref_ptr;
    /*
      The key is found in the hash table.
      Add the record to the circular list of the records attached to this key.
      Below 'rec' is the record to be added into the record chain for the found
      key, 'key_ref' points to a flattened representation of the st_key_entry
      structure that contains the key and the head of the record chain.
    */
    last_next_ref_ptr= get_next_rec_ref(key_ref_ptr+get_size_of_key_offset());
    /* rec->next_rec= key_entry->last_rec->next_rec */
    memcpy(next_ref_ptr, last_next_ref_ptr, get_size_of_rec_offset());
    /* key_entry->last_rec->next_rec= rec */
    store_next_rec_ref(last_next_ref_ptr, next_ref_ptr);
    /* key_entry->last_rec= rec */
    store_next_rec_ref(key_ref_ptr+get_size_of_key_offset(), next_ref_ptr);
  }
  else
  {
    /*
      The key is not found in the hash table.
      Put the key into the join buffer linking it with the keys for the
      corresponding hash entry. Create a circular list with one element
      referencing the record and attach the list to the key in the buffer.
      Key entries grow downwards from last_key_entry.
    */
    uchar *cp= last_key_entry;
    cp-= get_size_of_rec_offset()+get_size_of_key_offset();
    store_next_key_ref(key_ref_ptr, cp);
    store_null_key_ref(cp);
    /* Single-element circular chain: the record references itself */
    store_next_rec_ref(next_ref_ptr, next_ref_ptr);
    store_next_rec_ref(cp+get_size_of_key_offset(), next_ref_ptr);
    if (use_emb_key)
    {
      /* Store only a reference to the key embedded in the record */
      cp-= get_size_of_rec_offset();
      store_emb_key_ref(cp, key);
    }
    else
    {
      /* Store the key bytes themselves in the key entry */
      cp-= key_len;
      memcpy(cp, key, key_len);
    }
    last_key_entry= cp;
    /* The key entry area must not collide with the record area */
    DBUG_ASSERT(last_key_entry >= end_pos);
    /* Increment the counter of key_entries in the hash table */
    key_entries++;
  }
  return is_full;
}
2986 | |
2987 | |
2988 | /* |
2989 | Read the next record from the buffer of a hashed join cache |
2990 | |
2991 | SYNOPSIS |
2992 | get_record() |
2993 | |
2994 | DESCRIPTION |
2995 | Additionally to what the default implementation of the virtual |
2996 | function get_record does this implementation skips the link element |
2997 | used to connect the records with the same key into a chain. |
2998 | |
2999 | RETURN VALUE |
3000 | TRUE there are no more records to read from the join buffer |
3001 | FALSE otherwise |
3002 | */ |
3003 | |
3004 | bool JOIN_CACHE_HASHED::get_record() |
3005 | { |
3006 | pos+= get_size_of_rec_offset(); |
3007 | return this->JOIN_CACHE::get_record(); |
3008 | } |
3009 | |
3010 | |
3011 | /* |
3012 | Skip record from a hashed join buffer if its match flag is set to MATCH_FOUND |
3013 | |
3014 | SYNOPSIS |
3015 | skip_if_matched() |
3016 | |
3017 | DESCRIPTION |
3018 | This implementation of the virtual function skip_if_matched does |
3019 | the same as the default implementation does, but it takes into account |
3020 | the link element used to connect the records with the same key into a chain. |
3021 | |
3022 | RETURN VALUE |
3023 | TRUE the match flag is MATCH_FOUND and the record has been skipped |
3024 | FALSE otherwise |
3025 | */ |
3026 | |
3027 | bool JOIN_CACHE_HASHED::skip_if_matched() |
3028 | { |
3029 | uchar *save_pos= pos; |
3030 | pos+= get_size_of_rec_offset(); |
3031 | if (!this->JOIN_CACHE::skip_if_matched()) |
3032 | { |
3033 | pos= save_pos; |
3034 | return FALSE; |
3035 | } |
3036 | return TRUE; |
3037 | } |
3038 | |
3039 | |
3040 | /* |
3041 | Skip record from a hashed join buffer if its match flag dictates to do so |
3042 | |
3043 | SYNOPSIS |
    skip_if_not_needed_match()
3045 | |
3046 | DESCRIPTION |
3047 | This implementation of the virtual function skip_if_not_needed_match does |
3048 | the same as the default implementation does, but it takes into account |
3049 | the link element used to connect the records with the same key into a chain. |
3050 | |
3051 | RETURN VALUE |
3052 | TRUE the match flag dictates to skip the record |
3053 | FALSE the match flag is off |
3054 | */ |
3055 | |
3056 | bool JOIN_CACHE_HASHED::skip_if_not_needed_match() |
3057 | { |
3058 | uchar *save_pos= pos; |
3059 | pos+= get_size_of_rec_offset(); |
3060 | if (!this->JOIN_CACHE::skip_if_not_needed_match()) |
3061 | { |
3062 | pos= save_pos; |
3063 | return FALSE; |
3064 | } |
3065 | return TRUE; |
3066 | } |
3067 | |
3068 | |
3069 | /* |
3070 | Search for a key in the hash table of the join buffer |
3071 | |
3072 | SYNOPSIS |
3073 | key_search() |
3074 | key pointer to the key value |
3075 | key_len key value length |
    key_ref_ptr OUT position of the reference to the next key from
                    the hash element for the found key, or
                    a position where the reference to the hash
                    element for the key is to be added in the
                    case when the key has not been found
3081 | |
3082 | DESCRIPTION |
3083 | The function looks for a key in the hash table of the join buffer. |
    If the key is found the function returns the position of the reference
    to the next key from the hash element for the given key.
3086 | Otherwise the function returns the position where the reference to the |
3087 | newly created hash element for the given key is to be added. |
3088 | |
3089 | RETURN VALUE |
3090 | TRUE the key is found in the hash table |
3091 | FALSE otherwise |
3092 | */ |
3093 | |
bool JOIN_CACHE_HASHED::key_search(uchar *key, uint key_len,
                                   uchar **key_ref_ptr)
{
  bool is_found= FALSE;
  /*
    NOTE(review): hashing uses the member key_length rather than the
    key_len argument (the comparison below uses key_len). Presumably the
    two are always equal at the call sites -- confirm.
  */
  uint idx= (this->*hash_func)(key, key_length);
  uchar *ref_ptr= hash_table+size_of_key_ofs*idx;
  /* Walk the chain of key entries hanging off this hash bucket */
  while (!is_null_key_ref(ref_ptr))
  {
    uchar *next_key;
    ref_ptr= get_next_key_ref(ref_ptr);
    /* The key value sits just below the entry, unless embedded in the record */
    next_key= use_emb_key ? get_emb_key(ref_ptr-get_size_of_rec_offset()) :
                            ref_ptr-key_length;

    if ((this->*hash_cmp_func)(next_key, key, key_len))
    {
      is_found= TRUE;
      break;
    }
  }
  /* On a miss this is where the reference to a new key entry must go */
  *key_ref_ptr= ref_ptr;
  return is_found;
}
3116 | |
3117 | |
3118 | /* |
3119 | Hash function that considers a key in the hash table as byte array |
3120 | |
3121 | SYNOPSIS |
3122 | get_hash_idx_simple() |
3123 | key pointer to the key value |
3124 | key_len key value length |
3125 | |
3126 | DESCRIPTION |
3127 | The function calculates an index of the hash entry in the hash table |
3128 | of the join buffer for the given key. It considers the key just as |
3129 | a sequence of bytes of the length key_len. |
3130 | |
3131 | RETURN VALUE |
3132 | the calculated index of the hash entry for the given key |
3133 | */ |
3134 | |
3135 | inline |
3136 | uint JOIN_CACHE_HASHED::get_hash_idx_simple(uchar* key, uint key_len) |
3137 | { |
3138 | ulong nr= 1; |
3139 | ulong nr2= 4; |
3140 | uchar *pos= key; |
3141 | uchar *end= key+key_len; |
3142 | for (; pos < end ; pos++) |
3143 | { |
3144 | nr^= (ulong) ((((uint) nr & 63)+nr2)*((uint) *pos))+ (nr << 8); |
3145 | nr2+= 3; |
3146 | } |
3147 | return nr % hash_entries; |
3148 | } |
3149 | |
3150 | |
3151 | /* |
3152 | Hash function that takes into account collations of the components of the key |
3153 | |
3154 | SYNOPSIS |
3155 | get_hash_idx_complex() |
3156 | key pointer to the key value |
3157 | key_len key value length |
3158 | |
3159 | DESCRIPTION |
3160 | The function calculates an index of the hash entry in the hash table |
3161 | of the join buffer for the given key. It takes into account that the |
3162 | components of the key may be of a varchar type with different collations. |
3163 | The function guarantees that the same hash value for any two equal |
3164 | keys that may differ as byte sequences. |
3165 | The function takes the info about the components of the key, their |
3166 | types and used collations from the class member ref_key_info containing |
3167 | a pointer to the descriptor of the index that can be used for the join |
3168 | operation. |
3169 | |
3170 | RETURN VALUE |
3171 | the calculated index of the hash entry for the given key |
3172 | */ |
3173 | |
inline
uint JOIN_CACHE_HASHED::get_hash_idx_complex(uchar *key, uint key_len)
{
  /*
    key_len is unused here: key_hashnr() derives component lengths from
    ref_key_info. The parameter exists only so this function matches the
    signature of the hash_func member pointer.
  */
  return
    (uint) (key_hashnr(ref_key_info, ref_used_key_parts, key) % hash_entries);
}
3180 | |
3181 | |
3182 | /* |
3183 | Compare two key entries in the hash table as sequence of bytes |
3184 | |
3185 | SYNOPSIS |
3186 | equal_keys_simple() |
3187 | key1 pointer to the first key entry |
3188 | key2 pointer to the second key entry |
3189 | key_len the length of the key values |
3190 | |
3191 | DESCRIPTION |
3192 | The function compares two key entries in the hash table key1 and key2 |
3193 | as two sequences bytes of the length key_len |
3194 | |
3195 | RETURN VALUE |
3196 | TRUE key1 coincides with key2 |
3197 | FALSE otherwise |
3198 | */ |
3199 | |
3200 | inline |
3201 | bool JOIN_CACHE_HASHED::equal_keys_simple(uchar *key1, uchar *key2, |
3202 | uint key_len) |
3203 | { |
3204 | return memcmp(key1, key2, key_len) == 0; |
3205 | } |
3206 | |
3207 | |
3208 | /* |
3209 | Compare two key entries taking into account the used collation |
3210 | |
3211 | SYNOPSIS |
3212 | equal_keys_complex() |
3213 | key1 pointer to the first key entry |
3214 | key2 pointer to the second key entry |
3215 | key_len the length of the key values |
3216 | |
3217 | DESCRIPTION |
3218 | The function checks whether two key entries in the hash table |
3219 | key1 and key2 are equal as, possibly, compound keys of a certain |
3220 | structure whose components may be of a varchar type and may |
3221 | employ different collations. |
3222 | The descriptor of the key structure is taken from the class |
3223 | member ref_key_info. |
3224 | |
3225 | RETURN VALUE |
    TRUE key1 is equal to key2
3227 | FALSE otherwise |
3228 | */ |
3229 | |
inline
bool JOIN_CACHE_HASHED::equal_keys_complex(uchar *key1, uchar *key2,
                                           uint key_len)
{
  /*
    key_len is unused here: key_buf_cmp() derives component lengths from
    ref_key_info. The parameter exists only so this function matches the
    signature of the hash_cmp_func member pointer.
  */
  return key_buf_cmp(ref_key_info, ref_used_key_parts, key1, key2) == 0;
}
3236 | |
3237 | |
3238 | /* |
3239 | Clean up the hash table of the join buffer |
3240 | |
3241 | SYNOPSIS |
    cleanup_hash_table()
3245 | |
3246 | DESCRIPTION |
3247 | The function cleans up the hash table in the join buffer removing all |
3248 | hash elements from the table. |
3249 | |
3250 | RETURN VALUE |
3251 | none |
3252 | */ |
3253 | |
3254 | void JOIN_CACHE_HASHED:: cleanup_hash_table() |
3255 | { |
3256 | last_key_entry= hash_table; |
3257 | bzero(hash_table, (buff+buff_size)-hash_table); |
3258 | key_entries= 0; |
3259 | } |
3260 | |
3261 | |
3262 | /* |
3263 | Check whether all records in a key chain have their match flags set on |
3264 | |
3265 | SYNOPSIS |
3266 | check_all_match_flags_for_key() |
3267 | key_chain_ptr |
3268 | |
3269 | DESCRIPTION |
3270 | This function retrieves records in the given circular chain and checks |
3271 | whether their match flags are set on. The parameter key_chain_ptr shall |
3272 | point to the position in the join buffer storing the reference to the |
3273 | last element of this chain. |
3274 | |
3275 | RETURN VALUE |
3276 | TRUE if each retrieved record has its match flag set to MATCH_FOUND |
3277 | FALSE otherwise |
3278 | */ |
3279 | |
3280 | bool JOIN_CACHE_HASHED::check_all_match_flags_for_key(uchar *key_chain_ptr) |
3281 | { |
3282 | uchar *last_rec_ref_ptr= get_next_rec_ref(key_chain_ptr); |
3283 | uchar *next_rec_ref_ptr= last_rec_ref_ptr; |
3284 | do |
3285 | { |
3286 | next_rec_ref_ptr= get_next_rec_ref(next_rec_ref_ptr); |
3287 | uchar *rec_ptr= next_rec_ref_ptr+rec_fields_offset; |
3288 | if (get_match_flag_by_pos(rec_ptr) != MATCH_FOUND) |
3289 | return FALSE; |
3290 | } |
3291 | while (next_rec_ref_ptr != last_rec_ref_ptr); |
3292 | return TRUE; |
3293 | } |
3294 | |
3295 | |
3296 | /* |
3297 | Get the next key built for the records from the buffer of a hashed join cache |
3298 | |
3299 | SYNOPSIS |
3300 | get_next_key() |
3301 | key pointer to the buffer where the key value is to be placed |
3302 | |
3303 | DESCRIPTION |
3304 | The function reads the next key value stored in the hash table of the |
3305 | join buffer. Depending on the value of the use_emb_key flag of the |
3306 | join cache the value is read either from the table itself or from |
3307 | the record field where it occurs. |
3308 | |
3309 | RETURN VALUE |
    length of the key value - if the starting value of 'cur_key_entry' refers
    to the position after that referred by the value of 'last_key_entry',
3312 | 0 - otherwise. |
3313 | */ |
3314 | |
3315 | uint JOIN_CACHE_HASHED::get_next_key(uchar ** key) |
3316 | { |
3317 | if (curr_key_entry == last_key_entry) |
3318 | return 0; |
3319 | |
3320 | curr_key_entry-= key_entry_length; |
3321 | |
3322 | *key = use_emb_key ? get_emb_key(curr_key_entry) : curr_key_entry; |
3323 | |
3324 | DBUG_ASSERT(*key >= buff && *key < hash_table); |
3325 | |
3326 | return key_length; |
3327 | } |
3328 | |
3329 | |
3330 | /* |
3331 | Initiate an iteration process over records in the joined table |
3332 | |
3333 | SYNOPSIS |
3334 | open() |
3335 | |
3336 | DESCRIPTION |
3337 | The function initiates the process of iteration over records from the |
    joined table recurrently performed by the BNL/BNLH join algorithm.
3339 | |
3340 | RETURN VALUE |
3341 | 0 the initiation is a success |
3342 | error code otherwise |
3343 | */ |
3344 | |
int JOIN_TAB_SCAN::open()
{
  /* Stash and clear the status of the preceding tables for the scan */
  save_or_restore_used_tabs(join_tab, FALSE);
  is_first_record= TRUE;
  join_tab->tracker->r_scans++;   // one more scan over the joined table
  return join_init_read_record(join_tab);
}
3352 | |
3353 | |
3354 | /* |
3355 | Read the next record that can match while scanning the joined table |
3356 | |
3357 | SYNOPSIS |
3358 | next() |
3359 | |
3360 | DESCRIPTION |
3361 | The function reads the next record from the joined table that can |
3362 | match some records in the buffer of the join cache 'cache'. To do |
3363 | this the function calls the function that scans table records and |
3364 | looks for the next one that meets the condition pushed to the |
3365 | joined table join_tab. |
3366 | |
3367 | NOTES |
3368 | The function catches the signal that kills the query. |
3369 | |
3370 | RETURN VALUE |
3371 | 0 the next record exists and has been successfully read |
3372 | error code otherwise |
3373 | */ |
3374 | |
int JOIN_TAB_SCAN::next()
{
  int err= 0;
  int skip_rc;
  READ_RECORD *info= &join_tab->read_record;
  SQL_SELECT *select= join_tab->cache_select;
  THD *thd= join->thd;

  /*
    The first record is assumed to have been produced already by
    join_init_read_record() in open(), so no read happens for it here.
  */
  if (is_first_record)
    is_first_record= FALSE;
  else
    err= info->read_record();

  if (!err)
  {
    join_tab->tracker->r_rows++;
  }

  /* skip_record() <= 0: the record fails the pushed condition (or errors) */
  while (!err && select && (skip_rc= select->skip_record(thd)) <= 0)
  {
    /* Abort on a killed query or a condition evaluation error (skip_rc < 0) */
    if (unlikely(thd->check_killed()) || skip_rc < 0)
      return 1;
    /*
      Move to the next record if the last retrieved record does not
      meet the condition pushed to the table join_tab.
    */
    err= info->read_record();
    if (!err)
    {
      join_tab->tracker->r_rows++;
    }
  }

  /* Only records that passed the pushed condition are counted here */
  if (!err)
    join_tab->tracker->r_rows_after_where++;
  return err;
}
3412 | |
3413 | |
3414 | /* |
3415 | Walk back in join order from join_tab until we encounter a join tab with |
3416 | tab->cache!=NULL, and save/restore tab->table->status along the way. |
3417 | |
3418 | @param save TRUE save |
3419 | FALSE restore |
3420 | */ |
3421 | |
3422 | static void save_or_restore_used_tabs(JOIN_TAB *join_tab, bool save) |
3423 | { |
3424 | JOIN_TAB *first= join_tab->bush_root_tab? |
3425 | join_tab->bush_root_tab->bush_children->start : |
3426 | join_tab->join->join_tab + join_tab->join->const_tables; |
3427 | |
3428 | for (JOIN_TAB *tab= join_tab-1; tab != first && !tab->cache; tab--) |
3429 | { |
3430 | if (tab->bush_children) |
3431 | { |
3432 | for (JOIN_TAB *child= tab->bush_children->start; |
3433 | child != tab->bush_children->end; |
3434 | child++) |
3435 | { |
3436 | if (save) |
3437 | child->table->status= child->status; |
3438 | else |
3439 | { |
3440 | tab->status= tab->table->status; |
3441 | tab->table->status= 0; |
3442 | } |
3443 | } |
3444 | } |
3445 | |
3446 | if (save) |
3447 | tab->table->status= tab->status; |
3448 | else |
3449 | { |
3450 | tab->status= tab->table->status; |
3451 | tab->table->status= 0; |
3452 | } |
3453 | } |
3454 | } |
3455 | |
3456 | |
3457 | /* |
3458 | Perform finalizing actions for a scan over the table records |
3459 | |
3460 | SYNOPSIS |
3461 | close() |
3462 | |
3463 | DESCRIPTION |
3464 | The function performs the necessary restoring actions after |
3465 | the table scan over the joined table has been finished. |
3466 | |
3467 | RETURN VALUE |
3468 | none |
3469 | */ |
3470 | |
void JOIN_TAB_SCAN::close()
{
  /* Put back the table statuses that open() stashed away */
  save_or_restore_used_tabs(join_tab, TRUE);
}
3475 | |
3476 | |
3477 | /* |
3478 | Prepare to iterate over the BNL join cache buffer to look for matches |
3479 | |
3480 | SYNOPSIS |
3481 | prepare_look_for_matches() |
3482 | skip_last <-> ignore the last record in the buffer |
3483 | |
3484 | DESCRIPTION |
3485 | The function prepares the join cache for an iteration over the |
3486 | records in the join buffer. The iteration is performed when looking |
3487 | for matches for the record from the joined table join_tab that |
3488 | has been placed into the record buffer of the joined table. |
3489 | If the value of the parameter skip_last is TRUE then the last |
3490 | record from the join buffer is ignored. |
3491 | The function initializes the counter of the records that have been |
3492 | not iterated over yet. |
3493 | |
3494 | RETURN VALUE |
3495 | TRUE there are no records in the buffer to iterate over |
3496 | FALSE otherwise |
3497 | */ |
3498 | |
3499 | bool JOIN_CACHE_BNL::prepare_look_for_matches(bool skip_last) |
3500 | { |
3501 | if (!records) |
3502 | return TRUE; |
3503 | reset(FALSE); |
3504 | rem_records= (uint)records - MY_TEST(skip_last); |
3505 | return rem_records == 0; |
3506 | } |
3507 | |
3508 | |
3509 | /* |
3510 | Get next record from the BNL join cache buffer when looking for matches |
3511 | |
3512 | SYNOPSIS |
3513 | get_next_candidate_for_match |
3514 | |
3515 | DESCRIPTION |
3516 | This method is used for iterations over the records from the join |
3517 | cache buffer when looking for matches for records from join_tab. |
3518 | The methods performs the necessary preparations to read the next record |
3519 | from the join buffer into the record buffer by the method |
3520 | read_next_candidate_for_match, or, to skip the next record from the join |
3521 | buffer by the method skip_recurrent_candidate_for_match. |
3522 | This implementation of the virtual method get_next_candidate_for_match |
3523 | just decrements the counter of the records that are to be iterated over |
3524 | and returns the current value of the cursor 'pos' as the position of |
3525 | the record to be processed. |
3526 | |
3527 | RETURN VALUE |
3528 | pointer to the position right after the prefix of the current record |
    in the join buffer if there is another record to iterate over,
3530 | 0 - otherwise. |
3531 | */ |
3532 | |
3533 | uchar *JOIN_CACHE_BNL::get_next_candidate_for_match() |
3534 | { |
3535 | if (!rem_records) |
3536 | return 0; |
3537 | rem_records--; |
3538 | return pos+base_prefix_length; |
3539 | } |
3540 | |
3541 | |
3542 | /* |
3543 | Check whether the matching record from the BNL cache is to be skipped |
3544 | |
3545 | SYNOPSIS |
3546 | skip_next_candidate_for_match |
3547 | rec_ptr pointer to the position in the join buffer right after the prefix |
3548 | of the current record |
3549 | |
3550 | DESCRIPTION |
3551 | This implementation of the virtual function just calls the |
3552 | method skip_if_not_needed_match to check whether the record referenced by |
3553 | ref_ptr has its match flag set either to MATCH_FOUND and join_tab is the |
3554 | first inner table of a semi-join, or it's set to MATCH_IMPOSSIBLE and |
3555 | join_tab is the first inner table of an outer join. |
3556 | If so, the function just skips this record setting the value of the |
3557 | cursor 'pos' to the position right after it. |
3558 | |
3559 | RETURN VALUE |
3560 | TRUE the record referenced by rec_ptr has been skipped |
3561 | FALSE otherwise |
3562 | */ |
3563 | |
3564 | bool JOIN_CACHE_BNL::skip_next_candidate_for_match(uchar *rec_ptr) |
3565 | { |
3566 | pos= rec_ptr-base_prefix_length; |
3567 | return skip_if_not_needed_match(); |
3568 | } |
3569 | |
3570 | |
3571 | /* |
3572 | Read next record from the BNL join cache buffer when looking for matches |
3573 | |
3574 | SYNOPSIS |
3575 | read_next_candidate_for_match |
3576 | rec_ptr pointer to the position in the join buffer right after the prefix |
3577 | the current record. |
3578 | |
3579 | DESCRIPTION |
3580 | This implementation of the virtual method read_next_candidate_for_match |
3581 | calls the method get_record to read the record referenced by rec_ptr from |
3582 | the join buffer into the record buffer. If this record refers to the |
3583 | fields in the other join buffers the call of get_record ensures that |
3584 | these fields are read into the corresponding record buffers as well. |
3585 | This function is supposed to be called after a successful call of |
3586 | the method get_next_candidate_for_match. |
3587 | |
3588 | RETURN VALUE |
3589 | none |
3590 | */ |
3591 | |
void JOIN_CACHE_BNL::read_next_candidate_for_match(uchar *rec_ptr)
{
  /* Position the cursor at the very beginning of the record */
  pos= rec_ptr-base_prefix_length;
  /* Read the record fields into the record buffer(s) */
  get_record();
}
3597 | |
3598 | |
3599 | /* |
3600 | Initialize the BNL join cache |
3601 | |
3602 | SYNOPSIS |
3603 | init |
3604 | for_explain join buffer is initialized for explain only |
3605 | |
3606 | DESCRIPTION |
3607 | The function initializes the cache structure. It is supposed to be called |
3608 | right after a constructor for the JOIN_CACHE_BNL. |
3609 | |
3610 | NOTES |
3611 | The function first constructs a companion object of the type JOIN_TAB_SCAN, |
3612 | then it calls the init method of the parent class. |
3613 | |
3614 | RETURN VALUE |
3615 | 0 initialization with buffer allocations has been succeeded |
3616 | 1 otherwise |
3617 | */ |
3618 | |
3619 | int JOIN_CACHE_BNL::init(bool for_explain) |
3620 | { |
3621 | DBUG_ENTER("JOIN_CACHE_BNL::init" ); |
3622 | |
3623 | if (!(join_tab_scan= new JOIN_TAB_SCAN(join, join_tab))) |
3624 | DBUG_RETURN(1); |
3625 | |
3626 | DBUG_RETURN(JOIN_CACHE::init(for_explain)); |
3627 | } |
3628 | |
3629 | |
3630 | /* |
3631 | Get the chain of records from buffer matching the current candidate for join |
3632 | |
3633 | SYNOPSIS |
3634 | get_matching_chain_by_join_key() |
3635 | |
3636 | DESCRIPTION |
3637 | This function first build a join key for the record of join_tab that |
3638 | currently is in the join buffer for this table. Then it looks for |
3639 | the key entry with this key in the hash table of the join cache. |
3640 | If such a key entry is found the function returns the pointer to |
3641 | the head of the chain of records in the join_buffer that match this |
3642 | key. |
3643 | |
3644 | RETURN VALUE |
3645 | The pointer to the corresponding circular list of records if |
3646 | the key entry with the join key is found, 0 - otherwise. |
3647 | */ |
3648 | |
3649 | uchar *JOIN_CACHE_BNLH::get_matching_chain_by_join_key() |
3650 | { |
3651 | uchar *key_ref_ptr; |
3652 | TABLE *table= join_tab->table; |
3653 | TABLE_REF *ref= &join_tab->ref; |
3654 | KEY *keyinfo= join_tab->get_keyinfo_by_key_no(ref->key); |
3655 | /* Build the join key value out of the record in the record buffer */ |
3656 | key_copy(key_buff, table->record[0], keyinfo, key_length, TRUE); |
3657 | /* Look for this key in the join buffer */ |
3658 | if (!key_search(key_buff, key_length, &key_ref_ptr)) |
3659 | return 0; |
3660 | return key_ref_ptr+get_size_of_key_offset(); |
3661 | } |
3662 | |
3663 | |
3664 | /* |
3665 | Prepare to iterate over the BNLH join cache buffer to look for matches |
3666 | |
3667 | SYNOPSIS |
3668 | prepare_look_for_matches() |
3669 | skip_last <-> ignore the last record in the buffer |
3670 | |
3671 | DESCRIPTION |
3672 | The function prepares the join cache for an iteration over the |
3673 | records in the join buffer. The iteration is performed when looking |
3674 | for matches for the record from the joined table join_tab that |
3675 | has been placed into the record buffer of the joined table. |
3676 | If the value of the parameter skip_last is TRUE then the last |
3677 | record from the join buffer is ignored. |
3678 | The function builds the hashed key from the join fields of join_tab |
3679 | and uses this key to look in the hash table of the join cache for |
    the chain of matching records in the join buffer. If it finds
    such a chain it sets the member last_matching_rec_ref_ptr to point to
    the last link of the chain while setting next_matching_rec_ref_ptr to 0.
3683 | |
3684 | RETURN VALUE |
3685 | TRUE there are no matching records in the buffer to iterate over |
3686 | FALSE otherwise |
3687 | */ |
3688 | |
3689 | bool JOIN_CACHE_BNLH::prepare_look_for_matches(bool skip_last) |
3690 | { |
3691 | uchar *curr_matching_chain; |
3692 | last_matching_rec_ref_ptr= next_matching_rec_ref_ptr= 0; |
3693 | if (!(curr_matching_chain= get_matching_chain_by_join_key())) |
3694 | return 1; |
3695 | last_matching_rec_ref_ptr= get_next_rec_ref(curr_matching_chain); |
3696 | return 0; |
3697 | } |
3698 | |
3699 | |
3700 | /* |
3701 | Get next record from the BNLH join cache buffer when looking for matches |
3702 | |
3703 | SYNOPSIS |
3704 | get_next_candidate_for_match |
3705 | |
3706 | DESCRIPTION |
3707 | This method is used for iterations over the records from the join |
3708 | cache buffer when looking for matches for records from join_tab. |
3709 | The methods performs the necessary preparations to read the next record |
3710 | from the join buffer into the record buffer by the method |
3711 | read_next_candidate_for_match, or, to skip the next record from the join |
3712 | buffer by the method skip_next_candidate_for_match. |
3713 | This implementation of the virtual method moves to the next record |
3714 | in the chain of all records from the join buffer that are to be |
3715 | equi-joined with the current record from join_tab. |
3716 | |
3717 | RETURN VALUE |
3718 | pointer to the beginning of the record fields in the join buffer |
    if there is another record to iterate over, 0 - otherwise.
3720 | */ |
3721 | |
3722 | uchar *JOIN_CACHE_BNLH::get_next_candidate_for_match() |
3723 | { |
3724 | if (next_matching_rec_ref_ptr == last_matching_rec_ref_ptr) |
3725 | return 0; |
3726 | next_matching_rec_ref_ptr= get_next_rec_ref(next_matching_rec_ref_ptr ? |
3727 | next_matching_rec_ref_ptr : |
3728 | last_matching_rec_ref_ptr); |
3729 | return next_matching_rec_ref_ptr+rec_fields_offset; |
3730 | } |
3731 | |
3732 | |
3733 | /* |
3734 | Check whether the matching record from the BNLH cache is to be skipped |
3735 | |
3736 | SYNOPSIS |
3737 | skip_next_candidate_for_match |
3738 | rec_ptr pointer to the position in the join buffer right after |
3739 | the previous record |
3740 | |
3741 | DESCRIPTION |
3742 | This implementation of the virtual function just calls the |
3743 | method get_match_flag_by_pos to check whether the record referenced |
3744 | by ref_ptr has its match flag set to MATCH_FOUND. |
3745 | |
3746 | RETURN VALUE |
3747 | TRUE the record referenced by rec_ptr has its match flag set to |
3748 | MATCH_FOUND |
3749 | FALSE otherwise |
3750 | */ |
3751 | |
3752 | bool JOIN_CACHE_BNLH::skip_next_candidate_for_match(uchar *rec_ptr) |
3753 | { |
3754 | return join_tab->check_only_first_match() && |
3755 | (get_match_flag_by_pos(rec_ptr) == MATCH_FOUND); |
3756 | } |
3757 | |
3758 | |
3759 | /* |
3760 | Read next record from the BNLH join cache buffer when looking for matches |
3761 | |
3762 | SYNOPSIS |
3763 | read_next_candidate_for_match |
3764 | rec_ptr pointer to the position in the join buffer right after |
3765 | the previous record |
3766 | |
3767 | DESCRIPTION |
3768 | This implementation of the virtual method read_next_candidate_for_match |
3769 | calls the method get_record_by_pos to read the record referenced by rec_ptr |
3770 | from the join buffer into the record buffer. If this record refers to |
    fields in the other join buffers the call of get_record_by_pos ensures that
3772 | these fields are read into the corresponding record buffers as well. |
3773 | This function is supposed to be called after a successful call of |
3774 | the method get_next_candidate_for_match. |
3775 | |
3776 | RETURN VALUE |
3777 | none |
3778 | */ |
3779 | |
void JOIN_CACHE_BNLH::read_next_candidate_for_match(uchar *rec_ptr)
{
  /* Read the record referenced by rec_ptr into the record buffer(s) */
  get_record_by_pos(rec_ptr);
}
3784 | |
3785 | |
3786 | /* |
3787 | Initialize the BNLH join cache |
3788 | |
3789 | SYNOPSIS |
3790 | init |
3791 | for_explain join buffer is initialized for explain only |
3792 | |
3793 | DESCRIPTION |
3794 | The function initializes the cache structure. It is supposed to be called |
3795 | right after a constructor for the JOIN_CACHE_BNLH. |
3796 | |
3797 | NOTES |
3798 | The function first constructs a companion object of the type JOIN_TAB_SCAN, |
3799 | then it calls the init method of the parent class. |
3800 | |
3801 | RETURN VALUE |
3802 | 0 initialization with buffer allocations has been succeeded |
3803 | 1 otherwise |
3804 | */ |
3805 | |
3806 | int JOIN_CACHE_BNLH::init(bool for_explain) |
3807 | { |
3808 | DBUG_ENTER("JOIN_CACHE_BNLH::init" ); |
3809 | |
3810 | if (!(join_tab_scan= new JOIN_TAB_SCAN(join, join_tab))) |
3811 | DBUG_RETURN(1); |
3812 | |
3813 | DBUG_RETURN(JOIN_CACHE_HASHED::init(for_explain)); |
3814 | } |
3815 | |
3816 | |
3817 | /* |
3818 | Calculate the increment of the MRR buffer for a record write |
3819 | |
3820 | SYNOPSIS |
3821 | aux_buffer_incr() |
3822 | |
3823 | DESCRIPTION |
3824 | This implementation of the virtual function aux_buffer_incr determines |
3825 | for how much the size of the MRR buffer should be increased when another |
3826 | record is added to the cache. |
3827 | |
3828 | RETURN VALUE |
3829 | the increment of the size of the MRR buffer for the next record |
3830 | */ |
3831 | |
3832 | uint JOIN_TAB_SCAN_MRR::aux_buffer_incr(size_t recno) |
3833 | { |
3834 | uint incr= 0; |
3835 | TABLE_REF *ref= &join_tab->ref; |
3836 | TABLE *tab= join_tab->table; |
3837 | ha_rows rec_per_key= |
3838 | (ha_rows) tab->key_info[ref->key].actual_rec_per_key(ref->key_parts-1); |
3839 | set_if_bigger(rec_per_key, 1); |
3840 | if (recno == 1) |
3841 | incr= ref->key_length + tab->file->ref_length; |
3842 | incr+= (uint)(tab->file->stats.mrr_length_per_rec * rec_per_key); |
3843 | return incr; |
3844 | } |
3845 | |
3846 | |
3847 | /* |
3848 | Initiate iteration over records returned by MRR for the current join buffer |
3849 | |
3850 | SYNOPSIS |
3851 | open() |
3852 | |
3853 | DESCRIPTION |
3854 | The function initiates the process of iteration over the records from |
3855 | join_tab returned by the MRR interface functions for records from |
3856 | the join buffer. Such an iteration is performed by the BKA/BKAH join |
3857 | algorithm for each new refill of the join buffer. |
3858 | The function calls the MRR handler function multi_range_read_init to |
3859 | initiate this process. |
3860 | |
3861 | RETURN VALUE |
3862 | 0 the initiation is a success |
3863 | error code otherwise |
3864 | */ |
3865 | |
int JOIN_TAB_SCAN_MRR::open()
{
  handler *file= join_tab->table->file;

  /* Make sure the table is not treated as a NULL-complemented row */
  join_tab->table->null_row= 0;


  /* Dynamic range access is never used with BKA */
  DBUG_ASSERT(join_tab->use_quick != 2);

  /* Count this buffer refill as one more scan of join_tab */
  join_tab->tracker->r_scans++;
  save_or_restore_used_tabs(join_tab, FALSE);

  init_mrr_buff();

  /*
    Prepare to iterate over keys from the join buffer and to get
    matching candidates obtained with MRR handler functions.
  */
  if (!file->inited)
    file->ha_index_init(join_tab->ref.key, 1);
  ranges= cache->get_number_of_ranges_for_mrr();
  /* No pushed index condition => the skip_index_tuple callback is not needed */
  if (!join_tab->cache_idx_cond)
    range_seq_funcs.skip_index_tuple= 0;
  return file->multi_range_read_init(&range_seq_funcs, (void*) cache,
                                     ranges, mrr_mode, &mrr_buff);
}
3893 | |
3894 | |
3895 | /* |
3896 | Read the next record returned by MRR for the current join buffer |
3897 | |
3898 | SYNOPSIS |
3899 | next() |
3900 | |
3901 | DESCRIPTION |
3902 | The function reads the next record from the joined table join_tab |
3903 | returned by the MRR handler function multi_range_read_next for |
3904 | the current refill of the join buffer. The record is read into |
3905 | the record buffer used for join_tab records in join operations. |
3906 | |
3907 | RETURN VALUE |
3908 | 0 the next record exists and has been successfully read |
3909 | error code otherwise |
3910 | */ |
3911 | |
int JOIN_TAB_SCAN_MRR::next()
{
  /* Slot where MRR stores the association (range id) of the returned row */
  char **ptr= (char **) cache->get_curr_association_ptr();

  DBUG_ASSERT(sizeof(range_id_t) == sizeof(*ptr));
  /* Any handler error is mapped to -1, success to 0 */
  int rc= join_tab->table->file->multi_range_read_next((range_id_t*)ptr) ? -1 : 0;
  if (!rc)
  {
    /* Update the per-table row counters used for runtime statistics */
    join_tab->tracker->r_rows++;
    join_tab->tracker->r_rows_after_where++;
    /*
      If a record in an incremental cache contains no fields then the
      association for the last record in cache will be equal to cache->end_pos
    */
    /*
      psergey: this makes no sense where HA_MRR_NO_ASSOC is used.
      DBUG_ASSERT(cache->buff <= (uchar *) (*ptr) &&
                  (uchar *) (*ptr) <= cache->end_pos);
    */
  }
  return rc;
}
3934 | |
3935 | |
3936 | static |
3937 | void bka_range_seq_key_info(void *init_params, uint *length, |
3938 | key_part_map *map) |
3939 | { |
3940 | TABLE_REF *ref= &(((JOIN_CACHE*)init_params)->join_tab->ref); |
3941 | *length= ref->key_length; |
3942 | *map= (key_part_map(1) << ref->key_parts) - 1; |
3943 | } |
3944 | |
3945 | |
3946 | /* |
3947 | Initialize retrieval of range sequence for BKA join algorithm |
3948 | |
3949 | SYNOPSIS |
3950 | bka_range_seq_init() |
3951 | init_params pointer to the BKA join cache object |
3952 | n_ranges the number of ranges obtained |
3953 | flags combination of MRR flags |
3954 | |
3955 | DESCRIPTION |
3956 | The function interprets init_param as a pointer to a JOIN_CACHE_BKA |
3957 | object. The function prepares for an iteration over the join keys |
3958 | built for all records from the cache join buffer. |
3959 | |
3960 | NOTE |
    This function is used only as a callback function.
3962 | |
3963 | RETURN VALUE |
3964 | init_param value that is to be used as a parameter of bka_range_seq_next() |
3965 | */ |
3966 | |
3967 | static |
3968 | range_seq_t bka_range_seq_init(void *init_param, uint n_ranges, uint flags) |
3969 | { |
3970 | DBUG_ENTER("bka_range_seq_init" ); |
3971 | JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) init_param; |
3972 | cache->reset(0); |
3973 | DBUG_RETURN((range_seq_t) init_param); |
3974 | } |
3975 | |
3976 | |
3977 | /* |
3978 | Get the next range/key over records from the join buffer used by a BKA cache |
3979 | |
3980 | SYNOPSIS |
3981 | bka_range_seq_next() |
3982 | seq the value returned by bka_range_seq_init |
3983 | range OUT reference to the next range |
3984 | |
3985 | DESCRIPTION |
3986 | The function interprets seq as a pointer to a JOIN_CACHE_BKA |
3987 | object. The function returns a pointer to the range descriptor |
3988 | for the key built over the next record from the join buffer. |
3989 | |
3990 | NOTE |
    This function is used only as a callback function.
3992 | |
3993 | RETURN VALUE |
3994 | FALSE ok, the range structure filled with info about the next range/key |
3995 | TRUE no more ranges |
3996 | */ |
3997 | |
3998 | static |
3999 | bool bka_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range) |
4000 | { |
4001 | DBUG_ENTER("bka_range_seq_next" ); |
4002 | JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq; |
4003 | TABLE_REF *ref= &cache->join_tab->ref; |
4004 | key_range *start_key= &range->start_key; |
4005 | if ((start_key->length= cache->get_next_key((uchar **) &start_key->key))) |
4006 | { |
4007 | start_key->keypart_map= (1 << ref->key_parts) - 1; |
4008 | start_key->flag= HA_READ_KEY_EXACT; |
4009 | range->end_key= *start_key; |
4010 | range->end_key.flag= HA_READ_AFTER_KEY; |
4011 | range->ptr= (char *) cache->get_curr_rec(); |
4012 | range->range_flag= EQ_RANGE; |
4013 | DBUG_RETURN(0); |
4014 | } |
4015 | DBUG_RETURN(1); |
4016 | } |
4017 | |
4018 | |
4019 | /* |
4020 | Check whether range_info orders to skip the next record from BKA buffer |
4021 | |
4022 | SYNOPSIS |
4023 | bka_range_seq_skip_record() |
4024 | seq value returned by bka_range_seq_init() |
4025 | range_info information about the next range |
4026 | rowid [NOT USED] rowid of the record to be checked |
4027 | |
4028 | |
4029 | DESCRIPTION |
4030 | The function interprets seq as a pointer to a JOIN_CACHE_BKA object. |
4031 | The function returns TRUE if the record with this range_info |
4032 | is to be filtered out from the stream of records returned by |
4033 | multi_range_read_next(). |
4034 | |
4035 | NOTE |
    This function is used only as a callback function.
4037 | |
4038 | RETURN VALUE |
4039 | 1 record with this range_info is to be filtered out from the stream |
4040 | of records returned by multi_range_read_next() |
4041 | 0 the record is to be left in the stream |
4042 | */ |
4043 | |
4044 | static |
4045 | bool bka_range_seq_skip_record(range_seq_t rseq, range_id_t range_info, uchar *rowid) |
4046 | { |
4047 | DBUG_ENTER("bka_range_seq_skip_record" ); |
4048 | JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq; |
4049 | bool res= cache->get_match_flag_by_pos((uchar *) range_info) == |
4050 | JOIN_CACHE::MATCH_FOUND; |
4051 | DBUG_RETURN(res); |
4052 | } |
4053 | |
4054 | |
4055 | /* |
4056 | Check if the record combination from BKA cache matches the index condition |
4057 | |
4058 | SYNOPSIS |
4059 | bka_skip_index_tuple() |
4060 | rseq value returned by bka_range_seq_init() |
4061 | range_info record chain for the next range/key returned by MRR |
4062 | |
4063 | DESCRIPTION |
4064 | This is wrapper for JOIN_CACHE_BKA::skip_index_tuple method, |
4065 | see comments there. |
4066 | |
4067 | NOTE |
4068 | This function is used as a RANGE_SEQ_IF::skip_index_tuple callback. |
4069 | |
4070 | RETURN VALUE |
4071 | 0 The record combination satisfies the index condition |
4072 | 1 Otherwise |
4073 | */ |
4074 | |
4075 | static |
4076 | bool bka_skip_index_tuple(range_seq_t rseq, range_id_t range_info) |
4077 | { |
4078 | DBUG_ENTER("bka_skip_index_tuple" ); |
4079 | JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq; |
4080 | THD *thd= cache->thd(); |
4081 | bool res; |
4082 | status_var_increment(thd->status_var.ha_icp_attempts); |
4083 | if (!(res= cache->skip_index_tuple(range_info))) |
4084 | status_var_increment(thd->status_var.ha_icp_match); |
4085 | DBUG_RETURN(res); |
4086 | } |
4087 | |
4088 | |
4089 | /* |
4090 | Prepare to read the record from BKA cache matching the current joined record |
4091 | |
4092 | SYNOPSIS |
4093 | prepare_look_for_matches() |
4094 | skip_last <-> ignore the last record in the buffer (always unused here) |
4095 | |
4096 | DESCRIPTION |
4097 | The function prepares to iterate over records in the join cache buffer |
4098 | matching the record loaded into the record buffer for join_tab when |
4099 | performing join operation by BKA join algorithm. With BKA algorithms the |
4100 | record loaded into the record buffer for join_tab always has a direct |
4101 | reference to the matching records from the join buffer. When the regular |
4102 | BKA join algorithm is employed the record from join_tab can refer to |
4103 | only one such record. |
4104 | The function sets the counter of the remaining records from the cache |
4105 | buffer that would match the current join_tab record to 1. |
4106 | |
4107 | RETURN VALUE |
4108 | TRUE there are no records in the buffer to iterate over |
4109 | FALSE otherwise |
4110 | */ |
4111 | |
4112 | bool JOIN_CACHE_BKA::prepare_look_for_matches(bool skip_last) |
4113 | { |
4114 | if (!records) |
4115 | return TRUE; |
4116 | rem_records= 1; |
4117 | return FALSE; |
4118 | } |
4119 | |
4120 | |
4121 | /* |
4122 | Get the record from the BKA cache matching the current joined record |
4123 | |
4124 | SYNOPSIS |
4125 | get_next_candidate_for_match |
4126 | |
4127 | DESCRIPTION |
4128 | This method is used for iterations over the records from the join |
4129 | cache buffer when looking for matches for records from join_tab. |
4130 | The method performs the necessary preparations to read the next record |
4131 | from the join buffer into the record buffer by the method |
4132 | read_next_candidate_for_match, or, to skip the next record from the join |
4133 | buffer by the method skip_if_not_needed_match. |
4134 | This implementation of the virtual method get_next_candidate_for_match |
4135 | just decrements the counter of the records that are to be iterated over |
4136 | and returns the value of curr_association as a reference to the position |
4137 | of the beginning of the record fields in the buffer. |
4138 | |
4139 | RETURN VALUE |
4140 | pointer to the start of the record fields in the join buffer |
    if there is another record to iterate over, 0 - otherwise.
4142 | */ |
4143 | |
4144 | uchar *JOIN_CACHE_BKA::get_next_candidate_for_match() |
4145 | { |
4146 | if (!rem_records) |
4147 | return 0; |
4148 | rem_records--; |
4149 | return curr_association; |
4150 | } |
4151 | |
4152 | |
4153 | /* |
4154 | Check whether the matching record from the BKA cache is to be skipped |
4155 | |
4156 | SYNOPSIS |
4157 | skip_next_candidate_for_match |
4158 | rec_ptr pointer to the position in the join buffer right after |
4159 | the previous record |
4160 | |
4161 | DESCRIPTION |
4162 | This implementation of the virtual function just calls the |
4163 | method get_match_flag_by_pos to check whether the record referenced |
4164 | by ref_ptr has its match flag set to MATCH_FOUND. |
4165 | |
4166 | RETURN VALUE |
4167 | TRUE the record referenced by rec_ptr has its match flag set to |
4168 | MATCH_FOUND |
4169 | FALSE otherwise |
4170 | */ |
4171 | |
4172 | bool JOIN_CACHE_BKA::skip_next_candidate_for_match(uchar *rec_ptr) |
4173 | { |
4174 | return join_tab->check_only_first_match() && |
4175 | (get_match_flag_by_pos(rec_ptr) == MATCH_FOUND); |
4176 | } |
4177 | |
4178 | |
4179 | /* |
4180 | Read the next record from the BKA join cache buffer when looking for matches |
4181 | |
4182 | SYNOPSIS |
4183 | read_next_candidate_for_match |
4184 | rec_ptr pointer to the position in the join buffer right after |
4185 | the previous record |
4186 | |
4187 | DESCRIPTION |
4188 | This implementation of the virtual method read_next_candidate_for_match |
4189 | calls the method get_record_by_pos to read the record referenced by rec_ptr |
4190 | from the join buffer into the record buffer. If this record refers to |
    fields in the other join buffers the call of get_record_by_pos ensures that
4192 | these fields are read into the corresponding record buffers as well. |
4193 | This function is supposed to be called after a successful call of |
4194 | the method get_next_candidate_for_match. |
4195 | |
4196 | RETURN VALUE |
4197 | none |
4198 | */ |
4199 | |
void JOIN_CACHE_BKA::read_next_candidate_for_match(uchar *rec_ptr)
{
  /* Read the record referenced by rec_ptr into the record buffer(s) */
  get_record_by_pos(rec_ptr);
}
4204 | |
4205 | |
4206 | /* |
4207 | Initialize the BKA join cache |
4208 | |
4209 | SYNOPSIS |
4210 | init |
4211 | for_explain join buffer is initialized for explain only |
4212 | |
4213 | |
4214 | DESCRIPTION |
4215 | The function initializes the cache structure. It is supposed to be called |
4216 | right after a constructor for the JOIN_CACHE_BKA. |
4217 | |
4218 | NOTES |
4219 | The function first constructs a companion object of the type |
4220 | JOIN_TAB_SCAN_MRR, then it calls the init method of the parent class. |
4221 | |
4222 | RETURN VALUE |
4223 | 0 initialization with buffer allocations has been succeeded |
4224 | 1 otherwise |
4225 | */ |
4226 | |
4227 | int JOIN_CACHE_BKA::init(bool for_explain) |
4228 | { |
4229 | int res; |
4230 | bool check_only_first_match= join_tab->check_only_first_match(); |
4231 | |
4232 | RANGE_SEQ_IF rs_funcs= { bka_range_seq_key_info, |
4233 | bka_range_seq_init, |
4234 | bka_range_seq_next, |
4235 | check_only_first_match ? bka_range_seq_skip_record : 0, |
4236 | bka_skip_index_tuple }; |
4237 | |
4238 | DBUG_ENTER("JOIN_CACHE_BKA::init" ); |
4239 | |
4240 | JOIN_TAB_SCAN_MRR *jsm; |
4241 | if (!(join_tab_scan= jsm= new JOIN_TAB_SCAN_MRR(join, join_tab, |
4242 | mrr_mode, rs_funcs))) |
4243 | DBUG_RETURN(1); |
4244 | |
4245 | if ((res= JOIN_CACHE::init(for_explain))) |
4246 | DBUG_RETURN(res); |
4247 | |
4248 | if (use_emb_key) |
4249 | jsm->mrr_mode |= HA_MRR_MATERIALIZED_KEYS; |
4250 | |
4251 | DBUG_RETURN(0); |
4252 | } |
4253 | |
4254 | |
4255 | /* |
4256 | Get the key built over the next record from BKA join buffer |
4257 | |
4258 | SYNOPSIS |
4259 | get_next_key() |
4260 | key pointer to the buffer where the key value is to be placed |
4261 | |
4262 | DESCRIPTION |
4263 | The function reads key fields from the current record in the join buffer. |
4264 | and builds the key value out of these fields that will be used to access |
4265 | the 'join_tab' table. Some of key fields may belong to previous caches. |
4266 | They are accessed via record references to the record parts stored in the |
4267 | previous join buffers. The other key fields always are placed right after |
4268 | the flag fields of the record. |
4269 | If the key is embedded, which means that its value can be read directly |
4270 | from the join buffer, then *key is set to the beginning of the key in |
4271 | this buffer. Otherwise the key is built in the join_tab->ref->key_buff. |
    The function returns the length of the key if it succeeds to read it.
    It is assumed that the function starts reading at the position of
    the record length which is provided for each record in a BKA cache.
4275 | After the key is built the 'pos' value points to the first position after |
4276 | the current record. |
4277 | The function just skips the records with MATCH_IMPOSSIBLE in the |
4278 | match flag field if there is any. |
4279 | The function returns 0 if the initial position is after the beginning |
4280 | of the record fields for last record from the join buffer. |
4281 | |
4282 | RETURN VALUE |
4283 | length of the key value - if the starting value of 'pos' points to |
4284 | the position before the fields for the last record, |
4285 | 0 - otherwise. |
4286 | */ |
4287 | |
uint JOIN_CACHE_BKA::get_next_key(uchar ** key)
{
  uint len;
  uint32 rec_len;
  uchar *init_pos;
  JOIN_CACHE *cache;

start:

  /* Any record in a BKA cache is prepended with its length */
  DBUG_ASSERT(with_length);

  /* Stop when the cursor has moved past the last record of the buffer */
  if ((pos+size_of_rec_len) > last_rec_pos || !records)
    return 0;

  /* Read the length of the record */
  rec_len= get_rec_length(pos);
  pos+= size_of_rec_len;
  init_pos= pos;

  /* Read a reference to the previous cache if any */
  if (prev_cache)
    pos+= prev_cache->get_size_of_rec_offset();

  curr_rec_pos= pos;

  /* Read all flag fields of the record */
  read_flag_fields();

  /* Records already known to have no match yield no range: skip them */
  if (with_match_flag &&
      (Match_flag) curr_rec_pos[0] == MATCH_IMPOSSIBLE )
  {
    pos= init_pos+rec_len;
    goto start;
  }

  if (use_emb_key)
  {
    /* An embedded key is taken directly from the join buffer */
    *key= pos;
    len= emb_key_length;
  }
  else
  {
    /* Read key arguments from previous caches if there are any such fields */
    if (external_key_arg_fields)
    {
      uchar *rec_ptr= curr_rec_pos;
      uint key_arg_count= external_key_arg_fields;
      CACHE_FIELD **copy_ptr= blob_ptr-key_arg_count;
      for (cache= prev_cache; key_arg_count; cache= cache->prev_cache)
      {
        /* NOTE: this 'len' shadows the outer one; it is only passed to
           read_referenced_field below */
        uint len= 0;
        DBUG_ASSERT(cache);
        rec_ptr= cache->get_rec_ref(rec_ptr);
        /* Step over caches that store no referenced fields */
        while (!cache->referenced_fields)
        {
          cache= cache->prev_cache;
          DBUG_ASSERT(cache);
          rec_ptr= cache->get_rec_ref(rec_ptr);
        }
        while (key_arg_count &&
               cache->read_referenced_field(*copy_ptr, rec_ptr, &len))
        {
          copy_ptr++;
          --key_arg_count;
        }
      }
    }

    /*
      Read the other key arguments from the current record. The fields for
      these arguments are always first in the sequence of the record's fields.
    */
    CACHE_FIELD *copy= field_descr+flag_fields;
    CACHE_FIELD *copy_end= copy+local_key_arg_fields;
    bool blob_in_rec_buff= blob_data_is_in_rec_buff(curr_rec_pos);
    for ( ; copy < copy_end; copy++)
      read_record_field(copy, blob_in_rec_buff);

    /* Build the key over the fields read into the record buffers */
    TABLE_REF *ref= &join_tab->ref;
    cp_buffer_from_ref(join->thd, join_tab->table, ref);
    *key= ref->key_buff;
    len= ref->key_length;
  }

  /* Advance the cursor to the first position after the current record */
  pos= init_pos+rec_len;

  return len;
}
4379 | |
4380 | |
4381 | /* |
4382 | Check the index condition of the joined table for a record from the BKA cache |
4383 | |
4384 | SYNOPSIS |
4385 | skip_index_tuple() |
4386 | range_info pointer to the record returned by MRR |
4387 | |
4388 | DESCRIPTION |
4389 | This function is invoked from MRR implementation to check if an index |
4390 | tuple matches the index condition. It is used in the case where the index |
4391 | condition actually depends on both columns of the used index and columns |
4392 | from previous tables. |
4393 | |
4394 | NOTES |
4395 | Accessing columns of the previous tables requires special handling with |
4396 | BKA. The idea of BKA is to collect record combinations in a buffer and |
4397 | then do a batch of ref access lookups, i.e. by the time we're doing a |
4398 | lookup its previous-records-combination is not in prev_table->record[0] |
4399 | but somewhere in the join buffer. |
4400 | We need to get it from there back into prev_table(s)->record[0] before we |
4401 | can evaluate the index condition, and that's why we need this function |
4402 | instead of regular IndexConditionPushdown. |
4403 | |
4404 | NOTES |
4405 | Possible optimization: |
4406 | Before we unpack the record from a previous table |
4407 | check if this table is used in the condition. |
4408 | If so then unpack the record otherwise skip the unpacking. |
4409 | This should be done by a special virtual method |
4410 | get_partial_record_by_pos(). |
4411 | |
4412 | RETURN VALUE |
    1    the record combination does not satisfy the index condition
4414 | 0 otherwise |
4415 | */ |
4416 | |
bool JOIN_CACHE_BKA::skip_index_tuple(range_id_t range_info)
{
  DBUG_ENTER("JOIN_CACHE_BKA::skip_index_tuple" );
  /*
    Restore the record combination for this range from the join buffer
    into the record buffers, then evaluate the pushed index condition.
  */
  get_record_by_pos((uchar*)range_info);
  DBUG_RETURN(!join_tab->cache_idx_cond->val_int());
}
4423 | |
4424 | |
4425 | |
4426 | /* |
4427 | Initialize retrieval of range sequence for the BKAH join algorithm |
4428 | |
4429 | SYNOPSIS |
4430 | bkah_range_seq_init() |
4431 | init_params pointer to the BKAH join cache object |
4432 | n_ranges the number of ranges obtained |
4433 | flags combination of MRR flags |
4434 | |
4435 | DESCRIPTION |
4436 | The function interprets init_param as a pointer to a JOIN_CACHE_BKAH |
4437 | object. The function prepares for an iteration over distinct join keys |
4438 | built over the records from the cache join buffer. |
4439 | |
4440 | NOTE |
    This function is used only as a callback function.
4442 | |
4443 | RETURN VALUE |
4444 | init_param value that is to be used as a parameter of |
4445 | bkah_range_seq_next() |
4446 | */ |
4447 | |
4448 | static |
4449 | range_seq_t bkah_range_seq_init(void *init_param, uint n_ranges, uint flags) |
4450 | { |
4451 | DBUG_ENTER("bkah_range_seq_init" ); |
4452 | JOIN_CACHE_BKAH *cache= (JOIN_CACHE_BKAH *) init_param; |
4453 | cache->reset(0); |
4454 | DBUG_RETURN((range_seq_t) init_param); |
4455 | } |
4456 | |
4457 | |
4458 | /* |
4459 | Get the next range/key over records from the join buffer of a BKAH cache |
4460 | |
4461 | SYNOPSIS |
4462 | bkah_range_seq_next() |
4463 | seq value returned by bkah_range_seq_init() |
4464 | range OUT reference to the next range |
4465 | |
4466 | DESCRIPTION |
4467 | The function interprets seq as a pointer to a JOIN_CACHE_BKAH |
4468 | object. The function returns a pointer to the range descriptor |
4469 | for the next unique key built over records from the join buffer. |
4470 | |
4471 | NOTE |
    This function is used only as a callback function.
4473 | |
4474 | RETURN VALUE |
4475 | FALSE ok, the range structure filled with info about the next range/key |
4476 | TRUE no more ranges |
4477 | */ |
4478 | |
4479 | static |
4480 | bool bkah_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range) |
4481 | { |
4482 | DBUG_ENTER("bkah_range_seq_next" ); |
4483 | JOIN_CACHE_BKAH *cache= (JOIN_CACHE_BKAH *) rseq; |
4484 | TABLE_REF *ref= &cache->join_tab->ref; |
4485 | key_range *start_key= &range->start_key; |
4486 | if ((start_key->length= cache->get_next_key((uchar **) &start_key->key))) |
4487 | { |
4488 | start_key->keypart_map= (1 << ref->key_parts) - 1; |
4489 | start_key->flag= HA_READ_KEY_EXACT; |
4490 | range->end_key= *start_key; |
4491 | range->end_key.flag= HA_READ_AFTER_KEY; |
4492 | range->ptr= (char *) cache->get_curr_key_chain(); |
4493 | range->range_flag= EQ_RANGE; |
4494 | DBUG_RETURN(0); |
4495 | } |
4496 | DBUG_RETURN(1); |
4497 | } |
4498 | |
4499 | |
4500 | /* |
4501 | Check whether range_info orders to skip the next record from BKAH join buffer |
4502 | |
4503 | SYNOPSIS |
4504 | bkah_range_seq_skip_record() |
4505 | seq value returned by bkah_range_seq_init() |
4506 | range_info information about the next range/key returned by MRR |
4507 | rowid [NOT USED] rowid of the record to be checked (not used) |
4508 | |
4509 | DESCRIPTION |
4510 | The function interprets seq as a pointer to a JOIN_CACHE_BKAH |
4511 | object. The function returns TRUE if the record with this range_info |
4512 | is to be filtered out from the stream of records returned by |
4513 | multi_range_read_next(). |
4514 | |
4515 | NOTE |
    This function is used only as a callback function.
4517 | |
4518 | RETURN VALUE |
4519 | 1 record with this range_info is to be filtered out from the stream |
4520 | of records returned by multi_range_read_next() |
4521 | 0 the record is to be left in the stream |
4522 | */ |
4523 | |
4524 | static |
4525 | bool bkah_range_seq_skip_record(range_seq_t rseq, range_id_t range_info, uchar *rowid) |
4526 | { |
4527 | DBUG_ENTER("bkah_range_seq_skip_record" ); |
4528 | JOIN_CACHE_BKAH *cache= (JOIN_CACHE_BKAH *) rseq; |
4529 | bool res= cache->check_all_match_flags_for_key((uchar *) range_info); |
4530 | DBUG_RETURN(res); |
4531 | } |
4532 | |
4533 | |
4534 | /* |
4535 | Check if the record combination from BKAH cache matches the index condition |
4536 | |
4537 | SYNOPSIS |
4538 | bkah_skip_index_tuple() |
4539 | rseq value returned by bka_range_seq_init() |
4540 | range_info record chain for the next range/key returned by MRR |
4541 | |
4542 | DESCRIPTION |
4543 | This is wrapper for JOIN_CACHE_BKA_UNIQUE::skip_index_tuple method, |
4544 | see comments there. |
4545 | |
4546 | NOTE |
4547 | This function is used as a RANGE_SEQ_IF::skip_index_tuple callback. |
4548 | |
4549 | RETURN VALUE |
4550 | 0 some records from the chain satisfy the index condition |
4551 | 1 otherwise |
4552 | */ |
4553 | |
4554 | static |
4555 | bool bkah_skip_index_tuple(range_seq_t rseq, range_id_t range_info) |
4556 | { |
4557 | DBUG_ENTER("bka_unique_skip_index_tuple" ); |
4558 | JOIN_CACHE_BKAH *cache= (JOIN_CACHE_BKAH *) rseq; |
4559 | THD *thd= cache->thd(); |
4560 | bool res; |
4561 | status_var_increment(thd->status_var.ha_icp_attempts); |
4562 | if (!(res= cache->skip_index_tuple(range_info))) |
4563 | status_var_increment(thd->status_var.ha_icp_match); |
4564 | DBUG_RETURN(res); |
4565 | } |
4566 | |
4567 | |
4568 | /* |
4569 | Prepare to read record from BKAH cache matching the current joined record |
4570 | |
4571 | SYNOPSIS |
4572 | prepare_look_for_matches() |
4573 | skip_last <-> ignore the last record in the buffer (always unused here) |
4574 | |
4575 | DESCRIPTION |
4576 | The function prepares to iterate over records in the join cache buffer |
4577 | matching the record loaded into the record buffer for join_tab when |
4578 | performing join operation by BKAH join algorithm. With BKAH algorithm, if |
4579 | association labels are used, then record loaded into the record buffer |
    for join_tab always has a direct reference to the chain of the matching
    records from the join buffer. If association labels are not used then
    the chain of the matching records is obtained by the call of the
4583 | get_key_chain_by_join_key function. |
4584 | |
4585 | RETURN VALUE |
4586 | TRUE there are no records in the buffer to iterate over |
4587 | FALSE otherwise |
4588 | */ |
4589 | |
4590 | bool JOIN_CACHE_BKAH::prepare_look_for_matches(bool skip_last) |
4591 | { |
4592 | last_matching_rec_ref_ptr= next_matching_rec_ref_ptr= 0; |
4593 | if (no_association && |
4594 | !(curr_matching_chain= get_matching_chain_by_join_key())) //psergey: added '!' |
4595 | return 1; |
4596 | last_matching_rec_ref_ptr= get_next_rec_ref(curr_matching_chain); |
4597 | return 0; |
4598 | } |
4599 | |
4600 | /* |
4601 | Initialize the BKAH join cache |
4602 | |
4603 | SYNOPSIS |
4604 | init |
4605 | for_explain join buffer is initialized for explain only |
4606 | |
4607 | DESCRIPTION |
4608 | The function initializes the cache structure. It is supposed to be called |
4609 | right after a constructor for the JOIN_CACHE_BKAH. |
4610 | |
4611 | NOTES |
4612 | The function first constructs a companion object of the type |
4613 | JOIN_TAB_SCAN_MRR, then it calls the init method of the parent class. |
4614 | |
4615 | RETURN VALUE |
4616 | 0 initialization with buffer allocations has been succeeded |
4617 | 1 otherwise |
4618 | */ |
4619 | |
4620 | int JOIN_CACHE_BKAH::init(bool for_explain) |
4621 | { |
4622 | bool check_only_first_match= join_tab->check_only_first_match(); |
4623 | |
4624 | no_association= MY_TEST(mrr_mode & HA_MRR_NO_ASSOCIATION); |
4625 | |
4626 | RANGE_SEQ_IF rs_funcs= { bka_range_seq_key_info, |
4627 | bkah_range_seq_init, |
4628 | bkah_range_seq_next, |
4629 | check_only_first_match && !no_association ? |
4630 | bkah_range_seq_skip_record : 0, |
4631 | bkah_skip_index_tuple }; |
4632 | |
4633 | DBUG_ENTER("JOIN_CACHE_BKAH::init" ); |
4634 | |
4635 | if (!(join_tab_scan= new JOIN_TAB_SCAN_MRR(join, join_tab, |
4636 | mrr_mode, rs_funcs))) |
4637 | DBUG_RETURN(1); |
4638 | |
4639 | DBUG_RETURN(JOIN_CACHE_HASHED::init(for_explain)); |
4640 | } |
4641 | |
4642 | |
4643 | /* |
4644 | Check the index condition of the joined table for a record from the BKA cache |
4645 | |
4646 | SYNOPSIS |
4647 | skip_index_tuple() |
4648 | range_info record chain returned by MRR |
4649 | |
4650 | DESCRIPTION |
4651 | See JOIN_CACHE_BKA::skip_index_tuple(). |
    This function is the variant for use with the class JOIN_CACHE_BKAH.
4653 | The difference from JOIN_CACHE_BKA case is that there may be multiple |
4654 | previous table record combinations that share the same key(MRR range). |
4655 | As a consequence, we need to loop through the chain of all table record |
4656 | combinations that match the given MRR range key range_info until we find |
4657 | one that satisfies the index condition. |
4658 | |
4659 | NOTE |
4660 | Possible optimization: |
4661 | Before we unpack the record from a previous table |
4662 | check if this table is used in the condition. |
4663 | If so then unpack the record otherwise skip the unpacking. |
4664 | This should be done by a special virtual method |
4665 | get_partial_record_by_pos(). |
4666 | |
4667 | RETURN VALUE |
4668 | 1 any record combination from the chain referred by range_info |
4669 | does not satisfy the index condition |
4670 | 0 otherwise |
4671 | |
4672 | |
4673 | */ |
4674 | |
4675 | bool JOIN_CACHE_BKAH::skip_index_tuple(range_id_t range_info) |
4676 | { |
4677 | uchar *last_rec_ref_ptr= get_next_rec_ref((uchar*) range_info); |
4678 | uchar *next_rec_ref_ptr= last_rec_ref_ptr; |
4679 | DBUG_ENTER("JOIN_CACHE_BKAH::skip_index_tuple" ); |
4680 | do |
4681 | { |
4682 | next_rec_ref_ptr= get_next_rec_ref(next_rec_ref_ptr); |
4683 | uchar *rec_ptr= next_rec_ref_ptr + rec_fields_offset; |
4684 | get_record_by_pos(rec_ptr); |
4685 | if (join_tab->cache_idx_cond->val_int()) |
4686 | DBUG_RETURN(FALSE); |
4687 | } while(next_rec_ref_ptr != last_rec_ref_ptr); |
4688 | DBUG_RETURN(TRUE); |
4689 | } |
4690 | |