1/* Copyright (C) 2000-2006 MySQL AB
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; version 2 of the License.
6
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 GNU General Public License for more details.
11
12 You should have received a copy of the GNU General Public License
13 along with this program; if not, write to the Free Software
14 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
15
16/**
17 @file
18
19 @brief
20 join cache optimizations
21
22 @defgroup Query_Optimizer Query Optimizer
23 @{
24*/
25
26#ifdef USE_PRAGMA_IMPLEMENTATION
27#pragma implementation // gcc: Class implementation
28#endif
29
30#include "mariadb.h"
31#include "key.h"
32#include "sql_base.h"
33#include "sql_select.h"
34#include "opt_subselect.h"
35
36#define NO_MORE_RECORDS_IN_BUFFER (uint)(-1)
37
38static void save_or_restore_used_tabs(JOIN_TAB *join_tab, bool save);
39
40/*****************************************************************************
41 * Join cache module
42******************************************************************************/
43
44/*
45 Fill in the descriptor of a flag field associated with a join cache
46
47 SYNOPSIS
48 add_flag_field_to_join_cache()
49 str position in a record buffer to copy the field from/to
50 length length of the field
51 field IN/OUT pointer to the field descriptor to fill in
52
53 DESCRIPTION
54 The function fills in the descriptor of a cache flag field to which
55 the parameter 'field' points. The function uses the first two
56 parameters to set the position in the record buffer from/to which
57 the field value is to be copied and the length of the copied fragment.
58 Before returning the result the function increments the value of
59 *field by 1.
60 The function ignores the fields 'blob_length' and 'offset' of the
61 descriptor.
62
63 RETURN VALUE
64 the length of the field
65*/
66
67static
68uint add_flag_field_to_join_cache(uchar *str, uint length, CACHE_FIELD **field)
69{
70 CACHE_FIELD *copy= *field;
71 copy->str= str;
72 copy->length= length;
73 copy->type= 0;
74 copy->field= 0;
75 copy->referenced_field_no= 0;
76 (*field)++;
77 return length;
78}
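
/*
  Illustration (a minimal standalone sketch, not part of the server code) of
  the descriptor-filling pattern implemented above: each call fills one
  flag-field descriptor and advances the descriptor cursor, while the caller
  accumulates the total flag-field length. The struct below is a simplified
  stand-in for CACHE_FIELD with only the members this pattern touches.

    #include <cstdio>

    struct Flag_copy { unsigned char *str; unsigned length; };

    static unsigned add_flag(unsigned char *str, unsigned length,
                             Flag_copy **field)
    {
      Flag_copy *copy= *field;
      copy->str= str;        // where the flag is copied from/to
      copy->length= length;  // how many bytes to copy
      (*field)++;            // advance to the next descriptor slot
      return length;
    }

    int main()
    {
      unsigned char match_flag= 0, null_bytes[2]= {0, 0};
      Flag_copy descr[2], *cur= descr;
      unsigned len= 0;
      len+= add_flag(&match_flag, 1, &cur);   // match flag descriptor
      len+= add_flag(null_bytes, 2, &cur);    // null bitmap descriptor
      // prints: descriptors: 2, total flag length: 3
      printf("descriptors: %d, total flag length: %u\n",
             (int) (cur - descr), len);
      return 0;
    }
*/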
79
80
81/*
82 Fill in the descriptors of table data fields associated with a join cache
83
84 SYNOPSIS
85 add_table_data_fields_to_join_cache()
86 tab descriptors of fields from this table are to be filled
87 field_set descriptors for only these fields are to be created
88 field_cnt IN/OUT counter of data fields
89 descr IN/OUT pointer to the first descriptor to be filled
90 field_ptr_cnt IN/OUT counter of pointers to the data fields
91 descr_ptr IN/OUT pointer to the first pointer to blob descriptors
92
93 DESCRIPTION
94 The function fills in the descriptors of cache data fields from the table
95 'tab'. The descriptors are filled only for the fields marked in the
96 bitmap 'field_set'.
97 The function fills the descriptors starting from the position pointed
98 by 'descr'. If an added field is of a BLOB type then a pointer to
99 its descriptor is added to the array descr_ptr.
100 On return 'descr' points to the position after the last added
101 descriptor while 'descr_ptr' points to the position right after the
102 last added pointer.
103
104 RETURN VALUE
105 the total length of the added fields
106*/
107
108static
109uint add_table_data_fields_to_join_cache(JOIN_TAB *tab,
110 MY_BITMAP *field_set,
111 uint *field_cnt,
112 CACHE_FIELD **descr,
113 uint *field_ptr_cnt,
114 CACHE_FIELD ***descr_ptr)
115{
116 Field **fld_ptr;
117 uint len= 0;
118 CACHE_FIELD *copy= *descr;
119 CACHE_FIELD **copy_ptr= *descr_ptr;
120 uint used_fields= bitmap_bits_set(field_set);
121 for (fld_ptr= tab->table->field; used_fields; fld_ptr++)
122 {
123 if (bitmap_is_set(field_set, (*fld_ptr)->field_index))
124 {
125 len+= (*fld_ptr)->fill_cache_field(copy);
126 if (copy->type == CACHE_BLOB)
127 {
128 *copy_ptr= copy;
129 copy_ptr++;
130 (*field_ptr_cnt)++;
131 }
132 copy->field= *fld_ptr;
133 copy->referenced_field_no= 0;
134 copy++;
135 (*field_cnt)++;
136 used_fields--;
137 }
138 }
139 *descr= copy;
140 *descr_ptr= copy_ptr;
141 return len;
142}
143
144/*
145 Determine the various field counts associated with a record in the cache
146
147 SYNOPSIS
148 calc_record_fields()
149
150 DESCRIPTION
151 The function counts the total number of fields stored in a record
152 of the cache and saves this number in the 'fields' member. It also
153 determines the number of flag fields and the number of blobs.
154 The function sets 'with_match_flag' on if 'join_tab' needs a match flag
155 i.e. if it is the first inner table of an outer join or a semi-join.
156
157 RETURN VALUE
158 none
159*/
160
161void JOIN_CACHE::calc_record_fields()
162{
163 JOIN_TAB *tab;
164
165 if (prev_cache)
166 tab= prev_cache->join_tab;
167 else
168 {
169 if (join_tab->bush_root_tab)
170 {
171 /*
172 --ot1--SJM1--------------ot2--...
173 |
174 |
175 +-it1--...--itN
176 ^____________ this->join_tab is somewhere here,
177 inside an sjm nest.
178
179 The join buffer should store the values of it1.*, it2.*, ..
180 It should not store values of ot1.*.
181 */
182 tab= join_tab->bush_root_tab->bush_children->start;
183 }
184 else
185 {
186 /*
187 -ot1--ot2--SJM1--SJM2--------------ot3--...--otN
188 | | ^
189 | +-it21--...--it2N |
190 | \-- we're somewhere here,
191 +-it11--...--it1N at the top level
192
193 The join buffer should store the values of
194
195 ot1.*, ot2.*, it1{i}, it2{j}.*, ot3.*, ...
196
197 that is, we should start from the first non-const top-level table.
198
199 We will need to store columns of SJ-inner tables (it_X_Y.*), but we're
200 not interested in storing the columns of materialization tables
201 themselves. Because of that, if the first non-const top-level table is a
202 materialized table, we move to its bush_children:
203 */
204 tab= join->join_tab + join->const_tables;
205 if (tab->bush_children)
206 tab= tab->bush_children->start;
207 }
208 }
209 DBUG_ASSERT(!tab->bush_children);
210
211 start_tab= tab;
212 fields= 0;
213 blobs= 0;
214 flag_fields= 0;
215 data_field_count= 0;
216 data_field_ptr_count= 0;
217 referenced_fields= 0;
218
219 /*
220 The following loop will get inside SJM nests, because data may be unpacked
221 to sjm-inner tables.
222 */
223 for (; tab != join_tab ; tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
224 {
225 tab->calc_used_field_length(FALSE);
226 flag_fields+= MY_TEST(tab->used_null_fields || tab->used_uneven_bit_fields);
227 flag_fields+= MY_TEST(tab->table->maybe_null);
228 fields+= tab->used_fields;
229 blobs+= tab->used_blobs;
230 }
231 if ((with_match_flag= join_tab->use_match_flag()))
232 flag_fields++;
233 fields+= flag_fields;
234}
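
/*
  A worked example of the counting done above, with made-up table statistics
  (the numbers are purely illustrative, not produced by the server): suppose
  the buffer keeps columns of two tables, t1 with 3 used fields of which one
  is nullable, and t2 with 2 used non-nullable fields where t2 is an inner
  table of an outer join (table->maybe_null is set), and join_tab needs a
  match flag.

    #include <cstdio>

    int main()
    {
      unsigned fields= 0, flag_fields= 0;

      flag_fields+= 1;       // t1 has nullable used fields => null bitmap field
      fields+= 3;            // t1 data fields

      flag_fields+= 1;       // t2 has maybe_null set => null row flag field
      fields+= 2;            // t2 data fields

      flag_fields+= 1;       // match flag for join_tab
      fields+= flag_fields;  // flag fields are counted in 'fields' as well

      printf("flag_fields=%u fields=%u\n", flag_fields, fields);  // 3 and 8
      return 0;
    }
*/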
235
236
237/*
238 Collect information on join key arguments
239
240 SYNOPSIS
241 collect_info_on_key_args()
242
243 DESCRIPTION
244 The function traverses the ref expressions that are used to access the
245 joined table join_tab. For each table 'tab' whose fields are to be stored
246 in the join buffer of the cache the function finds the fields from 'tab'
247 that occur in the ref expressions and marks these fields in the bitmap
248 tab->table->tmp_set. The function counts how many of these fields are stored
249 in this cache and how many are stored in the previous caches
250 and saves the results of the counting in 'local_key_arg_fields' and
251 'external_key_arg_fields' respectively.
252
253 NOTES
254 The function does not do anything if no key is used to join the records
255 from join_tab.
256
257 RETURN VALUE
258 none
259*/
260
261void JOIN_CACHE::collect_info_on_key_args()
262{
263 JOIN_TAB *tab;
264 JOIN_CACHE *cache;
265 local_key_arg_fields= 0;
266 external_key_arg_fields= 0;
267
268 if (!is_key_access())
269 return;
270
271 TABLE_REF *ref= &join_tab->ref;
272 cache= this;
273 do
274 {
275 for (tab= cache->start_tab; tab != cache->join_tab;
276 tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
277 {
278 uint key_args;
279 bitmap_clear_all(&tab->table->tmp_set);
280 for (uint i= 0; i < ref->key_parts; i++)
281 {
282 Item *ref_item= ref->items[i];
283 if (!(tab->table->map & ref_item->used_tables()))
284 continue;
285 ref_item->walk(&Item::add_field_to_set_processor, 1, tab->table);
286 }
287 if ((key_args= bitmap_bits_set(&tab->table->tmp_set)))
288 {
289 if (cache == this)
290 local_key_arg_fields+= key_args;
291 else
292 external_key_arg_fields+= key_args;
293 }
294 }
295 cache= cache->prev_cache;
296 }
297 while (cache);
298
299 return;
300}
301
302
303/*
304 Allocate memory for descriptors and pointers to them associated with the cache
305
306 SYNOPSIS
307 alloc_fields()
308
309 DESCRIPTION
310 The function allocates memory for the array of field descriptors
311 and the array of pointers to the field descriptors used to copy
312 join record data from record buffers into the join buffer and
313 back. Some pointers refer to field descriptors associated
314 with previous caches. They are placed at the beginning of the array
315 of pointers and their total number is stored in external_key_arg_fields.
316 A pointer to the first array is assigned to field_descr and the number
317 of the elements in it is precalculated by the function calc_record_fields.
318 The allocated arrays are adjacent.
319
320 NOTES
321 The memory is allocated in join->thd->mem_root
322
323 RETURN VALUE
324 0 if memory for the arrays has been successfully allocated, 1 otherwise
325*/
326
327int JOIN_CACHE::alloc_fields()
328{
329 uint ptr_cnt= external_key_arg_fields+blobs+1;
330 uint fields_size= sizeof(CACHE_FIELD)*fields;
331 field_descr= (CACHE_FIELD*) join->thd->alloc(fields_size +
332 sizeof(CACHE_FIELD*)*ptr_cnt);
333 blob_ptr= (CACHE_FIELD **) ((uchar *) field_descr + fields_size);
334 return (field_descr == NULL);
335}
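
/*
  A hedged sketch of the single-allocation layout produced above, standalone
  and using plain malloc instead of the server's mem_root allocator: the
  CACHE_FIELD descriptors occupy the first part of the block and the array of
  pointers to descriptors starts immediately after them.

    #include <cstdlib>
    #include <cstdio>

    struct Cache_field_stub { unsigned char *str; unsigned length; };

    int main()
    {
      unsigned fields= 8, ptr_cnt= 3;   // hypothetical counts
      size_t fields_size= sizeof(Cache_field_stub) * fields;
      void *block= malloc(fields_size + sizeof(Cache_field_stub*) * ptr_cnt);
      if (!block)
        return 1;
      Cache_field_stub *field_descr= (Cache_field_stub*) block;
      Cache_field_stub **blob_ptr=
        (Cache_field_stub**) ((unsigned char*) field_descr + fields_size);
      printf("descriptors at %p, pointer array at %p\n",
             (void*) field_descr, (void*) blob_ptr);
      free(block);
      return 0;
    }
*/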
336
337
338/*
339 Create descriptors of the record flag fields stored in the join buffer
340
341 SYNOPSIS
342 create_flag_fields()
343
344 DESCRIPTION
345 The function creates descriptors of the record flag fields stored
346 in the join buffer. These are descriptors for:
347 - an optional match flag field,
348 - table null bitmap fields,
349 - table null row fields.
350 The match flag field is created when 'join_tab' is the first inner
351 table of an outer join or a semi-join. A null bitmap field is
352 created for any table whose fields are to be stored in the join
353 buffer if at least one of these fields is nullable or is a BIT field
354 whose bits are partially stored with null bits. A null row flag
355 is created for any table assigned to the cache if it is an inner
356 table of an outer join.
357 The descriptors for flag fields are placed one after another at the
358 beginning of the array of field descriptors 'field_descr' that
359 contains 'fields' elements. If there is a match flag field the
360 descriptor for it is always first in the sequence of flag fields.
361 The descriptors for other flag fields can follow in an arbitrary
362 order.
363 The flag field values follow in a record stored in the join buffer
364 in the same order as field descriptors, with the match flag always
365 coming first.
366 The function sets the value of 'flag_fields' to the total number
367 of the descriptors created for the flag fields.
368 The function sets the value of 'length' to the total length of the
369 flag fields.
370
371 RETURN VALUE
372 none
373*/
374
375void JOIN_CACHE::create_flag_fields()
376{
377 CACHE_FIELD *copy;
378 JOIN_TAB *tab;
379
380 copy= field_descr;
381
382 length=0;
383
384 /* If there is a match flag the first field is always used for this flag */
385 if (with_match_flag)
386 length+= add_flag_field_to_join_cache((uchar*) &join_tab->found,
387 sizeof(join_tab->found),
388 &copy);
389
390 /* Create fields for all null bitmaps and null row flags that are needed */
391 for (tab= start_tab; tab != join_tab;
392 tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
393 {
394 TABLE *table= tab->table;
395
396 /* Create a field for the null bitmap from table if needed */
397 if (tab->used_null_fields || tab->used_uneven_bit_fields)
398 length+= add_flag_field_to_join_cache(table->null_flags,
399 table->s->null_bytes,
400 &copy);
401
402 /* Create a field for the null row flag if needed */
403 if (table->maybe_null)
404 length+= add_flag_field_to_join_cache((uchar*) &table->null_row,
405 sizeof(table->null_row),
406 &copy);
407 }
408
409 /* Theoretically the new value of flag_fields can be less than the old one */
410 flag_fields= (uint)(copy-field_descr);
411}
412
413
414/*
415 Create descriptors of the fields used to build access keys to the joined table
416
417 SYNOPSIS
418 create_key_arg_fields()
419
420 DESCRIPTION
421 The function creates descriptors of the record fields stored in the join
422 buffer that are used to build access keys to the joined table. These
423 fields are put into the buffer ahead of other record fields stored in
424 the buffer. Such placement helps to optimize construction of access keys.
425 For each field that is used to build access keys to the joined table but
426 is stored in some other join cache buffer the function saves a pointer
427 to the field descriptor. The array of such pointers is placed in
428 the join cache structure just before the array of pointers to the
429 blob fields blob_ptr.
430 Any field stored in a join cache buffer that is used to construct keys
431 to access tables associated with other join caches is called a referenced
432 field. It receives a unique number that is saved by the function in the
433 member 'referenced_field_no' of the CACHE_FIELD descriptor for the field.
434 This number is used as index to the array of offsets to the referenced
435 fields that are saved and put in the join cache buffer after all record
436 fields.
437 The function also finds out whether the keys to access join_tab
438 can be considered as embedded and, if so, sets the flag 'use_emb_key' in
439 this join cache appropriately.
440
441 NOTES
442 When a key to access the joined table 'join_tab' is constructed the array
443 of pointers to the field descriptors for the external fields is looked
444 through. For each of these pointers we find out in which previous join cache
445 the referenced field is stored. The value of 'referenced_field_no'
446 provides us with the index into the array of offsets for referenced
447 fields stored in the join cache. The offset read by the index allows
448 us to read the field without reading all other fields of the record
449 stored in the join cache buffer. This optimizes the construction of keys
450 to access 'join_tab' when some key arguments are stored in the previous
451 join caches.
452
453 NOTES
454 The function does not do anything if no key is used to join the records
455 from join_tab.
456
457 RETURN VALUE
458 none
459*/
460void JOIN_CACHE::create_key_arg_fields()
461{
462 JOIN_TAB *tab;
463 JOIN_CACHE *cache;
464
465 if (!is_key_access())
466 return;
467
468 /*
469 Save pointers to the cache fields in previous caches
470 that are used to build keys for this key access.
471 */
472 cache= this;
473 uint ext_key_arg_cnt= external_key_arg_fields;
474 CACHE_FIELD *copy;
475 CACHE_FIELD **copy_ptr= blob_ptr;
476 while (ext_key_arg_cnt)
477 {
478 cache= cache->prev_cache;
479 for (tab= cache->start_tab; tab != cache->join_tab;
480 tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
481 {
482 CACHE_FIELD *copy_end;
483 MY_BITMAP *key_read_set= &tab->table->tmp_set;
484 /* key_read_set contains the bitmap of tab's fields referenced by ref */
485 if (bitmap_is_clear_all(key_read_set))
486 continue;
487 copy_end= cache->field_descr+cache->fields;
488 for (copy= cache->field_descr+cache->flag_fields; copy < copy_end; copy++)
489 {
490 /*
491 (1) - when we store rowids for DuplicateWeedout, they have
492 copy->field==NULL
493 */
494 if (copy->field && // (1)
495 copy->field->table == tab->table &&
496 bitmap_is_set(key_read_set, copy->field->field_index))
497 {
498 *copy_ptr++= copy;
499 ext_key_arg_cnt--;
500 if (!copy->referenced_field_no)
501 {
502 /*
503 Register the referenced field 'copy':
504 - set the offset number in copy->referenced_field_no,
505 - adjust the value of the flag 'with_length',
506 - adjust the values of 'pack_length' and
507 of 'pack_length_with_blob_ptrs'.
508 */
509 copy->referenced_field_no= ++cache->referenced_fields;
510 if (!cache->with_length)
511 {
512 cache->with_length= TRUE;
513 uint sz= cache->get_size_of_rec_length();
514 cache->base_prefix_length+= sz;
515 cache->pack_length+= sz;
516 cache->pack_length_with_blob_ptrs+= sz;
517 }
518 cache->pack_length+= cache->get_size_of_fld_offset();
519 cache->pack_length_with_blob_ptrs+= cache->get_size_of_fld_offset();
520 }
521 }
522 }
523 }
524 }
525 /* After this 'blob_ptr' shall not be changed */
526 blob_ptr= copy_ptr;
527
528 /* Now create local fields that are used to build ref for this key access */
529 copy= field_descr+flag_fields;
530 for (tab= start_tab; tab != join_tab;
531 tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
532 {
533 length+= add_table_data_fields_to_join_cache(tab, &tab->table->tmp_set,
534 &data_field_count, &copy,
535 &data_field_ptr_count,
536 &copy_ptr);
537 }
538
539 use_emb_key= check_emb_key_usage();
540
541 return;
542}
543
544
545/*
546 Create descriptors of all remaining data fields stored in the join buffer
547
548 SYNOPSIS
549 create_remaining_fields()
550
551 DESCRIPTION
552 The function creates descriptors for all remaining data fields of a
553 record from the join buffer. If the value returned by is_key_access() is
554 false the function creates fields for all read record fields that
555 comprise the partial join record joined with join_tab. Otherwise,
556 for each table tab, the set of the read fields for which the descriptors
557 have to be added is determined as the difference between all read fields
558 and those for which the descriptors have already been created.
559 The latter are supposed to be marked in the bitmap tab->table->tmp_set.
560 The function increases the value of 'length' by the total length of
561 the added fields.
562
563 NOTES
564 If is_key_access() returns true the function modifies the value of
565 tab->table->tmp_set for each table whose fields are stored in the cache.
566 The function calls the method Field::fill_cache_field to figure out
567 the type of the cache field and the maximal length of its representation
568 in the join buffer. If this is a blob field then additionally a pointer
569 to this field is added as an element of the array blob_ptr. For a blob
570 field only the size of the length of the blob data is taken into account.
571 It is assumed that 'data_field_count' contains the number of descriptors
572 for data fields that have already been created and 'data_field_ptr_count'
573 contains the number of pointers to such descriptors that have been
574 stored so far.
575
576 RETURN VALUE
577 none
578*/
579
580void JOIN_CACHE::create_remaining_fields()
581{
582 JOIN_TAB *tab;
583 bool all_read_fields= !is_key_access();
584 CACHE_FIELD *copy= field_descr+flag_fields+data_field_count;
585 CACHE_FIELD **copy_ptr= blob_ptr+data_field_ptr_count;
586
587 for (tab= start_tab; tab != join_tab;
588 tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
589 {
590 MY_BITMAP *rem_field_set;
591 TABLE *table= tab->table;
592
593 if (all_read_fields)
594 rem_field_set= table->read_set;
595 else
596 {
597 bitmap_invert(&table->tmp_set);
598 bitmap_intersect(&table->tmp_set, table->read_set);
599 rem_field_set= &table->tmp_set;
600 }
601
602 length+= add_table_data_fields_to_join_cache(tab, rem_field_set,
603 &data_field_count, &copy,
604 &data_field_ptr_count,
605 &copy_ptr);
606
607 /* SemiJoinDuplicateElimination: allocate space for rowid if needed */
608 if (tab->keep_current_rowid)
609 {
610 copy->str= table->file->ref;
611 if (copy->str)
612 copy->length= table->file->ref_length;
613 else
614 {
615 /* This may happen only for materialized derived tables and views */
616 copy->length= 0;
617 copy->str= (uchar *) table;
618 }
619 copy->type= CACHE_ROWID;
620 copy->field= 0;
621 copy->referenced_field_no= 0;
622 /*
623 Note: this may seem odd, but at this point we have
624 table->file->ref==NULL while table->file->ref_length is already set
625 to the correct value.
626 */
627 length += table->file->ref_length;
628 data_field_count++;
629 copy++;
630 }
631 }
632}
633
634
635
636/*
637 Calculate and set all cache constants
638
639 SYNOPSIS
640 set_constants()
641
642 DESCRIPTION
643 The function calculates and sets all precomputed constants that are used
644 when writing records into the join buffer and reading them from it.
645 It calculates the size of offsets of a record within the join buffer
646 and of a field within a record. It also calculates the number of bytes
647 used to store record lengths.
648 The function also calculates the maximal length of the representation
649 of a record in the cache excluding blob data. This value is used when
650 deciding whether more records should be added into the join
651 buffer or not.
652
653 RETURN VALUE
654 none
655*/
656
657void JOIN_CACHE::set_constants()
658{
659 /*
660 Any record from a BKA cache is prepended with the record length.
661 We use the record length when reading the buffer and building key values
662 for each record. The length allows us not to read the fields that are
663 not needed for keys.
664 If a record has a match flag it also may be skipped when the match flag
665 is on. This happens if the cache is used for a semi-join operation or
666 for an outer join when the 'not exists' optimization can be applied.
667 If some of the fields are referenced from other caches then
668 the record length allows us to easily reach the saved offsets for
669 these fields since the offsets are stored at the very end of the record.
670 However at this moment we don't know whether we have referenced fields for
671 the cache or not. Later when a referenced field is registered for the cache
672 we adjust the value of the flag 'with_length'.
673 */
674 with_length= is_key_access() ||
675 join_tab->is_inner_table_of_semi_join_with_first_match() ||
676 join_tab->is_inner_table_of_outer_join();
677 /*
678 At this moment we don't know yet the value of 'referenced_fields',
679 but in any case it can't be greater than the value of 'fields'.
680 */
681 uint len= length + fields*sizeof(uint)+blobs*sizeof(uchar *) +
682 (prev_cache ? prev_cache->get_size_of_rec_offset() : 0) +
683 sizeof(ulong);
684 /*
685 The values of size_of_rec_ofs, size_of_rec_len, size_of_fld_ofs,
686 base_prefix_length, pack_length, pack_length_with_blob_ptrs
687 will be recalculated later in this function when we get the estimate
688 for the actual value of the join buffer size.
689 */
690 size_of_rec_ofs= size_of_rec_len= size_of_fld_ofs= 4;
691 base_prefix_length= (with_length ? size_of_rec_len : 0) +
692 (prev_cache ? prev_cache->get_size_of_rec_offset() : 0);
693 pack_length= (with_length ? size_of_rec_len : 0) +
694 (prev_cache ? prev_cache->get_size_of_rec_offset() : 0) +
695 length + fields*sizeof(uint);
696 pack_length_with_blob_ptrs= pack_length + blobs*sizeof(uchar *);
697 min_buff_size= 0;
698 min_records= 1;
699 buff_size= (size_t)MY_MAX(join->thd->variables.join_buff_size,
700 get_min_join_buffer_size());
701 size_of_rec_ofs= offset_size(buff_size);
702 size_of_rec_len= blobs ? size_of_rec_ofs : offset_size(len);
703 size_of_fld_ofs= size_of_rec_len;
704 base_prefix_length= (with_length ? size_of_rec_len : 0) +
705 (prev_cache ? prev_cache->get_size_of_rec_offset() : 0);
706 /*
707 The size of the offsets for referenced fields will be added later.
708 The values of 'pack_length' and 'pack_length_with_blob_ptrs' are adjusted
709 every time when the first reference to the referenced field is registered.
710 */
711 pack_length= (with_length ? size_of_rec_len : 0) +
712 (prev_cache ? prev_cache->get_size_of_rec_offset() : 0) +
713 length;
714 pack_length_with_blob_ptrs= pack_length + blobs*sizeof(uchar *);
715}
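
/*
  A sketch of the idea behind offset_size() as it is used above (the real
  helper lives elsewhere in the server; the thresholds below are an assumed
  illustration, not its exact definition): the larger the join buffer, the
  more bytes are needed to store a record offset or record length within it.

    #include <cstdio>

    static unsigned offset_bytes(size_t max_value)
    {
      if (max_value < 256)          return 1;  // fits in one byte
      if (max_value < 256UL * 256)  return 2;  // fits in two bytes
      return 4;                                // otherwise four bytes
    }

    int main()
    {
      size_t buff_size= 256 * 1024;  // hypothetical 256KB join buffer
      printf("size_of_rec_ofs=%u\n", offset_bytes(buff_size));   // 4
      printf("size_of_rec_len=%u\n", offset_bytes(1000));        // 2
      return 0;
    }
*/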
716
717
718/*
719 Get maximum total length of all affixes of a record in the join cache buffer
720
721 SYNOPSIS
722 get_record_max_affix_length()
723
724 DESCRIPTION
725 The function calculates the maximum possible total length of all affixes
726 of a record in the join cache buffer, that is made of:
727 - the length of all prefixes used in this cache,
728 - the length of the match flag if it's needed
729 - the total length of the maximum possible offsets to the fields of
730 a record in the buffer.
731
732 RETURN VALUE
733 The maximum total length of all affixes of a record in the join buffer
734*/
735
736uint JOIN_CACHE::get_record_max_affix_length()
737{
738 uint len= get_prefix_length() +
739 MY_TEST(with_match_flag) +
740 size_of_fld_ofs * data_field_count;
741 return len;
742}
743
744
745/*
746 Get the minimum possible size of the cache join buffer
747
748 SYNOPSIS
749 get_min_join_buffer_size()
750
751 DESCRIPTION
752 At its first invocation for the cache the function calculates the
753 minimum possible size of the join buffer of the cache. This value depends
754 on the minimal number of records 'min_records' to be stored in the join
755 buffer. The number is supposed to be determined by the procedure that
756 chooses the best access path to the joined table join_tab in the execution
757 plan. After calculating this size the function saves it
758 in the field 'min_buff_size' in order to use it directly at the next
759 invocations of the function.
760
761 NOTES
762 Currently the number of minimal records is just set to 1.
763
764 RETURN VALUE
765 The minimal possible size of the join buffer of this cache
766*/
767
768size_t JOIN_CACHE::get_min_join_buffer_size()
769{
770 if (!min_buff_size)
771 {
772 size_t len= 0;
773 size_t len_last= 0;
774 for (JOIN_TAB *tab= start_tab; tab != join_tab;
775 tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
776 {
777 len+= tab->get_max_used_fieldlength();
778 len_last+= tab->get_used_fieldlength();
779 }
780 size_t len_addon= get_record_max_affix_length() +
781 get_max_key_addon_space_per_record();
782 len+= len_addon;
783 len_last+= len_addon;
784 size_t min_sz= len*(min_records-1) + len_last;
785 min_sz+= pack_length_with_blob_ptrs;
786 size_t add_sz= 0;
787 for (uint i=0; i < min_records; i++)
788 add_sz+= join_tab_scan->aux_buffer_incr(i+1);
789 avg_aux_buffer_incr= add_sz/min_records;
790 min_sz+= add_sz;
791 set_if_bigger(min_sz, 1);
792 min_buff_size= min_sz;
793 }
794 return min_buff_size;
795}
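
/*
  A worked example of the formula above with made-up lengths (illustration
  only, not values computed by the server): with min_records equal to 1 the
  minimum size reduces to the actual length of one record plus its affixes,
  the blob-pointer image and the auxiliary buffer increment.

    #include <cstdio>

    int main()
    {
      size_t min_records= 1;
      size_t len= 120;       // maximum used field length of a record
      size_t len_last= 80;   // actual used field length of the last record
      size_t len_addon= 12;  // max affix length + key addon space per record
      size_t pack_length_with_blob_ptrs= 100;
      size_t aux_incr= 16;   // auxiliary buffer growth for the single record

      size_t min_sz= (len + len_addon) * (min_records - 1) +
                     (len_last + len_addon) +
                     pack_length_with_blob_ptrs +
                     aux_incr;
      printf("min_buff_size=%zu\n", min_sz);  // 208
      return 0;
    }
*/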
796
797
798/*
799 Get the maximum possible size of the cache join buffer
800
801 SYNOPSIS
802 get_max_join_buffer_size()
803
804 optimize_buff_size FALSE <-> do not take more memory than needed for
805 the estimated number of records in the partial join
806
807 DESCRIPTION
808 At its first invocation for the cache the function calculates the
809 maximum possible size of join buffer for the cache. If the parameter
810 optimize_buff_size is true then this value does not exceed the size of the
811 space needed for the estimated number of records 'max_records' in the
812 partial join that joins tables from the first one through join_tab. This
813 value is also capped by the value of join_tab->join_buffer_size_limit,
814 if it has been set to a non-zero value, and by the value of the system
815 parameter join_buffer_size otherwise. After calculating this
816 size the function saves the value in the field 'max_buff_size'
817 in order to use it directly at the next invocations of the function.
818
819 NOTES
820 Currently the value of join_tab->join_buffer_size_limit is initialized
821 to 0 and is never reset.
822
823 RETURN VALUE
824 The maximum possible size of the join buffer of this cache
825*/
826
827size_t JOIN_CACHE::get_max_join_buffer_size(bool optimize_buff_size)
828{
829 if (!max_buff_size)
830 {
831 size_t max_sz;
832 size_t min_sz= get_min_join_buffer_size();
833 size_t len= 0;
834 for (JOIN_TAB *tab= start_tab; tab != join_tab;
835 tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
836 {
837 len+= tab->get_used_fieldlength();
838 }
839 len+= get_record_max_affix_length();
840 avg_record_length= len;
841 len+= get_max_key_addon_space_per_record() + avg_aux_buffer_incr;
842 space_per_record= len;
843
844 size_t limit_sz= (size_t)join->thd->variables.join_buff_size;
845 if (join_tab->join_buffer_size_limit)
846 set_if_smaller(limit_sz, join_tab->join_buffer_size_limit);
847 if (!optimize_buff_size)
848 max_sz= limit_sz;
849 else
850 {
851 if (limit_sz / max_records > space_per_record)
852 max_sz= space_per_record * max_records;
853 else
854 max_sz= limit_sz;
855 max_sz+= pack_length_with_blob_ptrs;
856 set_if_smaller(max_sz, limit_sz);
857 }
858 set_if_bigger(max_sz, min_sz);
859 max_buff_size= max_sz;
860 }
861 return max_buff_size;
862}
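
/*
  A compact standalone sketch of the capping logic above, with hypothetical
  numbers: the optimized buffer size is the space needed for the expected
  number of records, never above the configured limit and never below the
  minimum returned by get_min_join_buffer_size().

    #include <algorithm>
    #include <cstdio>

    int main()
    {
      size_t limit_sz= 256 * 1024;  // join_buffer_size or a smaller per-table limit
      size_t min_sz= 4096;          // result of get_min_join_buffer_size()
      size_t space_per_record= 150;
      size_t max_records= 1000;     // expected cardinality of the partial join
      size_t blob_ptrs_len= 100;    // pack_length_with_blob_ptrs

      size_t max_sz;
      if (limit_sz / max_records > space_per_record)
        max_sz= space_per_record * max_records;  // room for all expected records
      else
        max_sz= limit_sz;
      max_sz+= blob_ptrs_len;
      max_sz= std::min(max_sz, limit_sz);
      max_sz= std::max(max_sz, min_sz);
      printf("max_buff_size=%zu\n", max_sz);     // 150100
      return 0;
    }
*/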
863
864
865/*
866 Allocate memory for a join buffer
867
868 SYNOPSIS
869 alloc_buffer()
870
871 DESCRIPTION
872 The function allocates a lump of memory for the cache join buffer.
873 Initially the function sets the size of the buffer buff_size equal to
874 the value returned by get_max_join_buffer_size(). If the total size of
875 the space intended to be used for the join buffers employed by the
876 tables from the first one through join_tab exceeds the value of the
877 system parameter join_buff_space_limit, then the function first tries
878 to shrink the used buffers to make the occupied space fit the maximum
879 memory allowed to be used for all join buffers in total. After
880 this the function tries to allocate a join buffer for join_tab.
881 If it fails to do so, it decrements the requested size of the join
882 buffer, shrinks proportionally the join buffers used for the previous
883 tables and tries to allocate a buffer for join_tab. In the case of a
884 failure the function repeats its attempts with smaller and smaller
885 requested sizes of the buffer, but not more than 4 times.
886
887 RETURN VALUE
888 0 if the memory has been successfully allocated
889 1 otherwise
890*/
891
892int JOIN_CACHE::alloc_buffer()
893{
894 JOIN_TAB *tab;
895 JOIN_CACHE *cache;
896 ulonglong curr_buff_space_sz= 0;
897 ulonglong curr_min_buff_space_sz= 0;
898 ulonglong join_buff_space_limit=
899 join->thd->variables.join_buff_space_limit;
900 bool optimize_buff_size=
901 optimizer_flag(join->thd, OPTIMIZER_SWITCH_OPTIMIZE_JOIN_BUFFER_SIZE);
902 double partial_join_cardinality= (join_tab-1)->get_partial_join_cardinality();
903 buff= NULL;
904 min_buff_size= 0;
905 max_buff_size= 0;
906 min_records= 1;
907 max_records= (size_t) (partial_join_cardinality <= join_buff_space_limit ?
908 (ulonglong) partial_join_cardinality : join_buff_space_limit);
909 set_if_bigger(max_records, 10);
910 min_buff_size= get_min_join_buffer_size();
911 buff_size= get_max_join_buffer_size(optimize_buff_size);
912
913 for (tab= start_tab; tab!= join_tab;
914 tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
915 {
916 cache= tab->cache;
917 if (cache)
918 {
919 curr_min_buff_space_sz+= cache->get_min_join_buffer_size();
920 curr_buff_space_sz+= cache->get_join_buffer_size();
921 }
922 }
923 curr_min_buff_space_sz+= min_buff_size;
924 curr_buff_space_sz+= buff_size;
925
926 if (curr_min_buff_space_sz > join_buff_space_limit ||
927 (curr_buff_space_sz > join_buff_space_limit &&
928 (!optimize_buff_size ||
929 join->shrink_join_buffers(join_tab, curr_buff_space_sz,
930 join_buff_space_limit))))
931 goto fail;
932
933 if (for_explain_only)
934 return 0;
935
936 for (size_t buff_size_decr= (buff_size-min_buff_size)/4 + 1; ; )
937 {
938 size_t next_buff_size;
939
940 if ((buff= (uchar*) my_malloc(buff_size, MYF(MY_THREAD_SPECIFIC))))
941 break;
942
943 next_buff_size= buff_size > buff_size_decr ? buff_size-buff_size_decr : 0;
944 if (next_buff_size < min_buff_size ||
945 join->shrink_join_buffers(join_tab, curr_buff_space_sz,
946 curr_buff_space_sz-buff_size_decr))
947 goto fail;
948 buff_size= next_buff_size;
949
950 curr_buff_space_sz= 0;
951 for (tab= join->join_tab+join->const_tables; tab <= join_tab; tab++)
952 {
953 cache= tab->cache;
954 if (cache)
955 curr_buff_space_sz+= cache->get_join_buffer_size();
956 }
957 }
958 return 0;
959
960fail:
961 buff_size= 0;
962 return 1;
963}
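
/*
  A simplified standalone sketch of the allocation retry pattern used above:
  on malloc failure the requested size is reduced by a fixed quarter-sized
  step until it would drop below the minimum, which bounds the number of
  attempts. The shrinking of the other caches' buffers is omitted here.

    #include <cstdlib>
    #include <cstdio>

    int main()
    {
      size_t buff_size= 1024 * 1024, min_buff_size= 64 * 1024;
      unsigned char *buff= NULL;
      size_t step= (buff_size - min_buff_size) / 4 + 1;

      for (;;)
      {
        if ((buff= (unsigned char*) malloc(buff_size)))
          break;                             // allocation succeeded
        size_t next= buff_size > step ? buff_size - step : 0;
        if (next < min_buff_size)
          return 1;                          // give up
        buff_size= next;                     // retry with a smaller buffer
      }
      printf("allocated %zu bytes\n", buff_size);
      free(buff);
      return 0;
    }
*/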
964
965
966/*
967 Shrink the size of the join buffer of the cache in a given ratio
968
969 SYNOPSIS
970 shrink_join_buffer_in_ratio()
971 n numerator of the ratio to shrink the buffer in
972 d denominator of the ratio
973
974 DESCRIPTION
975 The function first deallocates the join buffer of the cache. Then
976 it allocates a buffer that is (n/d) times smaller.
977
978 RETURN VALUE
979 FALSE on success with allocation of the smaller join buffer
980 TRUE otherwise
981*/
982
983bool JOIN_CACHE::shrink_join_buffer_in_ratio(ulonglong n, ulonglong d)
984{
985 size_t next_buff_size;
986 if (n < d)
987 return FALSE;
988 next_buff_size= (size_t) ((double) buff_size / n * d);
989 set_if_bigger(next_buff_size, min_buff_size);
990 buff_size= next_buff_size;
991 return realloc_buffer();
992}
993
994
995/*
996 Reallocate the join buffer of a join cache
997
998 SYNOPSIS
999 realloc_buffer()
1000
1001 DESCRIPTION
1002 The function reallocates the join buffer of the join cache. After this
1003 it resets the buffer for writing.
1004
1005 NOTES
1006 The function assumes that buff_size contains the new value for the join
1007 buffer size.
1008
1009 RETURN VALUE
1010 0 if the buffer has been successfully reallocated
1011 1 otherwise
1012*/
1013
1014int JOIN_CACHE::realloc_buffer()
1015{
1016 int rc;
1017 free();
1018 rc= MY_TEST(!(buff= (uchar*) my_malloc(buff_size, MYF(MY_THREAD_SPECIFIC))));
1019 reset(TRUE);
1020 return rc;
1021}
1022
1023
1024/*
1025 Initialize a join cache
1026
1027 SYNOPSIS
1028 init()
1029 for_explain join buffer is initialized for explain only
1030
1031 DESCRIPTION
1032 The function initializes the join cache structure. It is supposed to be called
1033 by init methods for classes derived from the JOIN_CACHE.
1034 The function allocates memory for the join buffer and for descriptors of
1035 the record fields stored in the buffer.
1036
1037 NOTES
1038 The code of this function should have been included into the constructor
1039 code itself. However the new operator for the class JOIN_CACHE would
1040 never fail, while memory allocation for the join buffer may well
1041 fail. That's why this memory allocation has to be placed in a
1042 separate function that is paired with the cache constructor.
1043 It is quite natural to put almost all other constructor actions into
1044 this function.
1045
1046 RETURN VALUE
1047 0 initialization with buffer allocations has succeeded
1048 1 otherwise
1049*/
1050
1051int JOIN_CACHE::init(bool for_explain)
1052{
1053 DBUG_ENTER("JOIN_CACHE::init");
1054
1055 for_explain_only= for_explain;
1056
1057 calc_record_fields();
1058
1059 collect_info_on_key_args();
1060
1061 if (alloc_fields())
1062 DBUG_RETURN(1);
1063
1064 create_flag_fields();
1065
1066 create_key_arg_fields();
1067
1068 create_remaining_fields();
1069
1070 set_constants();
1071
1072 if (alloc_buffer())
1073 DBUG_RETURN(1);
1074
1075 reset(TRUE);
1076
1077 DBUG_RETURN(0);
1078}
1079
1080
1081/*
1082 Check the possibility to read the access keys directly from the join buffer
1083 SYNOPSIS
1084 check_emb_key_usage()
1085
1086 DESCRIPTION
1087 The function checks some conditions at which the key values can be read
1088 directly from the join buffer. This is possible when the key values can be
1089 composed by concatenation of the record fields stored in the join buffer.
1090 Sometimes when the access key is multi-component the function has to re-order
1091 the fields written into the join buffer to make keys embedded. If key
1092 values for the key access are detected as embedded then 'use_emb_key'
1093 is set to TRUE.
1094
1095 EXAMPLE
1096 Let table t2 have an index defined on the columns (a,b). Let's assume also
1097 that the columns t2.a, t2.b as well as the columns t1.a, t1.b are all
1098 of the integer type. Then if the query
1099 SELECT COUNT(*) FROM t1, t2 WHERE t1.a=t2.a and t1.b=t2.b
1100 is executed with a join cache in such a way that t1 is the driving
1101 table then the key values to access table t2 can be read directly
1102 from the join buffer.
1103
1104 NOTES
1105 In some cases key values could be read directly from the join buffer but
1106 we still do not consider them embedded. In the future we'll expand the
1107 class of keys which we identify as embedded.
1108
1109 NOTES
1110 The function returns FALSE if no key is used to join the records
1111 from join_tab.
1112
1113 RETURN VALUE
1114 TRUE key values will be considered as embedded,
1115 FALSE otherwise.
1116*/
1117
1118bool JOIN_CACHE::check_emb_key_usage()
1119{
1120
1121 if (!is_key_access())
1122 return FALSE;
1123
1124 uint i;
1125 Item *item;
1126 KEY_PART_INFO *key_part;
1127 CACHE_FIELD *copy;
1128 CACHE_FIELD *copy_end;
1129 uint len= 0;
1130 TABLE_REF *ref= &join_tab->ref;
1131 KEY *keyinfo= join_tab->get_keyinfo_by_key_no(ref->key);
1132
1133 /*
1134 If some of the key arguments are not from the local cache the key
1135 is not considered as embedded.
1136 TODO:
1137 Expand it to the case when ref->key_parts=1 and local_key_arg_fields=0.
1138 */
1139 if (external_key_arg_fields != 0)
1140 return FALSE;
1141 /*
1142 If the number of the local key arguments is not equal to the number
1143 of key parts the key value cannot be read directly from the join buffer.
1144 */
1145 if (local_key_arg_fields != ref->key_parts)
1146 return FALSE;
1147
1148 /*
1149 A key is not considered embedded if one of the following is true:
1150 - one of its key parts is not equal to a field
1151 - it is a partial key
1152 - definition of the argument field does not coincide with the
1153 definition of the corresponding key component
1154 - some of the key components are nullable
1155 */
1156 for (i=0; i < ref->key_parts; i++)
1157 {
1158 item= ref->items[i]->real_item();
1159 if (item->type() != Item::FIELD_ITEM)
1160 return FALSE;
1161 key_part= keyinfo->key_part+i;
1162 if (key_part->key_part_flag & HA_PART_KEY_SEG)
1163 return FALSE;
1164 if (!key_part->field->eq_def(((Item_field *) item)->field))
1165 return FALSE;
1166 if (key_part->field->maybe_null())
1167 return FALSE;
1168 }
1169
1170 copy= field_descr+flag_fields;
1171 copy_end= copy+local_key_arg_fields;
1172 for ( ; copy < copy_end; copy++)
1173 {
1174 /*
1175 If some of the key arguments are of variable length the key
1176 is not considered as embedded.
1177 */
1178 if (copy->type != 0)
1179 return FALSE;
1180 /*
1181 If some of the key arguments are bit fields whose bits are partially
1182 stored with null bits the key is not considered as embedded.
1183 */
1184 if (copy->field->type() == MYSQL_TYPE_BIT &&
1185 ((Field_bit*) (copy->field))->bit_len)
1186 return FALSE;
1187 len+= copy->length;
1188 }
1189
1190 emb_key_length= len;
1191
1192 /*
1193 Make sure that key fields follow the order of the corresponding
1194 key components these fields are equal to. For this the descriptors
1195 of the fields that comprise the key might be re-ordered.
1196 */
1197 for (i= 0; i < ref->key_parts; i++)
1198 {
1199 uint j;
1200 Item *item= ref->items[i]->real_item();
1201 Field *fld= ((Item_field *) item)->field;
1202 CACHE_FIELD *init_copy= field_descr+flag_fields+i;
1203 for (j= i, copy= init_copy; i < local_key_arg_fields; i++, copy++)
1204 {
1205 if (fld->eq(copy->field))
1206 {
1207 if (j != i)
1208 {
1209 CACHE_FIELD key_part_copy= *copy;
1210 *copy= *init_copy;
1211 *init_copy= key_part_copy;
1212 }
1213 break;
1214 }
1215 }
1216 }
1217
1218 return TRUE;
1219}
1220
1221
1222/*
1223 Write record fields and their required offsets into the join cache buffer
1224
1225 SYNOPSIS
1226 write_record_data()
1227 link a reference to the associated info in the previous cache
1228 is_full OUT true if it has been decided that no more records will be
1229 added to the join buffer
1230
1231 DESCRIPTION
1232 This function puts into the cache buffer the following info that it reads
1233 from the join record buffers or computes somehow:
1234 (1) the length of all fields written for the record (optional)
1235 (2) an offset to the associated info in the previous cache (if there is any)
1236 determined by the link parameter
1237 (3) all flag fields of the tables whose data field are put into the cache:
1238 - match flag (optional),
1239 - null bitmaps for all tables,
1240 - null row flags for all tables
1241 (4) values of all data fields including
1242 - full images of those fixed length data fields that cannot have
1243 trailing spaces
1244 - significant part of fixed length fields that can have trailing spaces
1245 with the prepended length
1246 - data of non-blob variable length fields with the prepended data length
1247 - blob data from blob fields with the prepended data length
1248 (5) record offset values for the data fields that are referred to from
1249 other caches
1250
1251 The record is written at the current position stored in the field 'pos'.
1252 At the end of the function 'pos' points at the position right after the
1253 written record data.
1254 The function increments the number of records in the cache that is stored
1255 in the 'records' field by 1. The function also modifies the values of
1256 'curr_rec_pos' and 'last_rec_pos' to point to the written record.
1257 The 'end_pos' cursor is modified accordingly.
1258 The 'last_rec_blob_data_is_in_rec_buff' is set on if the blob data
1259 remains in the record buffers and is not copied to the join buffer. It may
1260 happen only to the blob data from the last record added into the cache.
1261 If on_precond is attached to join_tab and it does not evaluate to TRUE
1262 then MATCH_IMPOSSIBLE is placed in the match flag field of the record
1263 written into the join buffer.
1264
1265 RETURN VALUE
1266 length of the written record data
1267*/
1268
1269uint JOIN_CACHE::write_record_data(uchar * link, bool *is_full)
1270{
1271 uint len;
1272 bool last_record;
1273 CACHE_FIELD *copy;
1274 CACHE_FIELD *copy_end;
1275 uchar *flags_pos;
1276 uchar *cp= pos;
1277 uchar *init_pos= cp;
1278 uchar *rec_len_ptr= 0;
1279 uint key_extra= extra_key_length();
1280
1281 records++; /* Increment the counter of records in the cache */
1282
1283 len= pack_length + key_extra;
1284
1285 /* Make an adjustment for the size of the auxiliary buffer if there is any */
1286 uint incr= aux_buffer_incr(records);
1287 size_t rem= rem_space();
1288 aux_buff_size+= len+incr < rem ? incr : rem;
1289
1290 /*
1291 For each blob to be put into cache save its length and a pointer
1292 to the value in the corresponding element of the blob_ptr array.
1293 Blobs with null values are skipped.
1294 Increment 'len' by the total length of all these blobs.
1295 */
1296 if (blobs)
1297 {
1298 CACHE_FIELD **copy_ptr= blob_ptr;
1299 CACHE_FIELD **copy_ptr_end= copy_ptr+blobs;
1300 for ( ; copy_ptr < copy_ptr_end; copy_ptr++)
1301 {
1302 Field_blob *blob_field= (Field_blob *) (*copy_ptr)->field;
1303 if (!blob_field->is_null())
1304 {
1305 uint blob_len= blob_field->get_length();
1306 (*copy_ptr)->blob_length= blob_len;
1307 len+= blob_len;
1308 (*copy_ptr)->str= blob_field->get_ptr();
1309 }
1310 }
1311 }
1312
1313 /*
1314 Check whether we won't be able to add any new record into the cache after
1315 this one because the cache will be full. Set last_record to TRUE if it's so.
1316 We assume that the cache will be full after the record has been written
1317 into it if either the remaining space of the cache is not big enough for the
1318 record's blob values or if there is a chance that not all non-blob fields
1319 of the next record can be placed there.
1320 This function is called only in the case when there is enough space left in
1321 the cache to store at least non-blob parts of the current record.
1322 */
1323 last_record= (len+pack_length_with_blob_ptrs+key_extra) > rem_space();
1324
1325 /*
1326 Save the position for the length of the record in the cache if it's needed.
1327 The length of the record will be inserted here when all fields of the record
1328 are put into the cache.
1329 */
1330 if (with_length)
1331 {
1332 rec_len_ptr= cp;
1333 DBUG_ASSERT(cp + size_of_rec_len <= buff + buff_size);
1334 cp+= size_of_rec_len;
1335 }
1336
1337 /*
1338 Put a reference to the fields of the record that are stored in the previous
1339 cache if there is any. This reference is passed by the 'link' parameter.
1340 */
1341 if (prev_cache)
1342 {
1343 DBUG_ASSERT(cp + prev_cache->get_size_of_rec_offset() <= buff + buff_size);
1344 cp+= prev_cache->get_size_of_rec_offset();
1345 prev_cache->store_rec_ref(cp, link);
1346 }
1347
1348 curr_rec_pos= cp;
1349
1350 /* If there is a match flag set its value to 0 */
1351 copy= field_descr;
1352 if (with_match_flag)
1353 *copy[0].str= 0;
1354
1355 /* First put into the cache the values of all flag fields */
1356 copy_end= field_descr+flag_fields;
1357 flags_pos= cp;
1358 for ( ; copy < copy_end; copy++)
1359 {
1360 DBUG_ASSERT(cp + copy->length <= buff + buff_size);
1361 memcpy(cp, copy->str, copy->length);
1362 cp+= copy->length;
1363 }
1364
1365 /* Now put the values of the remaining fields, skipping those that are null */
1366 copy_end= field_descr+fields;
1367 for ( ; copy < copy_end; copy++)
1368 {
1369 Field *field= copy->field;
1370 if (field && field->maybe_null() && field->is_null())
1371 {
1372 if (copy->referenced_field_no)
1373 copy->offset= 0;
1374 continue;
1375 }
1376 /* Save the offset of the field to put it later at the end of the record */
1377 if (copy->referenced_field_no)
1378 copy->offset= (uint)(cp-curr_rec_pos);
1379
1380 switch (copy->type) {
1381 case CACHE_BLOB:
1382 {
1383 Field_blob *blob_field= (Field_blob *) copy->field;
1384 if (last_record)
1385 {
1386 last_rec_blob_data_is_in_rec_buff= 1;
1387 /* Put down the length of the blob and the pointer to the data */
1388 DBUG_ASSERT(cp + copy->length + sizeof(char*) <= buff + buff_size);
1389 blob_field->get_image(cp, copy->length+sizeof(char*),
1390 blob_field->charset());
1391 cp+= copy->length+sizeof(char*);
1392 }
1393 else
1394 {
1395 /* First put down the length of the blob and then copy the data */
1396 blob_field->get_image(cp, copy->length,
1397 blob_field->charset());
1398 DBUG_ASSERT(cp + copy->length + copy->blob_length <= buff + buff_size);
1399 memcpy(cp+copy->length, copy->str, copy->blob_length);
1400 cp+= copy->length+copy->blob_length;
1401 }
1402 break;
1403 }
1404 case CACHE_VARSTR1:
1405 /* Copy the significant part of the short varstring field */
1406 len= (uint) copy->str[0] + 1;
1407 DBUG_ASSERT(cp + len <= buff + buff_size);
1408 memcpy(cp, copy->str, len);
1409 cp+= len;
1410 break;
1411 case CACHE_VARSTR2:
1412 /* Copy the significant part of the long varstring field */
1413 len= uint2korr(copy->str) + 2;
1414 DBUG_ASSERT(cp + len <= buff + buff_size);
1415 memcpy(cp, copy->str, len);
1416 cp+= len;
1417 break;
1418 case CACHE_STRIPPED:
1419 {
1420 /*
1421 Put down the field value stripping all trailing spaces off.
1422 After this insert the length of the written sequence of bytes.
1423 */
1424 uchar *str, *end;
1425 for (str= copy->str, end= str+copy->length;
1426 end > str && end[-1] == ' ';
1427 end--) ;
1428 len=(uint) (end-str);
1429 DBUG_ASSERT(cp + len + 2 <= buff + buff_size);
1430 int2store(cp, len);
1431 memcpy(cp+2, str, len);
1432 cp+= len+2;
1433 break;
1434 }
1435 case CACHE_ROWID:
1436 if (!copy->length)
1437 {
1438 /*
1439 This may happen only for ROWID fields of materialized
1440 derived tables and views.
1441 */
1442 TABLE *table= (TABLE *) copy->str;
1443 copy->str= table->file->ref;
1444 copy->length= table->file->ref_length;
1445 if (!copy->str)
1446 {
1447 /*
1448 If table is an empty inner table of an outer join and it is
1449 a materialized derived table then table->file->ref == NULL.
1450 */
1451 cp+= copy->length;
1452 break;
1453 }
1454 }
1455 /* fall through */
1456 default:
1457 /* Copy the entire image of the field from the record buffer */
1458 DBUG_ASSERT(cp + copy->length <= buff + buff_size);
1459 if (copy->str)
1460 memcpy(cp, copy->str, copy->length);
1461 cp+= copy->length;
1462 }
1463 }
1464
1465 /* Add the offsets of the fields that are referenced from other caches */
1466 if (referenced_fields)
1467 {
1468 uint cnt= 0;
1469 for (copy= field_descr+flag_fields; copy < copy_end ; copy++)
1470 {
1471 if (copy->referenced_field_no)
1472 {
1473 store_fld_offset(cp+size_of_fld_ofs*(copy->referenced_field_no-1),
1474 copy->offset);
1475 cnt++;
1476 }
1477 }
1478 DBUG_ASSERT(cp + size_of_fld_ofs*cnt <= buff + buff_size);
1479 cp+= size_of_fld_ofs*cnt;
1480 }
1481
1482 if (rec_len_ptr)
1483 store_rec_length(rec_len_ptr, (ulong) (cp-rec_len_ptr-size_of_rec_len));
1484 last_rec_pos= curr_rec_pos;
1485 end_pos= pos= cp;
1486 *is_full= last_record;
1487
1488 last_written_is_null_compl= 0;
1489 if (!join_tab->first_unmatched && join_tab->on_precond)
1490 {
1491 join_tab->found= 0;
1492 join_tab->not_null_compl= 1;
1493 if (!join_tab->on_precond->val_int())
1494 {
1495 flags_pos[0]= MATCH_IMPOSSIBLE;
1496 last_written_is_null_compl= 1;
1497 }
1498 }
1499
1500 return (uint) (cp-init_pos);
1501}
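
/*
  The layout produced by write_record_data() can be pictured as

    [record length][link to prev cache][flag fields][data fields][field offsets]

  where the two leading affixes and the trailing offsets are optional. Below
  is a hedged standalone sketch that emulates writing one such record with a
  2-byte length prefix, one flag byte and one space-stripped string field
  (CACHE_STRIPPED-style encoding); it is an illustration, not the server code.

    #include <cstring>
    #include <cstdio>

    static void int2store_le(unsigned char *p, unsigned v)
    { p[0]= (unsigned char) v; p[1]= (unsigned char) (v >> 8); }

    int main()
    {
      unsigned char buff[64], *cp= buff;
      unsigned char *rec_len_ptr= cp;
      cp+= 2;                                    // reserve room for the length
      *cp++= 0;                                  // match flag, initially 0

      const char *val= "abc   ";                 // a CHAR(6) value
      size_t len= strlen(val);
      while (len && val[len - 1] == ' ')
        len--;                                   // strip trailing spaces
      int2store_le(cp, (unsigned) len);          // prepend the stored length
      memcpy(cp + 2, val, len);
      cp+= 2 + len;

      // patch the record length once all fields have been written
      int2store_le(rec_len_ptr, (unsigned) (cp - rec_len_ptr - 2));
      printf("record takes %d bytes, %u of them after the length prefix\n",
             (int) (cp - buff), (unsigned) (cp - rec_len_ptr - 2));
      return 0;
    }
*/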
1502
1503
1504/*
1505 Reset the join buffer for reading/writing: default implementation
1506
1507 SYNOPSIS
1508 reset()
1509 for_writing if it's TRUE the function reset the buffer for writing
1510
1511 DESCRIPTION
1512 This default implementation of the virtual function reset() resets
1513 the join buffer for reading or writing.
1514 If the buffer is reset for reading, only the 'pos' value is reset
1515 to point to the very beginning of the join buffer. If the buffer is
1516 reset for writing additionally:
1517 - the counter of the records in the buffer is set to 0,
1518 - the value of 'last_rec_pos' is set to point to the beginning of
1519 the buffer,
1520 - 'end_pos' is set to point to the beginning of the join buffer,
1521 - the size of the auxiliary buffer is reset to 0,
1522 - the flag 'last_rec_blob_data_is_in_rec_buff' is set to 0.
1523
1524 RETURN VALUE
1525 none
1526*/
1527void JOIN_CACHE::reset(bool for_writing)
1528{
1529 pos= buff;
1530 curr_rec_link= 0;
1531 if (for_writing)
1532 {
1533 records= 0;
1534 last_rec_pos= buff;
1535 aux_buff_size= 0;
1536 end_pos= pos;
1537 last_rec_blob_data_is_in_rec_buff= 0;
1538 }
1539}
1540
1541
1542/*
1543 Add a record into the join buffer: the default implementation
1544
1545 SYNOPSIS
1546 put_record()
1547
1548 DESCRIPTION
1549 This default implementation of the virtual function put_record writes
1550 the next matching record into the join buffer.
1551 It also links the record having been written into the join buffer with
1552 the matched record in the previous cache if there is any.
1553 The implementation assumes that the function get_curr_link()
1554 will return exactly the pointer to this matched record.
1555
1556 RETURN VALUE
1557 TRUE if it has been decided that it should be the last record
1558 in the join buffer,
1559 FALSE otherwise
1560*/
1561
1562bool JOIN_CACHE::put_record()
1563{
1564 bool is_full;
1565 uchar *link= 0;
1566 if (prev_cache)
1567 link= prev_cache->get_curr_rec_link();
1568 write_record_data(link, &is_full);
1569 return is_full;
1570}
1571
1572
1573/*
1574 Read the next record from the join buffer: the default implementation
1575
1576 SYNOPSIS
1577 get_record()
1578
1579 DESCRIPTION
1580 This default implementation of the virtual function get_record
1581 reads fields of the next record from the join buffer of this cache.
1582 The function also reads all other fields associated with this record
1583 from the join buffers of the previous caches. The fields are read
1584 into the corresponding record buffers.
1585 It is supposed that 'pos' points to the position in the buffer
1586 right after the previous record when the function is called.
1587 When the function returns the 'pos' value is updated to point
1588 to the position after the read record.
1589 The value of 'curr_rec_pos' is also updated by the function to
1590 point to the beginning of the first field of the record in the
1591 join buffer.
1592
1593 RETURN VALUE
1594 TRUE there are no more records to read from the join buffer
1595 FALSE otherwise
1596*/
1597
1598bool JOIN_CACHE::get_record()
1599{
1600 bool res;
1601 uchar *prev_rec_ptr= 0;
1602 if (with_length)
1603 pos+= size_of_rec_len;
1604 if (prev_cache)
1605 {
1606 pos+= prev_cache->get_size_of_rec_offset();
1607 prev_rec_ptr= prev_cache->get_rec_ref(pos);
1608 }
1609 curr_rec_pos= pos;
1610 if (!(res= read_all_record_fields() == NO_MORE_RECORDS_IN_BUFFER))
1611 {
1612 pos+= referenced_fields*size_of_fld_ofs;
1613 if (prev_cache)
1614 prev_cache->get_record_by_pos(prev_rec_ptr);
1615 }
1616 return res;
1617}
1618
1619
1620/*
1621 Read a positioned record from the join buffer: the default implementation
1622
1623 SYNOPSIS
1624 get_record_by_pos()
1625 rec_ptr position of the first field of the record in the join buffer
1626
1627 DESCRIPTION
1628 This default implementation of the virtual function get_record_by_pos
1629 reads the fields of the record positioned at 'rec_ptr' from the join buffer.
1630 The function also reads all other fields associated with this record
1631 from the join buffers of the previous caches. The fields are read
1632 into the corresponding record buffers.
1633
1634 RETURN VALUE
1635 none
1636*/
1637
1638void JOIN_CACHE::get_record_by_pos(uchar *rec_ptr)
1639{
1640 uchar *save_pos= pos;
1641 pos= rec_ptr;
1642 read_all_record_fields();
1643 pos= save_pos;
1644 if (prev_cache)
1645 {
1646 uchar *prev_rec_ptr= prev_cache->get_rec_ref(rec_ptr);
1647 prev_cache->get_record_by_pos(prev_rec_ptr);
1648 }
1649}
1650
1651
1652/*
1653 Get the match flag from the referenced record: the default implementation
1654
1655 SYNOPSIS
1656 get_match_flag_by_pos()
1657 rec_ptr position of the first field of the record in the join buffer
1658
1659 DESCRIPTION
1660 This default implementation of the virtual function get_match_flag_by_pos
1661 gets the match flag for the record positioned at
1662 rec_ptr. If the match flag is placed in one of the previous buffers the
1663 function first follows the link to the associated record fields in that buffer.
1664
1665 RETURN VALUE
1666 match flag for the record at the position rec_ptr
1667*/
1668
1669enum JOIN_CACHE::Match_flag JOIN_CACHE::get_match_flag_by_pos(uchar *rec_ptr)
1670{
1671 Match_flag match_fl= MATCH_NOT_FOUND;
1672 if (with_match_flag)
1673 {
1674 match_fl= (enum Match_flag) rec_ptr[0];
1675 return match_fl;
1676 }
1677 if (prev_cache)
1678 {
1679 uchar *prev_rec_ptr= prev_cache->get_rec_ref(rec_ptr);
1680 return prev_cache->get_match_flag_by_pos(prev_rec_ptr);
1681 }
1682 DBUG_ASSERT(0);
1683 return match_fl;
1684}
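
/*
  A minimal sketch (assumed, simplified types) of the chained lookup done
  above: when the current cache carries no match flag, the flag is read from
  the linked record in the previous cache, possibly walking several caches
  back. In the real code the linked record pointer is read from the record
  prefix via get_rec_ref(); here it is passed in directly for brevity.

    #include <cstdio>

    struct Cache_stub
    {
      bool with_match_flag;
      Cache_stub *prev;
      unsigned char get_flag(unsigned char *rec, unsigned char *prev_rec)
      {
        if (with_match_flag)
          return rec[0];                          // flag stored in this record
        return prev->get_flag(prev_rec, NULL);    // delegate to previous cache
      }
    };

    int main()
    {
      unsigned char prev_rec[]= { 1 };  // match flag set in the previous cache
      unsigned char rec[]= { 0 };       // current record carries no flag
      Cache_stub first= { true, NULL };
      Cache_stub second= { false, &first };
      printf("match flag: %u\n", (unsigned) second.get_flag(rec, prev_rec));
      return 0;
    }
*/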
1685
1686
1687/*
1688 Calculate the increment of the auxiliary buffer for a record write
1689
1690 SYNOPSIS
1691 aux_buffer_incr()
1692 recno the number of the record the increment to be calculated for
1693
1694 DESCRIPTION
1695 This function calls the aux_buffer_incr method of the
1696 companion member join_tab_scan to calculate the growth of the
1697 auxiliary buffer when the recno-th record is added to the
1698 join_buffer of this cache.
1699
1700 RETURN VALUE
1701 the number of bytes in the increment
1702*/
1703
1704uint JOIN_CACHE::aux_buffer_incr(size_t recno)
1705{
1706 return join_tab_scan->aux_buffer_incr(recno);
1707}
1708
1709/*
1710 Read all flag and data fields of a record from the join buffer
1711
1712 SYNOPSIS
1713 read_all_record_fields()
1714
1715 DESCRIPTION
1716 The function reads all flag and data fields of a record from the join
1717 buffer into the corresponding record buffers.
1718 The fields are read starting from the position 'pos' which is
1719 supposed to point to the beginning of the first record field.
1720 The function increments the value of 'pos' by the length of the
1721 read data.
1722
1723 RETURN VALUE
1724 (-1) if there are no more records in the join buffer
1725 length of the data read from the join buffer - otherwise
1726*/
1727
1728uint JOIN_CACHE::read_all_record_fields()
1729{
1730 uchar *init_pos= pos;
1731
1732 if (pos > last_rec_pos || !records)
1733 return NO_MORE_RECORDS_IN_BUFFER;
1734
1735 /* First read the match flag, null bitmaps and null_row flags for each table */
1736 read_flag_fields();
1737
1738 /* Now read the remaining table fields if needed */
1739 CACHE_FIELD *copy= field_descr+flag_fields;
1740 CACHE_FIELD *copy_end= field_descr+fields;
1741 bool blob_in_rec_buff= blob_data_is_in_rec_buff(init_pos);
1742 for ( ; copy < copy_end; copy++)
1743 read_record_field(copy, blob_in_rec_buff);
1744
1745 return (uint) (pos-init_pos);
1746}
1747
1748
1749/*
1750 Read all flag fields of a record from the join buffer
1751
1752 SYNOPSIS
1753 read_flag_fields()
1754
1755 DESCRIPTION
1756 The function reads all flag fields of a record from the join
1757 buffer into the corresponding record buffers.
1758 The fields are read starting from the position 'pos'.
1759 The function increments the value of 'pos' by the length of the
1760 read data.
1761
1762 RETURN VALUE
1763 length of the data read from the join buffer
1764*/
1765
1766uint JOIN_CACHE::read_flag_fields()
1767{
1768 uchar *init_pos= pos;
1769 CACHE_FIELD *copy= field_descr;
1770 CACHE_FIELD *copy_end= copy+flag_fields;
1771 if (with_match_flag)
1772 {
1773 copy->str[0]= MY_TEST((Match_flag) pos[0] == MATCH_FOUND);
1774 pos+= copy->length;
1775 copy++;
1776 }
1777 for ( ; copy < copy_end; copy++)
1778 {
1779 memcpy(copy->str, pos, copy->length);
1780 pos+= copy->length;
1781 }
1782 return (uint)(pos-init_pos);
1783}
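
/*
  Illustration (not part of the original sources; the table names and sizes
  below are hypothetical): the flag-field area that read_flag_fields() walks
  through for a cache joining two tables t1 and t2 could look like

    offset 0: [ match flag ]      1 byte, turned into MY_TEST(... == MATCH_FOUND)
    offset 1: [ t1 flag fields ]  null_row flag and null bitmap, copied verbatim
    offset 3: [ t2 flag fields ]  null_row flag and null bitmap, copied verbatim

  after which 'pos' has advanced by the total length of the flag fields and
  the data fields of the record follow.
*/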
1784
1785
1786/*
1787 Read a data record field from the join buffer
1788
1789 SYNOPSIS
1790 read_record_field()
1791 copy the descriptor of the data field to be read
1792 blob_in_rec_buff indicates whether this is the field from the record
1793 whose blob data are in record buffers
1794
1795 DESCRIPTION
1796 The function reads the data field specified by the parameter copy
1797 from the join buffer into the corresponding record buffer.
1798 The field is read starting from the position 'pos'.
1799 The data of blob values is not copied from the join buffer.
1800 The function increments the value of 'pos' by the length of the
1801 read data.
1802
1803 RETURN VALUE
1804 length of the data read from the join buffer
1805*/
1806
1807uint JOIN_CACHE::read_record_field(CACHE_FIELD *copy, bool blob_in_rec_buff)
1808{
1809 uint len;
1810 /* Do not copy the field if its value is null */
1811 if (copy->field && copy->field->maybe_null() && copy->field->is_null())
1812 return 0;
1813 switch (copy->type) {
1814 case CACHE_BLOB:
1815 {
1816 Field_blob *blob_field= (Field_blob *) copy->field;
1817 /*
1818 Copy the length and the pointer to data but not the blob data
1819 itself to the record buffer
1820 */
1821 if (blob_in_rec_buff)
1822 {
1823 blob_field->set_image(pos, copy->length + sizeof(char*),
1824 blob_field->charset());
1825 len= copy->length + sizeof(char*);
1826 }
1827 else
1828 {
1829 blob_field->set_ptr(pos, pos+copy->length);
1830 len= copy->length + blob_field->get_length();
1831 }
1832 }
1833 break;
1834 case CACHE_VARSTR1:
1835 /* Copy the significant part of the short varstring field */
1836 len= (uint) pos[0] + 1;
1837 memcpy(copy->str, pos, len);
1838 break;
1839 case CACHE_VARSTR2:
1840 /* Copy the significant part of the long varstring field */
1841 len= uint2korr(pos) + 2;
1842 memcpy(copy->str, pos, len);
1843 break;
1844 case CACHE_STRIPPED:
1845 /* Pad the value with the spaces that have been stripped off */
1846 len= uint2korr(pos);
1847 memcpy(copy->str, pos+2, len);
1848 memset(copy->str+len, ' ', copy->length-len);
1849 len+= 2;
1850 break;
1851 case CACHE_ROWID:
1852 if (!copy->str)
1853 {
1854 len= copy->length;
1855 break;
1856 }
1857 /* fall through */
1858 default:
1859 /* Copy the entire image of the field from the record buffer */
1860 len= copy->length;
1861 memcpy(copy->str, pos, len);
1862 }
1863 pos+= len;
1864 return len;
1865}
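
/*
  Illustration (not part of the original sources): how the variable-length
  field types handled by read_record_field() are encoded in the join buffer.

    CACHE_VARSTR1   [ len:1 ][ len bytes of data ]   len+1 bytes are copied back
    CACHE_VARSTR2   [ len:2 ][ len bytes of data ]   len+2 bytes are copied back
    CACHE_STRIPPED  [ len:2 ][ len significant bytes ]
                    the value is restored to its full column width by padding
                    it with trailing spaces
    CACHE_BLOB      length/pointer image or length plus inline data, depending
                    on whether the blob data still resides in the record buffers

  Fixed-size fields (the default case) are simply copied back as copy->length
  bytes.
*/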
1866
1867
1868/*
1869 Read a referenced field from the join buffer
1870
1871 SYNOPSIS
1872 read_referenced_field()
1873 copy pointer to the descriptor of the referenced field
1874 rec_ptr pointer to the record that may contain this field
1875 len IN/OUT total length of the record fields
1876
1877 DESCRIPTION
1878 The function checks whether copy points to a data field descriptor
1879 for this cache object. If it does not then the function returns
1880 FALSE. Otherwise the function reads the field of the record in
1881 the join buffer pointed to by 'rec_ptr' into the corresponding record
1882 buffer and returns TRUE.
1883 If the value of *len is 0 then the function sets it to the total
1884 length of the record fields including possible trailing offset
1885 values. Otherwise *len is supposed to provide this value that
1886 has been obtained earlier.
1887
1888 NOTE
1889 If the value of the referenced field is null then the offset
1890 for the value is set to 0. If the value of a field can be null
1891 then the value of flag_fields is always positive. So the offset
1892 for any non-null value cannot be 0 in this case.
1893
1894 RETURN VALUE
1895 TRUE 'copy' points to a data descriptor of this join cache
1896 FALSE otherwise
1897*/
1898
1899bool JOIN_CACHE::read_referenced_field(CACHE_FIELD *copy,
1900 uchar *rec_ptr,
1901 uint *len)
1902{
1903 uchar *ptr;
1904 uint offset;
1905 if (copy < field_descr || copy >= field_descr+fields)
1906 return FALSE;
1907 if (!*len)
1908 {
1909 /* Get the total length of the record fields */
1910 uchar *len_ptr= rec_ptr;
1911 if (prev_cache)
1912 len_ptr-= prev_cache->get_size_of_rec_offset();
1913 *len= get_rec_length(len_ptr-size_of_rec_len);
1914 }
1915
1916 ptr= rec_ptr-(prev_cache ? prev_cache->get_size_of_rec_offset() : 0);
1917 offset= get_fld_offset(ptr+ *len -
1918 size_of_fld_ofs*
1919 (referenced_fields+1-copy->referenced_field_no));
1920 bool is_null= FALSE;
1921 Field *field= copy->field;
1922 if (offset == 0 && flag_fields)
1923 is_null= TRUE;
1924 if (is_null)
1925 {
1926 field->set_null();
1927 if (!field->real_maybe_null())
1928 field->table->null_row= 1;
1929 }
1930 else
1931 {
1932 uchar *save_pos= pos;
1933 field->set_notnull();
1934 if (!field->real_maybe_null())
1935 field->table->null_row= 0;
1936 pos= rec_ptr+offset;
1937 read_record_field(copy, blob_data_is_in_rec_buff(rec_ptr));
1938 pos= save_pos;
1939 }
1940 return TRUE;
1941}
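
/*
  Illustration (not part of the original sources): the record layout assumed
  by read_referenced_field(). A record that contains referenced fields ends
  with an array of field offsets, one size_of_fld_ofs value per referenced
  field:

    [ flag fields | data fields ] ... [ ofs_1 ][ ofs_2 ] ... [ ofs_k ]

  ofs_i is added to rec_ptr to position 'pos' on the i-th referenced field.
  An offset of 0 denotes a NULL value; this is unambiguous because, as the
  NOTE above explains, flag_fields > 0 guarantees that no real field can
  start at offset 0.
*/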
1942
1943
1944/*
1945 Skip record from join buffer if it's already matched: default implementation
1946
1947 SYNOPSIS
1948 skip_if_matched()
1949
1950 DESCRIPTION
1951 This default implementation of the virtual function skip_if_matched
1952 skips the next record from the join buffer if its match flag is set to
1953 MATCH_FOUND.
1954 If the record is skipped the value of 'pos' is set to point to the position
1955 right after the record.
1956
1957 RETURN VALUE
1958 TRUE the match flag is set to MATCH_FOUND and the record has been skipped
1959 FALSE otherwise
1960*/
1961
1962bool JOIN_CACHE::skip_if_matched()
1963{
1964 DBUG_ASSERT(with_length);
1965 uint offset= size_of_rec_len;
1966 if (prev_cache)
1967 offset+= prev_cache->get_size_of_rec_offset();
1968 /* Check whether the match flag is MATCH_FOUND */
1969 if (get_match_flag_by_pos(pos+offset) == MATCH_FOUND)
1970 {
1971 pos+= size_of_rec_len + get_rec_length(pos);
1972 return TRUE;
1973 }
1974 return FALSE;
1975}
1976
1977
1978/*
1979 Skip record from join buffer if the match isn't needed: default implementation
1980
1981 SYNOPSIS
1982 skip_if_not_needed_match()
1983
1984 DESCRIPTION
1985 This default implementation of the virtual function skip_if_not_needed_match
1986 skips the next record from the join buffer if its match flag is not
1987 MATCH_NOT_FOUND and either its value is MATCH_FOUND and join_tab is the
1988 first inner table of a semi-join, or its value is MATCH_IMPOSSIBLE
1989 and join_tab is the first inner table of an outer join.
1990 If the record is skipped the value of 'pos' is set to point to the position
1991 right after the record.
1992
1993 RETURN VALUE
1994 TRUE the record has to be skipped
1995 FALSE otherwise
1996*/
1997
1998bool JOIN_CACHE::skip_if_not_needed_match()
1999{
2000 DBUG_ASSERT(with_length);
2001 enum Match_flag match_fl;
2002 uint offset= size_of_rec_len;
2003 if (prev_cache)
2004 offset+= prev_cache->get_size_of_rec_offset();
2005
2006 if ((match_fl= get_match_flag_by_pos(pos+offset)) != MATCH_NOT_FOUND &&
2007 (join_tab->check_only_first_match() == (match_fl == MATCH_FOUND)) )
2008 {
2009 pos+= size_of_rec_len + get_rec_length(pos);
2010 return TRUE;
2011 }
2012 return FALSE;
2013}
2014
2015
2016/*
2017 Restore the fields of the last record from the join buffer
2018
2019 SYNOPSIS
2020 restore_last_record()
2021
2022 DESCRIPTION
2023 This function restores into the record buffers the values of the fields
2024 of the last record put into the join buffer. The values most probably have
2025 been overwritten by the field values from other records when they were read
2026 from the join buffer into the record buffers in order to check pushdown
2027 predicates.
2028
2029 RETURN
2030 none
2031*/
2032
2033void JOIN_CACHE::restore_last_record()
2034{
2035 if (records)
2036 get_record_by_pos(last_rec_pos);
2037}
2038
2039
2040/*
2041 Join records from the join buffer with records from the next join table
2042
2043 SYNOPSIS
2044 join_records()
2045 skip_last do not find matches for the last record from the buffer
2046
2047 DESCRIPTION
2048 The function extends all records from the join buffer by the matched
2049 records from join_tab. In the case of an outer join operation it also
2050 adds null complementing extensions for the records from the join buffer
2051 that have no match.
2052 No extensions are generated for the last record from the buffer if
2053 skip_last is true.
2054
2055 NOTES
2056 The function must make sure that if linked join buffers are used then
2057 a join buffer cannot be refilled again until all extensions in the
2058 buffers chained to this one are generated.
2059 Currently an outer join operation with several inner tables always uses
2060 at least two linked buffers with the match join flags placed in the
2061 first buffer. Any record composed of rows of the inner tables that
2062 matches a record in this buffer must refer to the position of the
2063 corresponding match flag.
2064
2065 IMPLEMENTATION
2066 When generating extensions for outer tables of an outer join operation
2067 first we generate all extensions for those records from the join buffer
2068 that have matches, after which null complementing extensions for all
2069 unmatched records from the join buffer are generated.
2070
2071 RETURN VALUE
2072 return one of enum_nested_loop_state, except NESTED_LOOP_NO_MORE_ROWS.
2073*/
2074
2075enum_nested_loop_state JOIN_CACHE::join_records(bool skip_last)
2076{
2077 JOIN_TAB *tab;
2078 enum_nested_loop_state rc= NESTED_LOOP_OK;
2079 bool outer_join_first_inner= join_tab->is_first_inner_for_outer_join();
2080 DBUG_ENTER("JOIN_CACHE::join_records");
2081
2082 if (outer_join_first_inner && !join_tab->first_unmatched)
2083 join_tab->not_null_compl= TRUE;
2084
2085 if (!join_tab->first_unmatched)
2086 {
2087 /* Find all records from join_tab that match records from join buffer */
2088 rc= join_matching_records(skip_last);
2089 if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
2090 goto finish;
2091 if (outer_join_first_inner)
2092 {
2093 if (next_cache && join_tab != join_tab->last_inner)
2094 {
2095 /*
2096 Ensure that all matches for outer records from join buffer are to be
2097 found. Now we ensure that all full records are found for records from
2098 join buffer. Generally this is overkill.
2099 TODO: Ensure that only matches of the inner table records have to be
2100 found for the records from join buffer.
2101 */
2102 rc= next_cache->join_records(skip_last);
2103 if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
2104 goto finish;
2105 }
2106 join_tab->not_null_compl= FALSE;
2107 /* Prepare for generation of null complementing extensions */
2108 for (tab= join_tab->first_inner; tab <= join_tab->last_inner; tab++)
2109 tab->first_unmatched= join_tab->first_inner;
2110 }
2111 }
2112 if (join_tab->first_unmatched)
2113 {
2114 if (is_key_access())
2115 restore_last_record();
2116
2117 /*
2118 Generate all null complementing extensions for the records from
2119 join buffer that don't have any matching rows from the inner tables.
2120 */
2121 reset(FALSE);
2122 rc= join_null_complements(skip_last);
2123 if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
2124 goto finish;
2125 }
2126 if (next_cache)
2127 {
2128 /*
2129 When using linked caches we must ensure the records in the next caches
2130 that refer to the records in the join buffer are fully extended.
2131 Otherwise we could have references to the records that have been
2132 already erased from the join buffer and replaced by new records.
2133 */
2134 rc= next_cache->join_records(skip_last);
2135 if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
2136 goto finish;
2137 }
2138
2139 if (skip_last)
2140 {
2141 DBUG_ASSERT(!is_key_access());
2142 /*
2143 Restore the last record from the join buffer to generate
2144 all extensions for it.
2145 */
2146 get_record();
2147 }
2148
2149finish:
2150 if (outer_join_first_inner)
2151 {
2152 /*
2153 All null complemented rows have already been generated for all
2154 outer records from join buffer. Restore the state of the
2155 first_unmatched values to 0 to avoid another null complementing.
2156 */
2157 for (tab= join_tab->first_inner; tab <= join_tab->last_inner; tab++)
2158 tab->first_unmatched= 0;
2159 }
2160 restore_last_record();
2161 reset(TRUE);
2162 DBUG_PRINT("exit", ("rc: %d", rc));
2163 DBUG_RETURN(rc);
2164}
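
/*
  Illustration (a rough sketch, not part of the original sources) of the two
  passes performed by JOIN_CACHE::join_records() over a filled join buffer
  when join_tab is the first inner table of an outer join:

    rc= join_matching_records(skip_last);   // pass 1: extend buffered records
                                            // by matching rows of join_tab
    reset(FALSE);
    rc= join_null_complements(skip_last);   // pass 2: NULL-complement the
                                            // records that found no match
    if (next_cache)
      rc= next_cache->join_records(skip_last); // linked caches must be flushed
                                               // before this buffer is refilled

  Error handling, the first_unmatched bookkeeping and the restoration of the
  last cached record are omitted here for brevity.
*/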
2165
2166
2167/*
2168 Find matches from the next table for records from the join buffer
2169
2170 SYNOPSIS
2171 join_matching_records()
2172 skip_last do not look for matches for the last partial join record
2173
2174 DESCRIPTION
2175 The function retrieves rows of the join_tab table and checks whether they
2176 match partial join records from the join buffer. If a match is found
2177 the function will call the sub_select function trying to look for matches
2178 for the remaining join operations.
2179 This function currently is called only from the function join_records.
2180 If the value of skip_last is true the function writes the partial join
2181 record from the record buffer into the join buffer to save its value for
2182 the future processing in the caller function.
2183
2184 NOTES
2185 If employed by BNL or BNLH join algorithms the function performs a full
2186 scan of join_tab for each refill of the join buffer. If BKA or BKAH
2187 algorithms are used then the function iterates only over those records
2188 from join_tab that can be accessed by keys built over records in the join
2189 buffer. To apply a proper method of iteration the function just calls
2190 virtual iterator methods (open, next, close) of the member join_tab_scan.
2191 The member can be either of the JOIN_TAB_SCAN or JOIN_TAB_SCAN_MRR type.
2192 The class JOIN_TAB_SCAN provides the iterator methods for BNL/BNLH join
2193 algorithms. The class JOIN_TAB_SCAN_MRR provides the iterator methods
2194 for BKA/BKAH join algorithms.
2195 When the function looks for records from the join buffer that would
2196 match a record from join_tab it iterates either over all records in
2197 the buffer or only over selected records. If BNL join operation is
2198 performed all records are checked for the match. If BNLH or BKAH
2199 algorithm is employed to join join_tab then the function looks only
2200 through the records with the same join key as the record from join_tab.
2201 With the BKA join algorithm only one record from the join buffer is checked
2202 for a match for any record from join_tab. To iterate over the candidates
2203 for a match the virtual function get_next_candidate_for_match is used,
2204 while the virtual function prepare_look_for_matches is called to prepare
2205 for such iteration process.
2206
2207 NOTES
2208 The function produces all matching extensions for the records in the
2209 join buffer following the path of the employed blocked algorithm.
2210 When an outer join operation is performed all unmatched records from
2211 the join buffer must be extended by null values. The function
2212 'join_null_complements' serves this purpose.
2213
2214 RETURN VALUE
2215 return one of enum_nested_loop_state
2216*/
2217
2218enum_nested_loop_state JOIN_CACHE::join_matching_records(bool skip_last)
2219{
2220 int error;
2221 enum_nested_loop_state rc= NESTED_LOOP_OK;
2222 join_tab->table->null_row= 0;
2223 bool check_only_first_match= join_tab->check_only_first_match();
2224 bool outer_join_first_inner= join_tab->is_first_inner_for_outer_join();
2225 DBUG_ENTER("JOIN_CACHE::join_matching_records");
2226
2227 /* Return at once if there are no records in the join buffer */
2228 if (!records)
2229 DBUG_RETURN(NESTED_LOOP_OK);
2230
2231 /*
2232 When joining we read records from the join buffer back into record buffers.
2233 If matches for the last partial join record are found through a call to
2234 the sub_select function then this partial join record must be saved in the
2235 join buffer in order to be restored just before the sub_select call.
2236 */
2237 if (skip_last)
2238 put_record();
2239
2240 if (join_tab->use_quick == 2 && join_tab->select->quick)
2241 {
2242 /* A dynamic range access was used last. Clean up after it */
2243 delete join_tab->select->quick;
2244 join_tab->select->quick= 0;
2245 }
2246
2247 if ((rc= join_tab_execution_startup(join_tab)) < 0)
2248 goto finish2;
2249
2250 /* Prepare to retrieve all records of the joined table */
2251 if (unlikely((error= join_tab_scan->open())))
2252 {
2253 /*
2254 TODO: if we get here, we will assert in net_send_statement(). Add test
2255 coverage and fix.
2256 */
2257 goto finish;
2258 }
2259
2260 while (!(error= join_tab_scan->next()))
2261 {
2262 if (unlikely(join->thd->check_killed()))
2263 {
2264 /* The user has aborted the execution of the query */
2265 join->thd->send_kill_message();
2266 rc= NESTED_LOOP_KILLED;
2267 goto finish;
2268 }
2269
2270 if (join_tab->keep_current_rowid)
2271 join_tab->table->file->position(join_tab->table->record[0]);
2272
2273 /* Prepare to read matching candidates from the join buffer */
2274 if (prepare_look_for_matches(skip_last))
2275 continue;
2276 join_tab->jbuf_tracker->r_scans++;
2277
2278 uchar *rec_ptr;
2279 /* Read each possible candidate from the buffer and look for matches */
2280 while ((rec_ptr= get_next_candidate_for_match()))
2281 {
2282 join_tab->jbuf_tracker->r_rows++;
2283 /*
2284 If only the first match is needed and it has already been found for
2285 the next record read from the join buffer, then the record is skipped.
2286 Also those records that must be null complemented are not considered
2287 as candidates for matches.
2288 */
2289 if ((!check_only_first_match && !outer_join_first_inner) ||
2290 !skip_next_candidate_for_match(rec_ptr))
2291 {
2292 read_next_candidate_for_match(rec_ptr);
2293 rc= generate_full_extensions(rec_ptr);
2294 if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
2295 goto finish;
2296 }
2297 }
2298 }
2299
2300finish:
2301 if (error)
2302 rc= error < 0 ? NESTED_LOOP_NO_MORE_ROWS: NESTED_LOOP_ERROR;
2303finish2:
2304 join_tab_scan->close();
2305 DBUG_RETURN(rc);
2306}
2307
2308
2309/*
2310 Set match flag for a record in join buffer if it has not been set yet
2311
2312 SYNOPSIS
2313 set_match_flag_if_none()
2314 first_inner the join table to which this flag is attached
2315 rec_ptr pointer to the record in the join buffer
2316
2317 DESCRIPTION
2318 If the records of the table are accumulated in a join buffer the function
2319 sets the match flag for the record in the buffer that is referred to by
2320 the record from this cache positioned at 'rec_ptr'.
2321 The function also sets the match flag 'found' of the table first_inner
2322 if it has not been set before.
2323
2324 NOTES
2325 The function assumes that the match flag for any record in any cache
2326 is placed in the first byte occupied by the record fields.
2327
2328 RETURN VALUE
2329 TRUE the match flag is set by this call for the first time
2330 FALSE the match flag has been set before this call
2331*/
2332
2333bool JOIN_CACHE::set_match_flag_if_none(JOIN_TAB *first_inner,
2334 uchar *rec_ptr)
2335{
2336 if (!first_inner->cache)
2337 {
2338 /*
2339 Records of the first inner table to which the flag is attached
2340 are not accumulated in a join buffer.
2341 */
2342 if (first_inner->found)
2343 return FALSE;
2344 else
2345 {
2346 first_inner->found= 1;
2347 return TRUE;
2348 }
2349 }
2350 JOIN_CACHE *cache= this;
2351 while (cache->join_tab != first_inner)
2352 {
2353 cache= cache->prev_cache;
2354 DBUG_ASSERT(cache);
2355 rec_ptr= cache->get_rec_ref(rec_ptr);
2356 }
2357 if ((Match_flag) rec_ptr[0] != MATCH_FOUND)
2358 {
2359 rec_ptr[0]= MATCH_FOUND;
2360 first_inner->found= 1;
2361 return TRUE;
2362 }
2363 return FALSE;
2364}
2365
2366
2367/*
2368 Generate all full extensions for a partial join record in the buffer
2369
2370 SYNOPSIS
2371 generate_full_extensions()
2372 rec_ptr pointer to the record from join buffer to generate extensions
2373
2374 DESCRIPTION
2375 The function first checks whether the current record of 'join_tab' matches
2376 the partial join record from join buffer located at 'rec_ptr'. If it is the
2377 case the function calls the join_tab->next_select method to generate
2378 all full extensions for this partial join match.
2379
2380 RETURN VALUE
2381 return one of enum_nested_loop_state.
2382*/
2383
2384enum_nested_loop_state JOIN_CACHE::generate_full_extensions(uchar *rec_ptr)
2385{
2386 enum_nested_loop_state rc= NESTED_LOOP_OK;
2387 DBUG_ENTER("JOIN_CACHE::generate_full_extensions");
2388
2389 /*
2390 Check whether the extended partial join record meets
2391 the pushdown conditions.
2392 */
2393 if (check_match(rec_ptr))
2394 {
2395 int res= 0;
2396
2397 if (!join_tab->check_weed_out_table ||
2398 !(res= join_tab->check_weed_out_table->sj_weedout_check_row(join->thd)))
2399 {
2400 set_curr_rec_link(rec_ptr);
2401 rc= (join_tab->next_select)(join, join_tab+1, 0);
2402 if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
2403 {
2404 reset(TRUE);
2405 DBUG_RETURN(rc);
2406 }
2407 }
2408 if (res == -1)
2409 {
2410 rc= NESTED_LOOP_ERROR;
2411 DBUG_RETURN(rc);
2412 }
2413 }
2414 else if (unlikely(join->thd->is_error()))
2415 rc= NESTED_LOOP_ERROR;
2416 DBUG_RETURN(rc);
2417}
2418
2419
2420/*
2421 Check matching to a partial join record from the join buffer
2422
2423 SYNOPSIS
2424 check_match()
2425 rec_ptr pointer to the record from join buffer to check matching to
2426
2427 DESCRIPTION
2428 The function checks whether the current record of 'join_tab' matches
2429 the partial join record from join buffer located at 'rec_ptr'. If this is
2430 the case and 'join_tab' is the last inner table of a semi-join or an outer
2431 join the function turns on the match flag for the 'rec_ptr' record unless
2432 it has been already set.
2433
2434 NOTES
2435 Setting the match flag on can trigger re-evaluation of pushdown conditions
2436 for the record when join_tab is the last inner table of an outer join.
2437
2438 RETURN VALUE
2439 TRUE there is a match
2440 FALSE there is no match
2441 In this case the caller must also check thd->is_error() to see
2442 if there was a fatal error for the query.
2443*/
2444
2445inline bool JOIN_CACHE::check_match(uchar *rec_ptr)
2446{
2447 /* Check whether pushdown conditions are satisfied */
2448 DBUG_ENTER("JOIN_CACHE:check_match");
2449
2450 if (join_tab->select && join_tab->select->skip_record(join->thd) <= 0)
2451 DBUG_RETURN(FALSE);
2452
2453 join_tab->jbuf_tracker->r_rows_after_where++;
2454
2455 if (!join_tab->is_last_inner_table())
2456 DBUG_RETURN(TRUE);
2457
2458 /*
2459 This is the last inner table of an outer join,
2460 and maybe of other embedding outer joins, or
2461 this is the last inner table of a semi-join.
2462 */
2463 JOIN_TAB *first_inner= join_tab->get_first_inner_table();
2464 do
2465 {
2466 set_match_flag_if_none(first_inner, rec_ptr);
2467 if (first_inner->check_only_first_match() &&
2468 !join_tab->first_inner)
2469 DBUG_RETURN(TRUE);
2470 /*
2471 This is the first match for the outer table row.
2472 The function set_match_flag_if_none has turned the flag
2473 first_inner->found on. The pushdown predicates for
2474 inner tables must be re-evaluated with this flag on.
2475 Note that, if first_inner is the first inner table
2476 of a semi-join, but is not an inner table of an outer join
2477 such that 'not exists' optimization can be applied to it,
2478 the re-evaluation of the pushdown predicates is not needed.
2479 */
2480 for (JOIN_TAB *tab= first_inner; tab <= join_tab; tab++)
2481 {
2482 if (tab->select && tab->select->skip_record(join->thd) <= 0)
2483 DBUG_RETURN(FALSE);
2484 }
2485 }
2486 while ((first_inner= first_inner->first_upper) &&
2487 first_inner->last_inner == join_tab);
2488 DBUG_RETURN(TRUE);
2489}
2490
2491
2492/*
2493 Add null complements for unmatched outer records from join buffer
2494
2495 SYNOPSIS
2496 join_null_complements()
2497 skip_last do not add null complements for the last record
2498
2499 DESCRIPTION
2500 This function is called only for inner tables of outer joins.
2501 The function retrieves all rows from the join buffer and adds null
2502 complements for those of them that do not have matches for outer
2503 table records.
2504 If the 'join_tab' is the last inner table of the embedding outer
2505 join and the null complemented record satisfies the outer join
2506 condition then the corresponding match flag is turned on
2507 unless it has been set earlier. This setting may trigger
2508 re-evaluation of pushdown conditions for the record.
2509
2510 NOTES
2511 The same implementation of the virtual method join_null_complements
2512 is used for the BNL/BNLH/BKA/BKAH join algorithms.
2513
2514 RETURN VALUE
2515 return one of enum_nested_loop_state.
2516*/
2517
2518enum_nested_loop_state JOIN_CACHE::join_null_complements(bool skip_last)
2519{
2520 ulonglong cnt;
2521 enum_nested_loop_state rc= NESTED_LOOP_OK;
2522 bool is_first_inner= join_tab == join_tab->first_unmatched;
2523 DBUG_ENTER("JOIN_CACHE::join_null_complements");
2524
2525 /* Return at once if there are no records in the join buffer */
2526 if (!records)
2527 DBUG_RETURN(NESTED_LOOP_OK);
2528
2529 cnt= records - (is_key_access() ? 0 : MY_TEST(skip_last));
2530
2531 /* This function may be called only for inner tables of outer joins */
2532 DBUG_ASSERT(join_tab->first_inner);
2533
2534 for ( ; cnt; cnt--)
2535 {
2536 if (unlikely(join->thd->check_killed()))
2537 {
2538 /* The user has aborted the execution of the query */
2539 join->thd->send_kill_message();
2540 rc= NESTED_LOOP_KILLED;
2541 goto finish;
2542 }
2543 /* Just skip the whole record if a match for it has been already found */
2544 if (!is_first_inner || !skip_if_matched())
2545 {
2546 get_record();
2547 /* The outer row is complemented by nulls for each inner table */
2548 restore_record(join_tab->table, s->default_values);
2549 mark_as_null_row(join_tab->table);
2550 rc= generate_full_extensions(get_curr_rec());
2551 if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
2552 goto finish;
2553 }
2554 }
2555
2556finish:
2557 DBUG_RETURN(rc);
2558}
2559
2560
2561/*
2562 Save data on the join algorithm employed by the join cache
2563
2564 SYNOPSIS
2565 save_explain_data()
2566 explain the data structure to add the info on the employed join algorithm to
2567
2568 DESCRIPTION
2569 This function puts info about the type of the used join buffer (flat or
2570 incremental) and about the type of the employed join algorithm (BNL,
2571 BNLH, BKA or BKAH) into the passed data structure.
2572
2573 RETURN VALUE
2574 0 ok
2575 1 error
2576*/
2577
2578bool JOIN_CACHE::save_explain_data(EXPLAIN_BKA_TYPE *explain)
2579{
2580 explain->incremental= MY_TEST(prev_cache);
2581
2582 explain->join_buffer_size= get_join_buffer_size();
2583
2584 switch (get_join_alg()) {
2585 case BNL_JOIN_ALG:
2586 explain->join_alg= "BNL";
2587 break;
2588 case BNLH_JOIN_ALG:
2589 explain->join_alg= "BNLH";
2590 break;
2591 case BKA_JOIN_ALG:
2592 explain->join_alg= "BKA";
2593 break;
2594 case BKAH_JOIN_ALG:
2595 explain->join_alg= "BKAH";
2596 break;
2597 default:
2598 DBUG_ASSERT(0);
2599 }
2600 return 0;
2601}
2602
2603/**
2604 get thread handle.
2605*/
2606
2607THD *JOIN_CACHE::thd()
2608{
2609 return join->thd;
2610}
2611
2612
2613static bool add_mrr_explain_info(String *str, uint mrr_mode, handler *file)
2614{
2615 char mrr_str_buf[128]={0};
2616 int len;
2617 len= file->multi_range_read_explain_info(mrr_mode, mrr_str_buf,
2618 sizeof(mrr_str_buf));
2619 if (len > 0)
2620 {
2621 if (str->length())
2622 {
2623 if (str->append(STRING_WITH_LEN("; ")))
2624 return 1;
2625 }
2626 if (str->append(mrr_str_buf, len))
2627 return 1;
2628 }
2629 return 0;
2630}
2631
2632
2633bool JOIN_CACHE_BKA::save_explain_data(EXPLAIN_BKA_TYPE *explain)
2634{
2635 if (JOIN_CACHE::save_explain_data(explain))
2636 return 1;
2637 return add_mrr_explain_info(&explain->mrr_type, mrr_mode, join_tab->table->file);
2638}
2639
2640
2641bool JOIN_CACHE_BKAH::save_explain_data(EXPLAIN_BKA_TYPE *explain)
2642{
2643 if (JOIN_CACHE::save_explain_data(explain))
2644 return 1;
2645 return add_mrr_explain_info(&explain->mrr_type, mrr_mode, join_tab->table->file);
2646}
2647
2648
2649/*
2650 Initialize a hashed join cache
2651
2652 SYNOPSIS
2653 init()
2654 for_explain join buffer is initialized for explain only
2655
2656 DESCRIPTION
2657 The function initializes the cache structure with a hash table in it.
2658 The hash table will be used to store key values for the records from
2659 the join buffer.
2660 The function allocates memory for the join buffer and for descriptors of
2661 the record fields stored in the buffer.
2662 The function also initializes a hash table for record keys within the join
2663 buffer space.
2664
2665 NOTES
2666 The function is supposed to be called by the init methods of the classes
2667 derived from JOIN_CACHE_HASHED.
2668
2669 RETURN VALUE
2670 0 initialization with buffer allocations has succeeded
2671 1 otherwise
2672*/
2673
2674int JOIN_CACHE_HASHED::init(bool for_explain)
2675{
2676 int rc= 0;
2677 TABLE_REF *ref= &join_tab->ref;
2678
2679 DBUG_ENTER("JOIN_CACHE_HASHED::init");
2680
2681 hash_table= 0;
2682 key_entries= 0;
2683
2684 key_length= ref->key_length;
2685
2686 if ((rc= JOIN_CACHE::init(for_explain)) || for_explain)
2687 DBUG_RETURN (rc);
2688
2689 if (!(key_buff= (uchar*) join->thd->alloc(key_length)))
2690 DBUG_RETURN(1);
2691
2692 /* Take into account a reference to the next record in the key chain */
2693 pack_length+= get_size_of_rec_offset();
2694 pack_length_with_blob_ptrs+= get_size_of_rec_offset();
2695
2696 ref_key_info= join_tab->get_keyinfo_by_key_no(join_tab->ref.key);
2697 ref_used_key_parts= join_tab->ref.key_parts;
2698
2699 hash_func= &JOIN_CACHE_HASHED::get_hash_idx_simple;
2700 hash_cmp_func= &JOIN_CACHE_HASHED::equal_keys_simple;
2701
2702 KEY_PART_INFO *key_part= ref_key_info->key_part;
2703 KEY_PART_INFO *key_part_end= key_part+ref_used_key_parts;
2704 for ( ; key_part < key_part_end; key_part++)
2705 {
2706 if (!key_part->field->eq_cmp_as_binary())
2707 {
2708 hash_func= &JOIN_CACHE_HASHED::get_hash_idx_complex;
2709 hash_cmp_func= &JOIN_CACHE_HASHED::equal_keys_complex;
2710 break;
2711 }
2712 }
2713
2714 init_hash_table();
2715
2716 rec_fields_offset= get_size_of_rec_offset()+get_size_of_rec_length()+
2717 (prev_cache ? prev_cache->get_size_of_rec_offset() : 0);
2718
2719 data_fields_offset= 0;
2720 if (use_emb_key)
2721 {
2722 CACHE_FIELD *copy= field_descr;
2723 CACHE_FIELD *copy_end= copy+flag_fields;
2724 for ( ; copy < copy_end; copy++)
2725 data_fields_offset+= copy->length;
2726 }
2727
2728 DBUG_RETURN(0);
2729}
2730
2731
2732/*
2733 Initialize the hash table of a hashed join cache
2734
2735 SYNOPSIS
2736 init_hash_table()
2737
2738 DESCRIPTION
2739 The function estimates the number of hash table entries in the hash
2740 table to be used and initializes this hash table within the join buffer
2741 space.
2742
2743 RETURN VALUE
2744 Currently the function always returns 0;
2745*/
2746
2747int JOIN_CACHE_HASHED::init_hash_table()
2748{
2749 hash_table= 0;
2750 key_entries= 0;
2751
2752 /* Calculate the minimal possible value of size_of_key_ofs greater than 1 */
2753 uint max_size_of_key_ofs= MY_MAX(2, get_size_of_rec_offset());
2754 for (size_of_key_ofs= 2;
2755 size_of_key_ofs <= max_size_of_key_ofs;
2756 size_of_key_ofs+= 2)
2757 {
2758 key_entry_length= get_size_of_rec_offset() + // key chain header
2759 size_of_key_ofs + // reference to the next key
2760 (use_emb_key ? get_size_of_rec_offset() : key_length);
2761
2762 size_t space_per_rec= avg_record_length +
2763 avg_aux_buffer_incr +
2764 key_entry_length+size_of_key_ofs;
2765 size_t n= buff_size / space_per_rec;
2766
2767 /*
2768 TODO: Make a better estimate for this upper bound of
2769 the number of records in the join buffer.
2770 */
2771 size_t max_n= buff_size / (pack_length-length+
2772 key_entry_length+size_of_key_ofs);
2773
2774 hash_entries= (uint) (n / 0.7);
2775 set_if_bigger(hash_entries, 1);
2776
2777 if (offset_size((uint)(max_n*key_entry_length)) <=
2778 size_of_key_ofs)
2779 break;
2780 }
2781
2782 /* Initialize the hash table */
2783 hash_table= buff + (buff_size-hash_entries*size_of_key_ofs);
2784 cleanup_hash_table();
2785 curr_key_entry= hash_table;
2786
2787 return 0;
2788}
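
/*
  Illustration (not part of the original sources): the approximate layout of
  the join buffer after init_hash_table() has carved the hash table out of
  its tail. Records are written from the left, key entries grow to the left
  from last_key_entry, and the hash table array occupies the end:

    buff                                                      buff+buff_size
    |<--- records --->|   free space   |<-- key entries --|<-- hash_table -->|
                      ^end_pos          ^last_key_entry    ^hash_table

  Each hash table slot holds a size_of_key_ofs byte reference to a key entry
  of its collision chain, or a null reference if the slot is empty.
*/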
2789
2790
2791/*
2792 Reallocate the join buffer of a hashed join cache
2793
2794 SYNOPSIS
2795 realloc_buffer()
2796
2797 DESCRIPTION
2798 The function reallocates the join buffer of the hashed join cache.
2799 After this it initializes a hash table within the buffer space and
2800 resets the join cache for writing.
2801
2802 NOTES
2803 The function assumes that buff_size contains the new value for the join
2804 buffer size.
2805
2806 RETURN VALUE
2807 0 if the buffer has been successfully reallocated
2808 1 otherwise
2809*/
2810
2811int JOIN_CACHE_HASHED::realloc_buffer()
2812{
2813 int rc;
2814 free();
2815 rc= MY_TEST(!(buff= (uchar*) my_malloc(buff_size, MYF(MY_THREAD_SPECIFIC))));
2816 init_hash_table();
2817 reset(TRUE);
2818 return rc;
2819}
2820
2821
2822/*
2823 Get maximum size of the additional space per record used for record keys
2824
2825 SYNOPSIS
2826 get_max_key_addon_space_per_record()
2827
2828 DESCRIPTION
2829 The function returns the size of the space occupied by one key entry
2830 and one hash table entry.
2831
2832 RETURN VALUE
2833 maximum size of the additional space per record that is used to store
2834 record keys in the hash table
2835*/
2836
2837uint JOIN_CACHE_HASHED::get_max_key_addon_space_per_record()
2838{
2839 ulong len;
2840 TABLE_REF *ref= &join_tab->ref;
2841 /*
2842 The total number of hash entries in the hash table is bounded by
2843 ceiling(N/0.7) where N is the maximum number of records in the buffer.
2844 That's why the multiplier 2 is used in the formula below.
2845 */
2846 len= (use_emb_key ? get_size_of_rec_offset() : ref->key_length) +
2847 size_of_rec_ofs + // size of the key chain header
2848 size_of_rec_ofs + // >= size of the reference to the next key
2849 2*size_of_rec_ofs; // >= 2*( size of hash table entry)
2850 return len;
2851}
2852
2853
2854/*
2855 Reset the buffer of a hashed join cache for reading/writing
2856
2857 SYNOPSIS
2858 reset()
2859 for_writing if it's TRUE the function resets the buffer for writing
2860
2861 DESCRIPTION
2862 This implementation of the virtual function reset() resets the join buffer
2863 of the JOIN_CACHE_HASHED class for reading or writing.
2864 In addition to what the default implementation does, this function
2865 cleans up the hash table allocated within the buffer.
2866
2867 RETURN VALUE
2868 none
2869*/
2870
2871void JOIN_CACHE_HASHED::reset(bool for_writing)
2872{
2873 this->JOIN_CACHE::reset(for_writing);
2874 if (for_writing && hash_table)
2875 cleanup_hash_table();
2876 curr_key_entry= hash_table;
2877}
2878
2879
2880/*
2881 Add a record into the buffer of a hashed join cache
2882
2883 SYNOPSIS
2884 put_record()
2885
2886 DESCRIPTION
2887 This implementation of the virtual function put_record writes the next
2888 matching record into the join buffer of the JOIN_CACHE_HASHED class.
2889 Additionally to what the default implementation does this function
2890 performs the following.
2891 It extracts from the record the key value used in lookups for matching
2892 records and searches for this key in the hash table of the join cache.
2893 If it finds the key in the hash table it joins the record to the chain
2894 of records with this key. If the key is not found in the hash table the
2895 key is placed into it and a chain containing only the newly added record
2896 is attached to the key entry. The key value is either placed in the hash
2897 element added for the key or, if the use_emb_key flag is set, remains in
2898 the record from the partial join.
2899 If the match flag field of a record contains MATCH_IMPOSSIBLE the key is
2900 not created for this record.
2901
2902 RETURN VALUE
2903 TRUE if it has been decided that it should be the last record
2904 in the join buffer,
2905 FALSE otherwise
2906*/
2907
2908bool JOIN_CACHE_HASHED::put_record()
2909{
2910 bool is_full;
2911 uchar *key;
2912 uint key_len= key_length;
2913 uchar *key_ref_ptr;
2914 uchar *link= 0;
2915 TABLE_REF *ref= &join_tab->ref;
2916 uchar *next_ref_ptr= pos;
2917
2918 pos+= get_size_of_rec_offset();
2919 /* Write the record into the join buffer */
2920 if (prev_cache)
2921 link= prev_cache->get_curr_rec_link();
2922 write_record_data(link, &is_full);
2923
2924 if (last_written_is_null_compl)
2925 return is_full;
2926
2927 if (use_emb_key)
2928 key= get_curr_emb_key();
2929 else
2930 {
2931 /* Build the key over the fields read into the record buffers */
2932 cp_buffer_from_ref(join->thd, join_tab->table, ref);
2933 key= ref->key_buff;
2934 }
2935
2936 /* Look for the key in the hash table */
2937 if (key_search(key, key_len, &key_ref_ptr))
2938 {
2939 uchar *last_next_ref_ptr;
2940 /*
2941 The key is found in the hash table.
2942 Add the record to the circular list of the records attached to this key.
2943 Below 'rec' is the record to be added into the record chain for the found
2944 key, 'key_ref' points to a flattened representation of the st_key_entry
2945 structure that contains the key and the head of the record chain.
2946 */
2947 last_next_ref_ptr= get_next_rec_ref(key_ref_ptr+get_size_of_key_offset());
2948 /* rec->next_rec= key_entry->last_rec->next_rec */
2949 memcpy(next_ref_ptr, last_next_ref_ptr, get_size_of_rec_offset());
2950 /* key_entry->last_rec->next_rec= rec */
2951 store_next_rec_ref(last_next_ref_ptr, next_ref_ptr);
2952 /* key_entry->last_rec= rec */
2953 store_next_rec_ref(key_ref_ptr+get_size_of_key_offset(), next_ref_ptr);
2954 }
2955 else
2956 {
2957 /*
2958 The key is not found in the hash table.
2959 Put the key into the join buffer linking it with the keys for the
2960 corresponding hash entry. Create a circular list with one element
2961 referencing the record and attach the list to the key in the buffer.
2962 */
2963 uchar *cp= last_key_entry;
2964 cp-= get_size_of_rec_offset()+get_size_of_key_offset();
2965 store_next_key_ref(key_ref_ptr, cp);
2966 store_null_key_ref(cp);
2967 store_next_rec_ref(next_ref_ptr, next_ref_ptr);
2968 store_next_rec_ref(cp+get_size_of_key_offset(), next_ref_ptr);
2969 if (use_emb_key)
2970 {
2971 cp-= get_size_of_rec_offset();
2972 store_emb_key_ref(cp, key);
2973 }
2974 else
2975 {
2976 cp-= key_len;
2977 memcpy(cp, key, key_len);
2978 }
2979 last_key_entry= cp;
2980 DBUG_ASSERT(last_key_entry >= end_pos);
2981 /* Increment the counter of key_entries in the hash table */
2982 key_entries++;
2983 }
2984 return is_full;
2985}
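
/*
  Illustration (not part of the original sources): the approximate layout of
  one key entry created by JOIN_CACHE_HASHED::put_record(), from lower to
  higher addresses within the join buffer:

    [ key value or embedded key reference ]  key_length / rec offset bytes
    [ reference to the next key entry     ]  size_of_key_ofs bytes
    [ reference to the last record of the ]  get_size_of_rec_offset() bytes
    [ circular chain attached to this key ]

  Every record written by this cache is in turn prefixed by a record
  reference that links the records sharing the same key into a circular
  chain; get_record() and skip_if_matched() step over that prefix.
*/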
2986
2987
2988/*
2989 Read the next record from the buffer of a hashed join cache
2990
2991 SYNOPSIS
2992 get_record()
2993
2994 DESCRIPTION
2995 In addition to what the default implementation of the virtual
2996 function get_record does, this implementation skips the link element
2997 used to connect the records with the same key into a chain.
2998
2999 RETURN VALUE
3000 TRUE there are no more records to read from the join buffer
3001 FALSE otherwise
3002*/
3003
3004bool JOIN_CACHE_HASHED::get_record()
3005{
3006 pos+= get_size_of_rec_offset();
3007 return this->JOIN_CACHE::get_record();
3008}
3009
3010
3011/*
3012 Skip record from a hashed join buffer if its match flag is set to MATCH_FOUND
3013
3014 SYNOPSIS
3015 skip_if_matched()
3016
3017 DESCRIPTION
3018 This implementation of the virtual function skip_if_matched does
3019 the same as the default implementation does, but it takes into account
3020 the link element used to connect the records with the same key into a chain.
3021
3022 RETURN VALUE
3023 TRUE the match flag is MATCH_FOUND and the record has been skipped
3024 FALSE otherwise
3025*/
3026
3027bool JOIN_CACHE_HASHED::skip_if_matched()
3028{
3029 uchar *save_pos= pos;
3030 pos+= get_size_of_rec_offset();
3031 if (!this->JOIN_CACHE::skip_if_matched())
3032 {
3033 pos= save_pos;
3034 return FALSE;
3035 }
3036 return TRUE;
3037}
3038
3039
3040/*
3041 Skip record from a hashed join buffer if its match flag dictates to do so
3042
3043 SYNOPSIS
3044 skip_if_not_needed_match()
3045
3046 DESCRIPTION
3047 This implementation of the virtual function skip_if_not_needed_match does
3048 the same as the default implementation, but it takes into account
3049 the link element used to connect the records with the same key into a chain.
3050
3051 RETURN VALUE
3052 TRUE the match flag dictates to skip the record
3053 FALSE the match flag is off
3054*/
3055
3056bool JOIN_CACHE_HASHED::skip_if_not_needed_match()
3057{
3058 uchar *save_pos= pos;
3059 pos+= get_size_of_rec_offset();
3060 if (!this->JOIN_CACHE::skip_if_not_needed_match())
3061 {
3062 pos= save_pos;
3063 return FALSE;
3064 }
3065 return TRUE;
3066}
3067
3068
3069/*
3070 Search for a key in the hash table of the join buffer
3071
3072 SYNOPSIS
3073 key_search()
3074 key pointer to the key value
3075 key_len key value length
3076 key_ref_ptr OUT position of the reference to the next key from
3077 the hash element for the found key, or
3078 a position where the reference to the hash
3079 element for the key is to be added in the
3080 case when the key has not been found
3081
3082 DESCRIPTION
3083 The function looks for a key in the hash table of the join buffer.
3084 If the key is found the function returns the position of the reference
3085 to the next key from the hash element for the given key.
3086 Otherwise the function returns the position where the reference to the
3087 newly created hash element for the given key is to be added.
3088
3089 RETURN VALUE
3090 TRUE the key is found in the hash table
3091 FALSE otherwise
3092*/
3093
3094bool JOIN_CACHE_HASHED::key_search(uchar *key, uint key_len,
3095 uchar **key_ref_ptr)
3096{
3097 bool is_found= FALSE;
3098 uint idx= (this->*hash_func)(key, key_length);
3099 uchar *ref_ptr= hash_table+size_of_key_ofs*idx;
3100 while (!is_null_key_ref(ref_ptr))
3101 {
3102 uchar *next_key;
3103 ref_ptr= get_next_key_ref(ref_ptr);
3104 next_key= use_emb_key ? get_emb_key(ref_ptr-get_size_of_rec_offset()) :
3105 ref_ptr-key_length;
3106
3107 if ((this->*hash_cmp_func)(next_key, key, key_len))
3108 {
3109 is_found= TRUE;
3110 break;
3111 }
3112 }
3113 *key_ref_ptr= ref_ptr;
3114 return is_found;
3115}
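
/*
  Illustration (hypothetical usage, not from the original sources): the two
  ways the out-parameter of key_search() is used elsewhere in this file;
  'key', 'key_len' and 'rec_chain' are placeholder names.

    uchar *key_ref_ptr;
    if (key_search(key, key_len, &key_ref_ptr))
    {
      // found: the chain of records for this key starts right after the
      // reference to the next key (cf. get_matching_chain_by_join_key())
      uchar *rec_chain= key_ref_ptr + get_size_of_key_offset();
    }
    else
    {
      // not found: key_ref_ptr is where put_record() stores the reference
      // to the key entry it creates for this key
    }
*/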
3116
3117
3118/*
3119 Hash function that considers a key in the hash table as byte array
3120
3121 SYNOPSIS
3122 get_hash_idx_simple()
3123 key pointer to the key value
3124 key_len key value length
3125
3126 DESCRIPTION
3127 The function calculates an index of the hash entry in the hash table
3128 of the join buffer for the given key. It considers the key just as
3129 a sequence of bytes of the length key_len.
3130
3131 RETURN VALUE
3132 the calculated index of the hash entry for the given key
3133*/
3134
3135inline
3136uint JOIN_CACHE_HASHED::get_hash_idx_simple(uchar* key, uint key_len)
3137{
3138 ulong nr= 1;
3139 ulong nr2= 4;
3140 uchar *pos= key;
3141 uchar *end= key+key_len;
3142 for (; pos < end ; pos++)
3143 {
3144 nr^= (ulong) ((((uint) nr & 63)+nr2)*((uint) *pos))+ (nr << 8);
3145 nr2+= 3;
3146 }
3147 return nr % hash_entries;
3148}
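
/*
  Illustration (not part of the original sources): the recurrence computed by
  get_hash_idx_simple() for each key byte b before the final reduction modulo
  hash_entries:

    nr  = nr XOR ( ((nr AND 63) + nr2) * b + (nr << 8) )
    nr2 = nr2 + 3

  Keys that are equal byte for byte therefore always land in the same hash
  table slot, which is all that equal_keys_simple() relies on.
*/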
3149
3150
3151/*
3152 Hash function that takes into account collations of the components of the key
3153
3154 SYNOPSIS
3155 get_hash_idx_complex()
3156 key pointer to the key value
3157 key_len key value length
3158
3159 DESCRIPTION
3160 The function calculates an index of the hash entry in the hash table
3161 of the join buffer for the given key. It takes into account that the
3162 components of the key may be of a varchar type with different collations.
3163 The function guarantees the same hash value for any two equal keys
3164 even if they differ as byte sequences.
3165 The function takes the info about the components of the key, their
3166 types and used collations from the class member ref_key_info containing
3167 a pointer to the descriptor of the index that can be used for the join
3168 operation.
3169
3170 RETURN VALUE
3171 the calculated index of the hash entry for the given key
3172*/
3173
3174inline
3175uint JOIN_CACHE_HASHED::get_hash_idx_complex(uchar *key, uint key_len)
3176{
3177 return
3178 (uint) (key_hashnr(ref_key_info, ref_used_key_parts, key) % hash_entries);
3179}
3180
3181
3182/*
3183 Compare two key entries in the hash table as sequence of bytes
3184
3185 SYNOPSIS
3186 equal_keys_simple()
3187 key1 pointer to the first key entry
3188 key2 pointer to the second key entry
3189 key_len the length of the key values
3190
3191 DESCRIPTION
3192 The function compares two key entries in the hash table key1 and key2
3193 as two byte sequences of length key_len
3194
3195 RETURN VALUE
3196 TRUE key1 coincides with key2
3197 FALSE otherwise
3198*/
3199
3200inline
3201bool JOIN_CACHE_HASHED::equal_keys_simple(uchar *key1, uchar *key2,
3202 uint key_len)
3203{
3204 return memcmp(key1, key2, key_len) == 0;
3205}
3206
3207
3208/*
3209 Compare two key entries taking into account the used collation
3210
3211 SYNOPSIS
3212 equal_keys_complex()
3213 key1 pointer to the first key entry
3214 key2 pointer to the second key entry
3215 key_len the length of the key values
3216
3217 DESCRIPTION
3218 The function checks whether two key entries in the hash table
3219 key1 and key2 are equal as, possibly, compound keys of a certain
3220 structure whose components may be of a varchar type and may
3221 employ different collations.
3222 The descriptor of the key structure is taken from the class
3223 member ref_key_info.
3224
3225 RETURN VALUE
3226 TRUE key1 is equal to key2
3227 FALSE otherwise
3228*/
3229
3230inline
3231bool JOIN_CACHE_HASHED::equal_keys_complex(uchar *key1, uchar *key2,
3232 uint key_len)
3233{
3234 return key_buf_cmp(ref_key_info, ref_used_key_parts, key1, key2) == 0;
3235}
3236
3237
3238/*
3239 Clean up the hash table of the join buffer
3240
3241 SYNOPSIS
3242 cleanup_hash_table()
3245
3246 DESCRIPTION
3247 The function cleans up the hash table in the join buffer removing all
3248 hash elements from the table.
3249
3250 RETURN VALUE
3251 none
3252*/
3253
3254void JOIN_CACHE_HASHED::cleanup_hash_table()
3255{
3256 last_key_entry= hash_table;
3257 bzero(hash_table, (buff+buff_size)-hash_table);
3258 key_entries= 0;
3259}
3260
3261
3262/*
3263 Check whether all records in a key chain have their match flags set on
3264
3265 SYNOPSIS
3266 check_all_match_flags_for_key()
3267 key_chain_ptr
3268
3269 DESCRIPTION
3270 This function retrieves records in the given circular chain and checks
3271 whether their match flags are set on. The parameter key_chain_ptr shall
3272 point to the position in the join buffer storing the reference to the
3273 last element of this chain.
3274
3275 RETURN VALUE
3276 TRUE if each retrieved record has its match flag set to MATCH_FOUND
3277 FALSE otherwise
3278*/
3279
3280bool JOIN_CACHE_HASHED::check_all_match_flags_for_key(uchar *key_chain_ptr)
3281{
3282 uchar *last_rec_ref_ptr= get_next_rec_ref(key_chain_ptr);
3283 uchar *next_rec_ref_ptr= last_rec_ref_ptr;
3284 do
3285 {
3286 next_rec_ref_ptr= get_next_rec_ref(next_rec_ref_ptr);
3287 uchar *rec_ptr= next_rec_ref_ptr+rec_fields_offset;
3288 if (get_match_flag_by_pos(rec_ptr) != MATCH_FOUND)
3289 return FALSE;
3290 }
3291 while (next_rec_ref_ptr != last_rec_ref_ptr);
3292 return TRUE;
3293}
3294
3295
3296/*
3297 Get the next key built for the records from the buffer of a hashed join cache
3298
3299 SYNOPSIS
3300 get_next_key()
3301 key pointer to the buffer where the key value is to be placed
3302
3303 DESCRIPTION
3304 The function reads the next key value stored in the hash table of the
3305 join buffer. Depending on the value of the use_emb_key flag of the
3306 join cache the value is read either from the hash table itself or from
3307 the record field where it occurs.
3308
3309 RETURN VALUE
3310 length of the key value - if the starting value of 'curr_key_entry' refers
3311 to the position after that referred to by the value of 'last_key_entry',
3312 0 - otherwise.
3313*/
3314
3315uint JOIN_CACHE_HASHED::get_next_key(uchar ** key)
3316{
3317 if (curr_key_entry == last_key_entry)
3318 return 0;
3319
3320 curr_key_entry-= key_entry_length;
3321
3322 *key = use_emb_key ? get_emb_key(curr_key_entry) : curr_key_entry;
3323
3324 DBUG_ASSERT(*key >= buff && *key < hash_table);
3325
3326 return key_length;
3327}
3328
3329
3330/*
3331 Initiate an iteration process over records in the joined table
3332
3333 SYNOPSIS
3334 open()
3335
3336 DESCRIPTION
3337 The function initiates the process of iteration over records from the
3338 joined table repeatedly performed by the BNL/BNLH join algorithms.
3339
3340 RETURN VALUE
3341 0 the initiation is a success
3342 error code otherwise
3343*/
3344
3345int JOIN_TAB_SCAN::open()
3346{
3347 save_or_restore_used_tabs(join_tab, FALSE);
3348 is_first_record= TRUE;
3349 join_tab->tracker->r_scans++;
3350 return join_init_read_record(join_tab);
3351}
3352
3353
3354/*
3355 Read the next record that can match while scanning the joined table
3356
3357 SYNOPSIS
3358 next()
3359
3360 DESCRIPTION
3361 The function reads the next record from the joined table that can
3362 match some records in the buffer of the join cache 'cache'. To do
3363 this the function calls the function that scans table records and
3364 looks for the next one that meets the condition pushed to the
3365 joined table join_tab.
3366
3367 NOTES
3368 The function catches the signal that kills the query.
3369
3370 RETURN VALUE
3371 0 the next record exists and has been successfully read
3372 error code otherwise
3373*/
3374
3375int JOIN_TAB_SCAN::next()
3376{
3377 int err= 0;
3378 int skip_rc;
3379 READ_RECORD *info= &join_tab->read_record;
3380 SQL_SELECT *select= join_tab->cache_select;
3381 THD *thd= join->thd;
3382
3383 if (is_first_record)
3384 is_first_record= FALSE;
3385 else
3386 err= info->read_record();
3387
3388 if (!err)
3389 {
3390 join_tab->tracker->r_rows++;
3391 }
3392
3393 while (!err && select && (skip_rc= select->skip_record(thd)) <= 0)
3394 {
3395 if (unlikely(thd->check_killed()) || skip_rc < 0)
3396 return 1;
3397 /*
3398 Move to the next record if the last retrieved record does not
3399 meet the condition pushed to the table join_tab.
3400 */
3401 err= info->read_record();
3402 if (!err)
3403 {
3404 join_tab->tracker->r_rows++;
3405 }
3406 }
3407
3408 if (!err)
3409 join_tab->tracker->r_rows_after_where++;
3410 return err;
3411}
3412
3413
3414/*
3415 Walk back in join order from join_tab until we encounter a join tab with
3416 tab->cache!=NULL, and save/restore tab->table->status along the way.
3417
3418 @param save TRUE save
3419 FALSE restore
3420*/
3421
3422static void save_or_restore_used_tabs(JOIN_TAB *join_tab, bool save)
3423{
3424 JOIN_TAB *first= join_tab->bush_root_tab?
3425 join_tab->bush_root_tab->bush_children->start :
3426 join_tab->join->join_tab + join_tab->join->const_tables;
3427
3428 for (JOIN_TAB *tab= join_tab-1; tab != first && !tab->cache; tab--)
3429 {
3430 if (tab->bush_children)
3431 {
3432 for (JOIN_TAB *child= tab->bush_children->start;
3433 child != tab->bush_children->end;
3434 child++)
3435 {
3436 if (save)
3437 child->table->status= child->status;
3438 else
3439 {
3440 tab->status= tab->table->status;
3441 tab->table->status= 0;
3442 }
3443 }
3444 }
3445
3446 if (save)
3447 tab->table->status= tab->status;
3448 else
3449 {
3450 tab->status= tab->table->status;
3451 tab->table->status= 0;
3452 }
3453 }
3454}
3455
3456
3457/*
3458 Perform finalizing actions for a scan over the table records
3459
3460 SYNOPSIS
3461 close()
3462
3463 DESCRIPTION
3464 The function performs the necessary restoring actions after
3465 the table scan over the joined table has been finished.
3466
3467 RETURN VALUE
3468 none
3469*/
3470
3471void JOIN_TAB_SCAN::close()
3472{
3473 save_or_restore_used_tabs(join_tab, TRUE);
3474}
3475
3476
3477/*
3478 Prepare to iterate over the BNL join cache buffer to look for matches
3479
3480 SYNOPSIS
3481 prepare_look_for_matches()
3482 skip_last <-> ignore the last record in the buffer
3483
3484 DESCRIPTION
3485 The function prepares the join cache for an iteration over the
3486 records in the join buffer. The iteration is performed when looking
3487 for matches for the record from the joined table join_tab that
3488 has been placed into the record buffer of the joined table.
3489 If the value of the parameter skip_last is TRUE then the last
3490 record from the join buffer is ignored.
3491 The function initializes the counter of the records that have not been
3492 iterated over yet.
3493
3494 RETURN VALUE
3495 TRUE there are no records in the buffer to iterate over
3496 FALSE otherwise
3497*/
3498
3499bool JOIN_CACHE_BNL::prepare_look_for_matches(bool skip_last)
3500{
3501 if (!records)
3502 return TRUE;
3503 reset(FALSE);
3504 rem_records= (uint)records - MY_TEST(skip_last);
3505 return rem_records == 0;
3506}
3507
3508
3509/*
3510 Get next record from the BNL join cache buffer when looking for matches
3511
3512 SYNOPSIS
3513 get_next_candidate_for_match
3514
3515 DESCRIPTION
3516 This method is used for iterations over the records from the join
3517 cache buffer when looking for matches for records from join_tab.
3518 The method performs the necessary preparations to read the next record
3519 from the join buffer into the record buffer by the method
3520 read_next_candidate_for_match, or to skip the next record from the join
3521 buffer by the method skip_next_candidate_for_match.
3522 This implementation of the virtual method get_next_candidate_for_match
3523 just decrements the counter of the records that are to be iterated over
3524 and returns the current value of the cursor 'pos' as the position of
3525 the record to be processed.
3526
3527 RETURN VALUE
3528 pointer to the position right after the prefix of the current record
3529 in the join buffer if there is another record to iterate over,
3530 0 - otherwise.
3531*/
3532
3533uchar *JOIN_CACHE_BNL::get_next_candidate_for_match()
3534{
3535 if (!rem_records)
3536 return 0;
3537 rem_records--;
3538 return pos+base_prefix_length;
3539}
3540
3541
3542/*
3543 Check whether the matching record from the BNL cache is to be skipped
3544
3545 SYNOPSIS
3546 skip_next_candidate_for_match
3547 rec_ptr pointer to the position in the join buffer right after the prefix
3548 of the current record
3549
3550 DESCRIPTION
3551 This implementation of the virtual function just calls the
3552 method skip_if_not_needed_match to check whether the record referenced by
3553 rec_ptr has its match flag set either to MATCH_FOUND and join_tab is the
3554 first inner table of a semi-join, or it's set to MATCH_IMPOSSIBLE and
3555 join_tab is the first inner table of an outer join.
3556 If so, the function just skips this record setting the value of the
3557 cursor 'pos' to the position right after it.
3558
3559 RETURN VALUE
3560 TRUE the record referenced by rec_ptr has been skipped
3561 FALSE otherwise
3562*/
3563
3564bool JOIN_CACHE_BNL::skip_next_candidate_for_match(uchar *rec_ptr)
3565{
3566 pos= rec_ptr-base_prefix_length;
3567 return skip_if_not_needed_match();
3568}
3569
3570
3571/*
3572 Read next record from the BNL join cache buffer when looking for matches
3573
3574 SYNOPSIS
3575 read_next_candidate_for_match
3576 rec_ptr pointer to the position in the join buffer right after the prefix
3577 of the current record.
3578
3579 DESCRIPTION
3580 This implementation of the virtual method read_next_candidate_for_match
3581 calls the method get_record to read the record referenced by rec_ptr from
3582 the join buffer into the record buffer. If this record refers to the
3583 fields in the other join buffers the call of get_record ensures that
3584 these fields are read into the corresponding record buffers as well.
3585 This function is supposed to be called after a successful call of
3586 the method get_next_candidate_for_match.
3587
3588 RETURN VALUE
3589 none
3590*/
3591
3592void JOIN_CACHE_BNL::read_next_candidate_for_match(uchar *rec_ptr)
3593{
3594 pos= rec_ptr-base_prefix_length;
3595 get_record();
3596}
3597
3598
3599/*
3600 Initialize the BNL join cache
3601
3602 SYNOPSIS
3603 init
3604 for_explain join buffer is initialized for explain only
3605
3606 DESCRIPTION
3607 The function initializes the cache structure. It is supposed to be called
3608 right after a constructor for the JOIN_CACHE_BNL.
3609
3610 NOTES
3611 The function first constructs a companion object of the type JOIN_TAB_SCAN,
3612 then it calls the init method of the parent class.
3613
3614 RETURN VALUE
3615    0 initialization with buffer allocations has succeeded
3616 1 otherwise
3617*/
3618
3619int JOIN_CACHE_BNL::init(bool for_explain)
3620{
3621 DBUG_ENTER("JOIN_CACHE_BNL::init");
3622
3623 if (!(join_tab_scan= new JOIN_TAB_SCAN(join, join_tab)))
3624 DBUG_RETURN(1);
3625
3626 DBUG_RETURN(JOIN_CACHE::init(for_explain));
3627}
3628
3629
3630/*
3631 Get the chain of records from buffer matching the current candidate for join
3632
3633 SYNOPSIS
3634 get_matching_chain_by_join_key()
3635
3636 DESCRIPTION
3637    This function first builds a join key for the record of join_tab that
3638    is currently in the record buffer for this table. Then it looks for
3639 the key entry with this key in the hash table of the join cache.
3640 If such a key entry is found the function returns the pointer to
3641 the head of the chain of records in the join_buffer that match this
3642 key.
3643
3644 RETURN VALUE
3645 The pointer to the corresponding circular list of records if
3646 the key entry with the join key is found, 0 - otherwise.
3647*/
3648
3649uchar *JOIN_CACHE_BNLH::get_matching_chain_by_join_key()
3650{
3651 uchar *key_ref_ptr;
3652 TABLE *table= join_tab->table;
3653 TABLE_REF *ref= &join_tab->ref;
3654 KEY *keyinfo= join_tab->get_keyinfo_by_key_no(ref->key);
3655 /* Build the join key value out of the record in the record buffer */
3656 key_copy(key_buff, table->record[0], keyinfo, key_length, TRUE);
3657 /* Look for this key in the join buffer */
3658 if (!key_search(key_buff, key_length, &key_ref_ptr))
3659 return 0;
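  /*
    The reference to the chain of matching records is stored in the key entry
    right after the key offset; return the position of that reference.
  */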
3660 return key_ref_ptr+get_size_of_key_offset();
3661}
3662
3663
3664/*
3665 Prepare to iterate over the BNLH join cache buffer to look for matches
3666
3667 SYNOPSIS
3668 prepare_look_for_matches()
3669 skip_last <-> ignore the last record in the buffer
3670
3671 DESCRIPTION
3672 The function prepares the join cache for an iteration over the
3673 records in the join buffer. The iteration is performed when looking
3674 for matches for the record from the joined table join_tab that
3675 has been placed into the record buffer of the joined table.
3676 If the value of the parameter skip_last is TRUE then the last
3677 record from the join buffer is ignored.
3678 The function builds the hashed key from the join fields of join_tab
3679 and uses this key to look in the hash table of the join cache for
3680    the chain of matching records in the join buffer. If it finds
3681    such a chain it sets the member last_matching_rec_ref_ptr to point to
3682    the last link of the chain while setting next_matching_rec_ref_ptr to 0.
3683
3684 RETURN VALUE
3685 TRUE there are no matching records in the buffer to iterate over
3686 FALSE otherwise
3687*/
3688
3689bool JOIN_CACHE_BNLH::prepare_look_for_matches(bool skip_last)
3690{
3691 uchar *curr_matching_chain;
3692 last_matching_rec_ref_ptr= next_matching_rec_ref_ptr= 0;
3693 if (!(curr_matching_chain= get_matching_chain_by_join_key()))
3694 return 1;
3695 last_matching_rec_ref_ptr= get_next_rec_ref(curr_matching_chain);
3696 return 0;
3697}
3698
3699
3700/*
3701 Get next record from the BNLH join cache buffer when looking for matches
3702
3703 SYNOPSIS
3704 get_next_candidate_for_match
3705
3706 DESCRIPTION
3707 This method is used for iterations over the records from the join
3708 cache buffer when looking for matches for records from join_tab.
3709    The method performs the necessary preparations to read the next record
3710 from the join buffer into the record buffer by the method
3711 read_next_candidate_for_match, or, to skip the next record from the join
3712 buffer by the method skip_next_candidate_for_match.
3713 This implementation of the virtual method moves to the next record
3714 in the chain of all records from the join buffer that are to be
3715 equi-joined with the current record from join_tab.
3716
3717 RETURN VALUE
3718 pointer to the beginning of the record fields in the join buffer
3719    if there is another record to iterate over, 0 - otherwise.
3720*/
3721
3722uchar *JOIN_CACHE_BNLH::get_next_candidate_for_match()
3723{
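  /*
    The matching records form a circular list: the iteration starts from the
    link following last_matching_rec_ref_ptr and stops as soon as the cursor
    returns to last_matching_rec_ref_ptr itself.
  */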
3724 if (next_matching_rec_ref_ptr == last_matching_rec_ref_ptr)
3725 return 0;
3726 next_matching_rec_ref_ptr= get_next_rec_ref(next_matching_rec_ref_ptr ?
3727 next_matching_rec_ref_ptr :
3728 last_matching_rec_ref_ptr);
3729 return next_matching_rec_ref_ptr+rec_fields_offset;
3730}
3731
3732
3733/*
3734 Check whether the matching record from the BNLH cache is to be skipped
3735
3736 SYNOPSIS
3737 skip_next_candidate_for_match
3738 rec_ptr pointer to the position in the join buffer right after
3739 the previous record
3740
3741 DESCRIPTION
3742 This implementation of the virtual function just calls the
3743 method get_match_flag_by_pos to check whether the record referenced
3744    by rec_ptr has its match flag set to MATCH_FOUND.
3745
3746 RETURN VALUE
3747 TRUE the record referenced by rec_ptr has its match flag set to
3748 MATCH_FOUND
3749 FALSE otherwise
3750*/
3751
3752bool JOIN_CACHE_BNLH::skip_next_candidate_for_match(uchar *rec_ptr)
3753{
3754 return join_tab->check_only_first_match() &&
3755 (get_match_flag_by_pos(rec_ptr) == MATCH_FOUND);
3756}
3757
3758
3759/*
3760 Read next record from the BNLH join cache buffer when looking for matches
3761
3762 SYNOPSIS
3763 read_next_candidate_for_match
3764 rec_ptr pointer to the position in the join buffer right after
3765 the previous record
3766
3767 DESCRIPTION
3768 This implementation of the virtual method read_next_candidate_for_match
3769 calls the method get_record_by_pos to read the record referenced by rec_ptr
3770 from the join buffer into the record buffer. If this record refers to
3771    fields in other join buffers, the call of get_record_by_pos ensures that
3772 these fields are read into the corresponding record buffers as well.
3773 This function is supposed to be called after a successful call of
3774 the method get_next_candidate_for_match.
3775
3776 RETURN VALUE
3777 none
3778*/
3779
3780void JOIN_CACHE_BNLH::read_next_candidate_for_match(uchar *rec_ptr)
3781{
3782 get_record_by_pos(rec_ptr);
3783}
3784
3785
3786/*
3787 Initialize the BNLH join cache
3788
3789 SYNOPSIS
3790 init
3791 for_explain join buffer is initialized for explain only
3792
3793 DESCRIPTION
3794 The function initializes the cache structure. It is supposed to be called
3795 right after a constructor for the JOIN_CACHE_BNLH.
3796
3797 NOTES
3798 The function first constructs a companion object of the type JOIN_TAB_SCAN,
3799 then it calls the init method of the parent class.
3800
3801 RETURN VALUE
3802    0 initialization with buffer allocations has succeeded
3803 1 otherwise
3804*/
3805
3806int JOIN_CACHE_BNLH::init(bool for_explain)
3807{
3808 DBUG_ENTER("JOIN_CACHE_BNLH::init");
3809
3810 if (!(join_tab_scan= new JOIN_TAB_SCAN(join, join_tab)))
3811 DBUG_RETURN(1);
3812
3813 DBUG_RETURN(JOIN_CACHE_HASHED::init(for_explain));
3814}
3815
3816
3817/*
3818 Calculate the increment of the MRR buffer for a record write
3819
3820 SYNOPSIS
3821 aux_buffer_incr()
3822
3823 DESCRIPTION
3824 This implementation of the virtual function aux_buffer_incr determines
3825    by how much the size of the MRR buffer should be increased when another
3826 record is added to the cache.
3827
3828 RETURN VALUE
3829 the increment of the size of the MRR buffer for the next record
3830*/
3831
3832uint JOIN_TAB_SCAN_MRR::aux_buffer_incr(size_t recno)
3833{
3834 uint incr= 0;
3835 TABLE_REF *ref= &join_tab->ref;
3836 TABLE *tab= join_tab->table;
3837 ha_rows rec_per_key=
3838 (ha_rows) tab->key_info[ref->key].actual_rec_per_key(ref->key_parts-1);
3839 set_if_bigger(rec_per_key, 1);
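  /*
    A worked example (the numbers are purely illustrative): with
    key_length=8, ref_length=6, mrr_length_per_rec=10 and rec_per_key=2,
    the first cached record increases the MRR buffer estimate by
    8 + 6 + 10*2 = 34 bytes, every subsequent record by 10*2 = 20 bytes.
  */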
3840 if (recno == 1)
3841 incr= ref->key_length + tab->file->ref_length;
3842 incr+= (uint)(tab->file->stats.mrr_length_per_rec * rec_per_key);
3843 return incr;
3844}
3845
3846
3847/*
3848 Initiate iteration over records returned by MRR for the current join buffer
3849
3850 SYNOPSIS
3851 open()
3852
3853 DESCRIPTION
3854 The function initiates the process of iteration over the records from
3855 join_tab returned by the MRR interface functions for records from
3856 the join buffer. Such an iteration is performed by the BKA/BKAH join
3857 algorithm for each new refill of the join buffer.
3858 The function calls the MRR handler function multi_range_read_init to
3859 initiate this process.
3860
3861 RETURN VALUE
3862 0 the initiation is a success
3863 error code otherwise
3864*/
3865
3866int JOIN_TAB_SCAN_MRR::open()
3867{
3868 handler *file= join_tab->table->file;
3869
3870 join_tab->table->null_row= 0;
3871
3872
3873 /* Dynamic range access is never used with BKA */
3874 DBUG_ASSERT(join_tab->use_quick != 2);
3875
3876 join_tab->tracker->r_scans++;
3877 save_or_restore_used_tabs(join_tab, FALSE);
3878
3879 init_mrr_buff();
3880
3881 /*
3882 Prepare to iterate over keys from the join buffer and to get
3883    matching candidates obtained with MRR handler functions.
3884 */
3885 if (!file->inited)
3886 file->ha_index_init(join_tab->ref.key, 1);
3887 ranges= cache->get_number_of_ranges_for_mrr();
3888 if (!join_tab->cache_idx_cond)
3889 range_seq_funcs.skip_index_tuple= 0;
3890 return file->multi_range_read_init(&range_seq_funcs, (void*) cache,
3891 ranges, mrr_mode, &mrr_buff);
3892}
3893
3894
3895/*
3896 Read the next record returned by MRR for the current join buffer
3897
3898 SYNOPSIS
3899 next()
3900
3901 DESCRIPTION
3902 The function reads the next record from the joined table join_tab
3903 returned by the MRR handler function multi_range_read_next for
3904 the current refill of the join buffer. The record is read into
3905 the record buffer used for join_tab records in join operations.
3906
3907 RETURN VALUE
3908 0 the next record exists and has been successfully read
3909 error code otherwise
3910*/
3911
3912int JOIN_TAB_SCAN_MRR::next()
3913{
3914 char **ptr= (char **) cache->get_curr_association_ptr();
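  /*
    The MRR implementation returns, together with the row, the association
    (range_id) that was attached to the range the row was found for; for the
    BKA/BKAH caches this is the position of the matching record(s) in the
    join buffer.
  */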
3915
3916 DBUG_ASSERT(sizeof(range_id_t) == sizeof(*ptr));
3917 int rc= join_tab->table->file->multi_range_read_next((range_id_t*)ptr) ? -1 : 0;
3918 if (!rc)
3919 {
3920 join_tab->tracker->r_rows++;
3921 join_tab->tracker->r_rows_after_where++;
3922 /*
3923      If a record in an incremental cache contains no fields then the
3924 association for the last record in cache will be equal to cache->end_pos
3925 */
3926 /*
3927 psergey: this makes no sense where HA_MRR_NO_ASSOC is used.
3928 DBUG_ASSERT(cache->buff <= (uchar *) (*ptr) &&
3929 (uchar *) (*ptr) <= cache->end_pos);
3930 */
3931 }
3932 return rc;
3933}
3934
3935
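/*
  Report the key length and key part map used by a BKA/BKAH join cache

  DESCRIPTION
    This callback fills in the length of the lookup key of join_tab and a
    key_part_map with the first ref->key_parts bits set (e.g. for
    key_parts == 3 the map is 0b111). The MRR implementation uses this
    information to interpret the ranges produced by the range sequence
    callbacks below.
*/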
3936static
3937void bka_range_seq_key_info(void *init_params, uint *length,
3938 key_part_map *map)
3939{
3940 TABLE_REF *ref= &(((JOIN_CACHE*)init_params)->join_tab->ref);
3941 *length= ref->key_length;
3942 *map= (key_part_map(1) << ref->key_parts) - 1;
3943}
3944
3945
3946/*
3947Initialize retrieval of range sequence for BKA join algorithm
3948
3949SYNOPSIS
3950 bka_range_seq_init()
3951  init_param pointer to the BKA join cache object
3952 n_ranges the number of ranges obtained
3953 flags combination of MRR flags
3954
3955DESCRIPTION
3956 The function interprets init_param as a pointer to a JOIN_CACHE_BKA
3957 object. The function prepares for an iteration over the join keys
3958 built for all records from the cache join buffer.
3959
3960NOTE
3961  This function is used only as a callback function.
3962
3963RETURN VALUE
3964 init_param value that is to be used as a parameter of bka_range_seq_next()
3965*/
3966
3967static
3968range_seq_t bka_range_seq_init(void *init_param, uint n_ranges, uint flags)
3969{
3970 DBUG_ENTER("bka_range_seq_init");
3971 JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) init_param;
3972 cache->reset(0);
3973 DBUG_RETURN((range_seq_t) init_param);
3974}
3975
3976
3977/*
3978Get the next range/key over records from the join buffer used by a BKA cache
3979
3980SYNOPSIS
3981 bka_range_seq_next()
3982 seq the value returned by bka_range_seq_init
3983 range OUT reference to the next range
3984
3985DESCRIPTION
3986 The function interprets seq as a pointer to a JOIN_CACHE_BKA
3987 object. The function returns a pointer to the range descriptor
3988 for the key built over the next record from the join buffer.
3989
3990NOTE
3991  This function is used only as a callback function.
3992
3993RETURN VALUE
3994 FALSE ok, the range structure filled with info about the next range/key
3995 TRUE no more ranges
3996*/
3997
3998static
3999bool bka_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
4000{
4001 DBUG_ENTER("bka_range_seq_next");
4002 JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq;
4003 TABLE_REF *ref= &cache->join_tab->ref;
4004 key_range *start_key= &range->start_key;
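  /*
    Each range is an equality range over a single key value: the end key is a
    copy of the start key, and range->ptr (the association) is set to the
    position in the join buffer of the record the key was built from.
  */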
4005 if ((start_key->length= cache->get_next_key((uchar **) &start_key->key)))
4006 {
4007 start_key->keypart_map= (1 << ref->key_parts) - 1;
4008 start_key->flag= HA_READ_KEY_EXACT;
4009 range->end_key= *start_key;
4010 range->end_key.flag= HA_READ_AFTER_KEY;
4011 range->ptr= (char *) cache->get_curr_rec();
4012 range->range_flag= EQ_RANGE;
4013 DBUG_RETURN(0);
4014 }
4015 DBUG_RETURN(1);
4016}
4017
4018
4019/*
4020Check whether range_info orders to skip the next record from BKA buffer
4021
4022SYNOPSIS
4023 bka_range_seq_skip_record()
4024 seq value returned by bka_range_seq_init()
4025 range_info information about the next range
4026 rowid [NOT USED] rowid of the record to be checked
4027
4028
4029DESCRIPTION
4030 The function interprets seq as a pointer to a JOIN_CACHE_BKA object.
4031 The function returns TRUE if the record with this range_info
4032 is to be filtered out from the stream of records returned by
4033 multi_range_read_next().
4034
4035NOTE
4036  This function is used only as a callback function.
4037
4038RETURN VALUE
4039 1 record with this range_info is to be filtered out from the stream
4040 of records returned by multi_range_read_next()
4041 0 the record is to be left in the stream
4042*/
4043
4044static
4045bool bka_range_seq_skip_record(range_seq_t rseq, range_id_t range_info, uchar *rowid)
4046{
4047 DBUG_ENTER("bka_range_seq_skip_record");
4048 JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq;
4049 bool res= cache->get_match_flag_by_pos((uchar *) range_info) ==
4050 JOIN_CACHE::MATCH_FOUND;
4051 DBUG_RETURN(res);
4052}
4053
4054
4055/*
4056Check if the record combination from BKA cache matches the index condition
4057
4058SYNOPSIS
4059 bka_skip_index_tuple()
4060 rseq value returned by bka_range_seq_init()
4061 range_info record chain for the next range/key returned by MRR
4062
4063DESCRIPTION
4064  This is a wrapper for the JOIN_CACHE_BKA::skip_index_tuple method,
4065 see comments there.
4066
4067NOTE
4068 This function is used as a RANGE_SEQ_IF::skip_index_tuple callback.
4069
4070RETURN VALUE
4071 0 The record combination satisfies the index condition
4072 1 Otherwise
4073*/
4074
4075static
4076bool bka_skip_index_tuple(range_seq_t rseq, range_id_t range_info)
4077{
4078 DBUG_ENTER("bka_skip_index_tuple");
4079 JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq;
4080 THD *thd= cache->thd();
4081 bool res;
4082 status_var_increment(thd->status_var.ha_icp_attempts);
4083 if (!(res= cache->skip_index_tuple(range_info)))
4084 status_var_increment(thd->status_var.ha_icp_match);
4085 DBUG_RETURN(res);
4086}
4087
4088
4089/*
4090Prepare to read the record from BKA cache matching the current joined record
4091
4092SYNOPSIS
4093 prepare_look_for_matches()
4094 skip_last <-> ignore the last record in the buffer (always unused here)
4095
4096DESCRIPTION
4097 The function prepares to iterate over records in the join cache buffer
4098 matching the record loaded into the record buffer for join_tab when
4099 performing join operation by BKA join algorithm. With BKA algorithms the
4100 record loaded into the record buffer for join_tab always has a direct
4101 reference to the matching records from the join buffer. When the regular
4102 BKA join algorithm is employed the record from join_tab can refer to
4103 only one such record.
4104 The function sets the counter of the remaining records from the cache
4105 buffer that would match the current join_tab record to 1.
4106
4107RETURN VALUE
4108 TRUE there are no records in the buffer to iterate over
4109 FALSE otherwise
4110*/
4111
4112bool JOIN_CACHE_BKA::prepare_look_for_matches(bool skip_last)
4113{
4114 if (!records)
4115 return TRUE;
4116 rem_records= 1;
4117 return FALSE;
4118}
4119
4120
4121/*
4122Get the record from the BKA cache matching the current joined record
4123
4124SYNOPSIS
4125 get_next_candidate_for_match
4126
4127DESCRIPTION
4128 This method is used for iterations over the records from the join
4129 cache buffer when looking for matches for records from join_tab.
4130 The method performs the necessary preparations to read the next record
4131 from the join buffer into the record buffer by the method
4132 read_next_candidate_for_match, or, to skip the next record from the join
4133 buffer by the method skip_if_not_needed_match.
4134 This implementation of the virtual method get_next_candidate_for_match
4135 just decrements the counter of the records that are to be iterated over
4136 and returns the value of curr_association as a reference to the position
4137 of the beginning of the record fields in the buffer.
4138
4139RETURN VALUE
4140 pointer to the start of the record fields in the join buffer
4141  if there is another record to iterate over, 0 - otherwise.
4142*/
4143
4144uchar *JOIN_CACHE_BKA::get_next_candidate_for_match()
4145{
4146 if (!rem_records)
4147 return 0;
4148 rem_records--;
4149 return curr_association;
4150}
4151
4152
4153/*
4154Check whether the matching record from the BKA cache is to be skipped
4155
4156SYNOPSIS
4157 skip_next_candidate_for_match
4158 rec_ptr pointer to the position in the join buffer right after
4159 the previous record
4160
4161DESCRIPTION
4162 This implementation of the virtual function just calls the
4163 method get_match_flag_by_pos to check whether the record referenced
4164  by rec_ptr has its match flag set to MATCH_FOUND.
4165
4166RETURN VALUE
4167 TRUE the record referenced by rec_ptr has its match flag set to
4168 MATCH_FOUND
4169 FALSE otherwise
4170*/
4171
4172bool JOIN_CACHE_BKA::skip_next_candidate_for_match(uchar *rec_ptr)
4173{
4174 return join_tab->check_only_first_match() &&
4175 (get_match_flag_by_pos(rec_ptr) == MATCH_FOUND);
4176}
4177
4178
4179/*
4180Read the next record from the BKA join cache buffer when looking for matches
4181
4182SYNOPSIS
4183 read_next_candidate_for_match
4184 rec_ptr pointer to the position in the join buffer right after
4185 the previous record
4186
4187DESCRIPTION
4188 This implementation of the virtual method read_next_candidate_for_match
4189 calls the method get_record_by_pos to read the record referenced by rec_ptr
4190 from the join buffer into the record buffer. If this record refers to
4191  fields in other join buffers, the call of get_record_by_pos ensures that
4192 these fields are read into the corresponding record buffers as well.
4193 This function is supposed to be called after a successful call of
4194 the method get_next_candidate_for_match.
4195
4196RETURN VALUE
4197 none
4198*/
4199
4200void JOIN_CACHE_BKA::read_next_candidate_for_match(uchar *rec_ptr)
4201{
4202 get_record_by_pos(rec_ptr);
4203}
4204
4205
4206/*
4207Initialize the BKA join cache
4208
4209SYNOPSIS
4210 init
4211 for_explain join buffer is initialized for explain only
4212
4213
4214DESCRIPTION
4215 The function initializes the cache structure. It is supposed to be called
4216 right after a constructor for the JOIN_CACHE_BKA.
4217
4218NOTES
4219 The function first constructs a companion object of the type
4220 JOIN_TAB_SCAN_MRR, then it calls the init method of the parent class.
4221
4222RETURN VALUE
4223  0 initialization with buffer allocations has succeeded
4224 1 otherwise
4225*/
4226
4227int JOIN_CACHE_BKA::init(bool for_explain)
4228{
4229 int res;
4230 bool check_only_first_match= join_tab->check_only_first_match();
4231
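  /*
    The skip_record callback is installed only when a single match per record
    suffices (join_tab->check_only_first_match()): then records whose match
    flag is already MATCH_FOUND can be filtered out by the MRR implementation.
  */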
4232 RANGE_SEQ_IF rs_funcs= { bka_range_seq_key_info,
4233 bka_range_seq_init,
4234 bka_range_seq_next,
4235 check_only_first_match ? bka_range_seq_skip_record : 0,
4236 bka_skip_index_tuple };
4237
4238 DBUG_ENTER("JOIN_CACHE_BKA::init");
4239
4240 JOIN_TAB_SCAN_MRR *jsm;
4241 if (!(join_tab_scan= jsm= new JOIN_TAB_SCAN_MRR(join, join_tab,
4242 mrr_mode, rs_funcs)))
4243 DBUG_RETURN(1);
4244
4245 if ((res= JOIN_CACHE::init(for_explain)))
4246 DBUG_RETURN(res);
4247
4248 if (use_emb_key)
4249 jsm->mrr_mode |= HA_MRR_MATERIALIZED_KEYS;
4250
4251 DBUG_RETURN(0);
4252}
4253
4254
4255/*
4256Get the key built over the next record from BKA join buffer
4257
4258SYNOPSIS
4259 get_next_key()
4260 key pointer to the buffer where the key value is to be placed
4261
4262DESCRIPTION
4263  The function reads key fields from the current record in the join buffer
4264  and builds the key value out of these fields that will be used to access
4265  the 'join_tab' table. Some of the key fields may belong to previous caches.
4266 They are accessed via record references to the record parts stored in the
4267 previous join buffers. The other key fields always are placed right after
4268 the flag fields of the record.
4269 If the key is embedded, which means that its value can be read directly
4270 from the join buffer, then *key is set to the beginning of the key in
4271 this buffer. Otherwise the key is built in the join_tab->ref->key_buff.
4272  The function returns the length of the key if it succeeds in reading it.
4273  It is assumed that the function starts reading at the position of
4274  the record length which is provided for each record in a BKA cache.
4275 After the key is built the 'pos' value points to the first position after
4276 the current record.
4277 The function just skips the records with MATCH_IMPOSSIBLE in the
4278  match flag field if there are any.
4279  The function returns 0 if the initial position is after the beginning
4280  of the record fields for the last record from the join buffer.
4281
4282RETURN VALUE
4283 length of the key value - if the starting value of 'pos' points to
4284 the position before the fields for the last record,
4285 0 - otherwise.
4286*/
4287
4288uint JOIN_CACHE_BKA::get_next_key(uchar ** key)
4289{
4290 uint len;
4291 uint32 rec_len;
4292 uchar *init_pos;
4293 JOIN_CACHE *cache;
4294
4295start:
4296
4297 /* Any record in a BKA cache is prepended with its length */
4298 DBUG_ASSERT(with_length);
4299
4300 if ((pos+size_of_rec_len) > last_rec_pos || !records)
4301 return 0;
4302
4303 /* Read the length of the record */
4304 rec_len= get_rec_length(pos);
4305 pos+= size_of_rec_len;
4306 init_pos= pos;
4307
4308 /* Read a reference to the previous cache if any */
4309 if (prev_cache)
4310 pos+= prev_cache->get_size_of_rec_offset();
4311
4312 curr_rec_pos= pos;
4313
4314 /* Read all flag fields of the record */
4315 read_flag_fields();
4316
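  /*
    Records already known to have no match (MATCH_IMPOSSIBLE) need no key:
    skip such a record and move on to the next one in the buffer.
  */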
4317 if (with_match_flag &&
4318 (Match_flag) curr_rec_pos[0] == MATCH_IMPOSSIBLE )
4319 {
4320 pos= init_pos+rec_len;
4321 goto start;
4322 }
4323
4324 if (use_emb_key)
4325 {
4326 /* An embedded key is taken directly from the join buffer */
4327 *key= pos;
4328 len= emb_key_length;
4329 }
4330 else
4331 {
4332 /* Read key arguments from previous caches if there are any such fields */
4333 if (external_key_arg_fields)
4334 {
4335 uchar *rec_ptr= curr_rec_pos;
4336 uint key_arg_count= external_key_arg_fields;
4337 CACHE_FIELD **copy_ptr= blob_ptr-key_arg_count;
4338 for (cache= prev_cache; key_arg_count; cache= cache->prev_cache)
4339 {
4340 uint len= 0;
4341 DBUG_ASSERT(cache);
4342 rec_ptr= cache->get_rec_ref(rec_ptr);
4343 while (!cache->referenced_fields)
4344 {
4345 cache= cache->prev_cache;
4346 DBUG_ASSERT(cache);
4347 rec_ptr= cache->get_rec_ref(rec_ptr);
4348 }
4349 while (key_arg_count &&
4350 cache->read_referenced_field(*copy_ptr, rec_ptr, &len))
4351 {
4352 copy_ptr++;
4353 --key_arg_count;
4354 }
4355 }
4356 }
4357
4358 /*
4359 Read the other key arguments from the current record. The fields for
4360 these arguments are always first in the sequence of the record's fields.
4361 */
4362 CACHE_FIELD *copy= field_descr+flag_fields;
4363 CACHE_FIELD *copy_end= copy+local_key_arg_fields;
4364 bool blob_in_rec_buff= blob_data_is_in_rec_buff(curr_rec_pos);
4365 for ( ; copy < copy_end; copy++)
4366 read_record_field(copy, blob_in_rec_buff);
4367
4368 /* Build the key over the fields read into the record buffers */
4369 TABLE_REF *ref= &join_tab->ref;
4370 cp_buffer_from_ref(join->thd, join_tab->table, ref);
4371 *key= ref->key_buff;
4372 len= ref->key_length;
4373 }
4374
4375 pos= init_pos+rec_len;
4376
4377 return len;
4378}
4379
4380
4381/*
4382Check the index condition of the joined table for a record from the BKA cache
4383
4384SYNOPSIS
4385 skip_index_tuple()
4386 range_info pointer to the record returned by MRR
4387
4388DESCRIPTION
4389 This function is invoked from MRR implementation to check if an index
4390 tuple matches the index condition. It is used in the case where the index
4391 condition actually depends on both columns of the used index and columns
4392 from previous tables.
4393
4394NOTES
4395 Accessing columns of the previous tables requires special handling with
4396 BKA. The idea of BKA is to collect record combinations in a buffer and
4397 then do a batch of ref access lookups, i.e. by the time we're doing a
4398 lookup its previous-records-combination is not in prev_table->record[0]
4399 but somewhere in the join buffer.
4400 We need to get it from there back into prev_table(s)->record[0] before we
4401 can evaluate the index condition, and that's why we need this function
4402 instead of regular IndexConditionPushdown.
4403
4404NOTES
4405 Possible optimization:
4406 Before we unpack the record from a previous table
4407 check if this table is used in the condition.
4408 If so then unpack the record otherwise skip the unpacking.
4409 This should be done by a special virtual method
4410 get_partial_record_by_pos().
4411
4412RETURN VALUE
4413  1 the record combination does not satisfy the index condition
4414 0 otherwise
4415*/
4416
4417bool JOIN_CACHE_BKA::skip_index_tuple(range_id_t range_info)
4418{
4419 DBUG_ENTER("JOIN_CACHE_BKA::skip_index_tuple");
4420 get_record_by_pos((uchar*)range_info);
4421 DBUG_RETURN(!join_tab->cache_idx_cond->val_int());
4422}
4423
4424
4425
4426/*
4427Initialize retrieval of range sequence for the BKAH join algorithm
4428
4429SYNOPSIS
4430 bkah_range_seq_init()
4431  init_param pointer to the BKAH join cache object
4432 n_ranges the number of ranges obtained
4433 flags combination of MRR flags
4434
4435DESCRIPTION
4436 The function interprets init_param as a pointer to a JOIN_CACHE_BKAH
4437 object. The function prepares for an iteration over distinct join keys
4438 built over the records from the cache join buffer.
4439
4440NOTE
4441  This function is used only as a callback function.
4442
4443RETURN VALUE
4444 init_param value that is to be used as a parameter of
4445 bkah_range_seq_next()
4446*/
4447
4448static
4449range_seq_t bkah_range_seq_init(void *init_param, uint n_ranges, uint flags)
4450{
4451 DBUG_ENTER("bkah_range_seq_init");
4452 JOIN_CACHE_BKAH *cache= (JOIN_CACHE_BKAH *) init_param;
4453 cache->reset(0);
4454 DBUG_RETURN((range_seq_t) init_param);
4455}
4456
4457
4458/*
4459Get the next range/key over records from the join buffer of a BKAH cache
4460
4461SYNOPSIS
4462 bkah_range_seq_next()
4463 seq value returned by bkah_range_seq_init()
4464 range OUT reference to the next range
4465
4466DESCRIPTION
4467 The function interprets seq as a pointer to a JOIN_CACHE_BKAH
4468 object. The function returns a pointer to the range descriptor
4469 for the next unique key built over records from the join buffer.
4470
4471NOTE
4472  This function is used only as a callback function.
4473
4474RETURN VALUE
4475 FALSE ok, the range structure filled with info about the next range/key
4476 TRUE no more ranges
4477*/
4478
4479static
4480bool bkah_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
4481{
4482 DBUG_ENTER("bkah_range_seq_next");
4483 JOIN_CACHE_BKAH *cache= (JOIN_CACHE_BKAH *) rseq;
4484 TABLE_REF *ref= &cache->join_tab->ref;
4485 key_range *start_key= &range->start_key;
4486 if ((start_key->length= cache->get_next_key((uchar **) &start_key->key)))
4487 {
4488 start_key->keypart_map= (1 << ref->key_parts) - 1;
4489 start_key->flag= HA_READ_KEY_EXACT;
4490 range->end_key= *start_key;
4491 range->end_key.flag= HA_READ_AFTER_KEY;
4492 range->ptr= (char *) cache->get_curr_key_chain();
4493 range->range_flag= EQ_RANGE;
4494 DBUG_RETURN(0);
4495 }
4496 DBUG_RETURN(1);
4497}
4498
4499
4500/*
4501Check whether range_info orders to skip the next record from BKAH join buffer
4502
4503SYNOPSIS
4504 bkah_range_seq_skip_record()
4505 seq value returned by bkah_range_seq_init()
4506 range_info information about the next range/key returned by MRR
4507  rowid [NOT USED] rowid of the record to be checked
4508
4509DESCRIPTION
4510 The function interprets seq as a pointer to a JOIN_CACHE_BKAH
4511 object. The function returns TRUE if the record with this range_info
4512 is to be filtered out from the stream of records returned by
4513 multi_range_read_next().
4514
4515NOTE
4516  This function is used only as a callback function.
4517
4518RETURN VALUE
4519 1 record with this range_info is to be filtered out from the stream
4520 of records returned by multi_range_read_next()
4521 0 the record is to be left in the stream
4522*/
4523
4524static
4525bool bkah_range_seq_skip_record(range_seq_t rseq, range_id_t range_info, uchar *rowid)
4526{
4527 DBUG_ENTER("bkah_range_seq_skip_record");
4528 JOIN_CACHE_BKAH *cache= (JOIN_CACHE_BKAH *) rseq;
4529 bool res= cache->check_all_match_flags_for_key((uchar *) range_info);
4530 DBUG_RETURN(res);
4531}
4532
4533
4534/*
4535Check if the record combination from BKAH cache matches the index condition
4536
4537SYNOPSIS
4538 bkah_skip_index_tuple()
4539 rseq value returned by bka_range_seq_init()
4540 range_info record chain for the next range/key returned by MRR
4541
4542DESCRIPTION
4543  This is a wrapper for the JOIN_CACHE_BKAH::skip_index_tuple method,
4544 see comments there.
4545
4546NOTE
4547 This function is used as a RANGE_SEQ_IF::skip_index_tuple callback.
4548
4549RETURN VALUE
4550 0 some records from the chain satisfy the index condition
4551 1 otherwise
4552*/
4553
4554static
4555bool bkah_skip_index_tuple(range_seq_t rseq, range_id_t range_info)
4556{
4557  DBUG_ENTER("bkah_skip_index_tuple");
4558 JOIN_CACHE_BKAH *cache= (JOIN_CACHE_BKAH *) rseq;
4559 THD *thd= cache->thd();
4560 bool res;
4561 status_var_increment(thd->status_var.ha_icp_attempts);
4562 if (!(res= cache->skip_index_tuple(range_info)))
4563 status_var_increment(thd->status_var.ha_icp_match);
4564 DBUG_RETURN(res);
4565}
4566
4567
4568/*
4569Prepare to read record from BKAH cache matching the current joined record
4570
4571SYNOPSIS
4572 prepare_look_for_matches()
4573 skip_last <-> ignore the last record in the buffer (always unused here)
4574
4575DESCRIPTION
4576 The function prepares to iterate over records in the join cache buffer
4577 matching the record loaded into the record buffer for join_tab when
4578  performing a join operation by the BKAH join algorithm. With the BKAH
4579  algorithm, if association labels are used, then the record loaded into the
4580  record buffer for join_tab always has a direct reference to the chain of
4581  the matching records from the join buffer. If association labels are not
4582  used then the chain of the matching records is obtained by the call of the
4583  get_matching_chain_by_join_key function.
4584
4585RETURN VALUE
4586 TRUE there are no records in the buffer to iterate over
4587 FALSE otherwise
4588*/
4589
4590bool JOIN_CACHE_BKAH::prepare_look_for_matches(bool skip_last)
4591{
4592 last_matching_rec_ref_ptr= next_matching_rec_ref_ptr= 0;
4593 if (no_association &&
4594 !(curr_matching_chain= get_matching_chain_by_join_key())) //psergey: added '!'
4595 return 1;
4596 last_matching_rec_ref_ptr= get_next_rec_ref(curr_matching_chain);
4597 return 0;
4598}
4599
4600/*
4601 Initialize the BKAH join cache
4602
4603 SYNOPSIS
4604 init
4605 for_explain join buffer is initialized for explain only
4606
4607 DESCRIPTION
4608 The function initializes the cache structure. It is supposed to be called
4609 right after a constructor for the JOIN_CACHE_BKAH.
4610
4611 NOTES
4612 The function first constructs a companion object of the type
4613 JOIN_TAB_SCAN_MRR, then it calls the init method of the parent class.
4614
4615 RETURN VALUE
4616    0 initialization with buffer allocations has succeeded
4617 1 otherwise
4618*/
4619
4620int JOIN_CACHE_BKAH::init(bool for_explain)
4621{
4622 bool check_only_first_match= join_tab->check_only_first_match();
4623
4624 no_association= MY_TEST(mrr_mode & HA_MRR_NO_ASSOCIATION);
4625
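  /*
    The skip_record callback is installed only when a single match per record
    suffices and the MRR implementation keeps associations: without
    associations (HA_MRR_NO_ASSOCIATION) the match flags of a key chain
    cannot be consulted for the returned rows.
  */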
4626 RANGE_SEQ_IF rs_funcs= { bka_range_seq_key_info,
4627 bkah_range_seq_init,
4628 bkah_range_seq_next,
4629 check_only_first_match && !no_association ?
4630 bkah_range_seq_skip_record : 0,
4631 bkah_skip_index_tuple };
4632
4633 DBUG_ENTER("JOIN_CACHE_BKAH::init");
4634
4635 if (!(join_tab_scan= new JOIN_TAB_SCAN_MRR(join, join_tab,
4636 mrr_mode, rs_funcs)))
4637 DBUG_RETURN(1);
4638
4639 DBUG_RETURN(JOIN_CACHE_HASHED::init(for_explain));
4640}
4641
4642
4643/*
4644 Check the index condition of the joined table for a record from the BKA cache
4645
4646 SYNOPSIS
4647 skip_index_tuple()
4648 range_info record chain returned by MRR
4649
4650 DESCRIPTION
4651 See JOIN_CACHE_BKA::skip_index_tuple().
4652    This function is the variant for use with the class JOIN_CACHE_BKAH.
4653 The difference from JOIN_CACHE_BKA case is that there may be multiple
4654    previous table record combinations that share the same key (MRR range).
4655 As a consequence, we need to loop through the chain of all table record
4656 combinations that match the given MRR range key range_info until we find
4657 one that satisfies the index condition.
4658
4659 NOTE
4660 Possible optimization:
4661 Before we unpack the record from a previous table
4662 check if this table is used in the condition.
4663 If so then unpack the record otherwise skip the unpacking.
4664 This should be done by a special virtual method
4665 get_partial_record_by_pos().
4666
4667 RETURN VALUE
4668  1 none of the record combinations from the chain referred to by range_info
4669    satisfies the index condition
4670 0 otherwise
4671
4672
4673*/
4674
4675bool JOIN_CACHE_BKAH::skip_index_tuple(range_id_t range_info)
4676{
4677 uchar *last_rec_ref_ptr= get_next_rec_ref((uchar*) range_info);
4678 uchar *next_rec_ref_ptr= last_rec_ref_ptr;
4679 DBUG_ENTER("JOIN_CACHE_BKAH::skip_index_tuple");
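  /*
    Walk the circular chain of records that share this key: as soon as one
    record combination satisfies the pushed index condition the tuple must
    not be skipped, so FALSE is returned immediately.
  */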
4680 do
4681 {
4682 next_rec_ref_ptr= get_next_rec_ref(next_rec_ref_ptr);
4683 uchar *rec_ptr= next_rec_ref_ptr + rec_fields_offset;
4684 get_record_by_pos(rec_ptr);
4685 if (join_tab->cache_idx_cond->val_int())
4686 DBUG_RETURN(FALSE);
4687 } while(next_rec_ref_ptr != last_rec_ref_ptr);
4688 DBUG_RETURN(TRUE);
4689}
4690