| 1 | /* Copyright (C) 2000-2006 MySQL AB |
| 2 | |
| 3 | This program is free software; you can redistribute it and/or modify |
| 4 | it under the terms of the GNU General Public License as published by |
| 5 | the Free Software Foundation; version 2 of the License. |
| 6 | |
| 7 | This program is distributed in the hope that it will be useful, |
| 8 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 10 | GNU General Public License for more details. |
| 11 | |
| 12 | You should have received a copy of the GNU General Public License |
| 13 | along with this program; if not, write to the Free Software |
| 14 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ |
| 15 | |
| 16 | /** |
| 17 | @file |
| 18 | |
| 19 | @brief |
| 20 | join cache optimizations |
| 21 | |
| 22 | @defgroup Query_Optimizer Query Optimizer |
| 23 | @{ |
| 24 | */ |
| 25 | |
| 26 | #ifdef USE_PRAGMA_IMPLEMENTATION |
| 27 | #pragma implementation // gcc: Class implementation |
| 28 | #endif |
| 29 | |
| 30 | #include "mariadb.h" |
| 31 | #include "key.h" |
| 32 | #include "sql_base.h" |
| 33 | #include "sql_select.h" |
| 34 | #include "opt_subselect.h" |
| 35 | |
| 36 | #define NO_MORE_RECORDS_IN_BUFFER (uint)(-1) |
| 37 | |
| 38 | static void save_or_restore_used_tabs(JOIN_TAB *join_tab, bool save); |
| 39 | |
| 40 | /***************************************************************************** |
| 41 | * Join cache module |
| 42 | ******************************************************************************/ |
| 43 | |
| 44 | /* |
| 45 | Fill in the descriptor of a flag field associated with a join cache |
| 46 | |
| 47 | SYNOPSIS |
    add_flag_field_to_join_cache()
| 49 | str position in a record buffer to copy the field from/to |
| 50 | length length of the field |
| 51 | field IN/OUT pointer to the field descriptor to fill in |
| 52 | |
| 53 | DESCRIPTION |
    The function fills in the descriptor of a cache flag field to which
| 55 | the parameter 'field' points to. The function uses the first two |
| 56 | parameters to set the position in the record buffer from/to which |
| 57 | the field value is to be copied and the length of the copied fragment. |
| 58 | Before returning the result the function increments the value of |
| 59 | *field by 1. |
    The function ignores the fields 'blob_length' and 'offset' of the
| 61 | descriptor. |
| 62 | |
| 63 | RETURN VALUE |
| 64 | the length of the field |
| 65 | */ |
| 66 | |
| 67 | static |
| 68 | uint add_flag_field_to_join_cache(uchar *str, uint length, CACHE_FIELD **field) |
| 69 | { |
| 70 | CACHE_FIELD *copy= *field; |
| 71 | copy->str= str; |
| 72 | copy->length= length; |
| 73 | copy->type= 0; |
| 74 | copy->field= 0; |
| 75 | copy->referenced_field_no= 0; |
| 76 | (*field)++; |
| 77 | return length; |
| 78 | } |
| 79 | |
| 80 | |
| 81 | /* |
| 82 | Fill in the descriptors of table data fields associated with a join cache |
| 83 | |
| 84 | SYNOPSIS |
| 85 | add_table_data_fields_to_join_cache() |
| 86 | tab descriptors of fields from this table are to be filled |
| 87 | field_set descriptors for only these fields are to be created |
| 88 | field_cnt IN/OUT counter of data fields |
| 89 | descr IN/OUT pointer to the first descriptor to be filled |
| 90 | field_ptr_cnt IN/OUT counter of pointers to the data fields |
| 91 | descr_ptr IN/OUT pointer to the first pointer to blob descriptors |
| 92 | |
| 93 | DESCRIPTION |
| 94 | The function fills in the descriptors of cache data fields from the table |
| 95 | 'tab'. The descriptors are filled only for the fields marked in the |
| 96 | bitmap 'field_set'. |
| 97 | The function fills the descriptors starting from the position pointed |
    by 'descr'. If an added field is of a BLOB type then a pointer to
    its descriptor is added to the array descr_ptr.
| 100 | At the return 'descr' points to the position after the last added |
| 101 | descriptor while 'descr_ptr' points to the position right after the |
| 102 | last added pointer. |
| 103 | |
| 104 | RETURN VALUE |
| 105 | the total length of the added fields |
| 106 | */ |
| 107 | |
| 108 | static |
| 109 | uint add_table_data_fields_to_join_cache(JOIN_TAB *tab, |
| 110 | MY_BITMAP *field_set, |
| 111 | uint *field_cnt, |
| 112 | CACHE_FIELD **descr, |
| 113 | uint *field_ptr_cnt, |
| 114 | CACHE_FIELD ***descr_ptr) |
| 115 | { |
| 116 | Field **fld_ptr; |
| 117 | uint len= 0; |
| 118 | CACHE_FIELD *copy= *descr; |
| 119 | CACHE_FIELD **copy_ptr= *descr_ptr; |
| 120 | uint used_fields= bitmap_bits_set(field_set); |
| 121 | for (fld_ptr= tab->table->field; used_fields; fld_ptr++) |
| 122 | { |
| 123 | if (bitmap_is_set(field_set, (*fld_ptr)->field_index)) |
| 124 | { |
| 125 | len+= (*fld_ptr)->fill_cache_field(copy); |
| 126 | if (copy->type == CACHE_BLOB) |
| 127 | { |
| 128 | *copy_ptr= copy; |
| 129 | copy_ptr++; |
| 130 | (*field_ptr_cnt)++; |
| 131 | } |
| 132 | copy->field= *fld_ptr; |
| 133 | copy->referenced_field_no= 0; |
| 134 | copy++; |
| 135 | (*field_cnt)++; |
| 136 | used_fields--; |
| 137 | } |
| 138 | } |
| 139 | *descr= copy; |
| 140 | *descr_ptr= copy_ptr; |
| 141 | return len; |
| 142 | } |
| 143 | |
| 144 | /* |
| 145 | Determine different counters of fields associated with a record in the cache |
| 146 | |
| 147 | SYNOPSIS |
| 148 | calc_record_fields() |
| 149 | |
| 150 | DESCRIPTION |
| 151 | The function counts the number of total fields stored in a record |
| 152 | of the cache and saves this number in the 'fields' member. It also |
| 153 | determines the number of flag fields and the number of blobs. |
| 154 | The function sets 'with_match_flag' on if 'join_tab' needs a match flag |
| 155 | i.e. if it is the first inner table of an outer join or a semi-join. |
| 156 | |
| 157 | RETURN VALUE |
| 158 | none |
| 159 | */ |
| 160 | |
void JOIN_CACHE::calc_record_fields()
{
  JOIN_TAB *tab;

  /*
    Records of this cache start with the first table not already covered
    by a previous cache in the chain (if any).
  */
  if (prev_cache)
    tab= prev_cache->join_tab;
  else
  {
    if (join_tab->bush_root_tab)
    {
      /*
        --ot1--SJM1--------------ot2--...
                |
                |
                +-it1--...--itN
                  ^____________ this->join_tab is somewhere here,
                                inside an sjm nest.

        The join buffer should store the values of it1.*, it2.*, ..
        It should not store values of ot1.*.
      */
      tab= join_tab->bush_root_tab->bush_children->start;
    }
    else
    {
      /*
        -ot1--ot2--SJM1--SJM2--------------ot3--...--otN
                    |     |                      ^
                    |     +-it21--...--it2N      |
                    |                            \-- we're somewhere here,
                    +-it11--...--it1N                at the top level

        The join buffer should store the values of

          ot1.*, ot2.*, it1{i}, it2{j}.*, ot3.*, ...

        that is, we should start from the first non-const top-level table.

        We will need to store columns of SJ-inner tables (it_X_Y.*), but we're
        not interested in storing the columns of materialization tables
        themselves. Because of that, if the first non-const top-level table is
        a materialized table, we move to its bush_children:
      */
      tab= join->join_tab + join->const_tables;
      if (tab->bush_children)
        tab= tab->bush_children->start;
    }
  }
  DBUG_ASSERT(!tab->bush_children);

  start_tab= tab;
  fields= 0;
  blobs= 0;
  flag_fields= 0;
  data_field_count= 0;
  data_field_ptr_count= 0;
  referenced_fields= 0;

  /*
    The following loop will get inside SJM nests, because data may be unpacked
    to sjm-inner tables.
  */
  for (; tab != join_tab ; tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
  {
    tab->calc_used_field_length(FALSE);
    /* One flag field for the null bitmap, one for the null-row flag */
    flag_fields+= MY_TEST(tab->used_null_fields || tab->used_uneven_bit_fields);
    flag_fields+= MY_TEST(tab->table->maybe_null);
    fields+= tab->used_fields;
    blobs+= tab->used_blobs;
  }
  /* A match flag is needed if join_tab starts an outer join or semi-join */
  if ((with_match_flag= join_tab->use_match_flag()))
    flag_fields++;
  fields+= flag_fields;
}
| 235 | |
| 236 | |
| 237 | /* |
| 238 | Collect information on join key arguments |
| 239 | |
| 240 | SYNOPSIS |
| 241 | collect_info_on_key_args() |
| 242 | |
| 243 | DESCRIPTION |
| 244 | The function traverses the ref expressions that are used to access the |
| 245 | joined table join_tab. For each table 'tab' whose fields are to be stored |
| 246 | in the join buffer of the cache the function finds the fields from 'tab' |
| 247 | that occur in the ref expressions and marks these fields in the bitmap |
| 248 | tab->table->tmp_set. The function counts the number of them stored |
| 249 | in this cache and the total number of them stored in the previous caches |
| 250 | and saves the results of the counting in 'local_key_arg_fields' and |
| 251 | 'external_key_arg_fields' respectively. |
| 252 | |
| 253 | NOTES |
| 254 | The function does not do anything if no key is used to join the records |
| 255 | from join_tab. |
| 256 | |
| 257 | RETURN VALUE |
| 258 | none |
| 259 | */ |
| 260 | |
| 261 | void JOIN_CACHE::collect_info_on_key_args() |
| 262 | { |
| 263 | JOIN_TAB *tab; |
| 264 | JOIN_CACHE *cache; |
| 265 | local_key_arg_fields= 0; |
| 266 | external_key_arg_fields= 0; |
| 267 | |
| 268 | if (!is_key_access()) |
| 269 | return; |
| 270 | |
| 271 | TABLE_REF *ref= &join_tab->ref; |
| 272 | cache= this; |
| 273 | do |
| 274 | { |
| 275 | for (tab= cache->start_tab; tab != cache->join_tab; |
| 276 | tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS)) |
| 277 | { |
| 278 | uint key_args; |
| 279 | bitmap_clear_all(&tab->table->tmp_set); |
| 280 | for (uint i= 0; i < ref->key_parts; i++) |
| 281 | { |
| 282 | Item *ref_item= ref->items[i]; |
| 283 | if (!(tab->table->map & ref_item->used_tables())) |
| 284 | continue; |
| 285 | ref_item->walk(&Item::add_field_to_set_processor, 1, tab->table); |
| 286 | } |
| 287 | if ((key_args= bitmap_bits_set(&tab->table->tmp_set))) |
| 288 | { |
| 289 | if (cache == this) |
| 290 | local_key_arg_fields+= key_args; |
| 291 | else |
| 292 | external_key_arg_fields+= key_args; |
| 293 | } |
| 294 | } |
| 295 | cache= cache->prev_cache; |
| 296 | } |
| 297 | while (cache); |
| 298 | |
| 299 | return; |
| 300 | } |
| 301 | |
| 302 | |
| 303 | /* |
| 304 | Allocate memory for descriptors and pointers to them associated with the cache |
| 305 | |
| 306 | SYNOPSIS |
| 307 | alloc_fields() |
| 308 | |
| 309 | DESCRIPTION |
| 310 | The function allocates memory for the array of fields descriptors |
| 311 | and the array of pointers to the field descriptors used to copy |
| 312 | join record data from record buffers into the join buffer and |
| 313 | backward. Some pointers refer to the field descriptor associated |
| 314 | with previous caches. They are placed at the beginning of the array |
| 315 | of pointers and its total number is stored in external_key_arg_fields. |
| 316 | The pointer of the first array is assigned to field_descr and the number |
| 317 | of the elements in it is precalculated by the function calc_record_fields. |
| 318 | The allocated arrays are adjacent. |
| 319 | |
| 320 | NOTES |
| 321 | The memory is allocated in join->thd->mem_root |
| 322 | |
| 323 | RETURN VALUE |
| 324 | pointer to the first array |
| 325 | */ |
| 326 | |
| 327 | int JOIN_CACHE::alloc_fields() |
| 328 | { |
| 329 | uint ptr_cnt= external_key_arg_fields+blobs+1; |
| 330 | uint fields_size= sizeof(CACHE_FIELD)*fields; |
| 331 | field_descr= (CACHE_FIELD*) join->thd->alloc(fields_size + |
| 332 | sizeof(CACHE_FIELD*)*ptr_cnt); |
| 333 | blob_ptr= (CACHE_FIELD **) ((uchar *) field_descr + fields_size); |
| 334 | return (field_descr == NULL); |
| 335 | } |
| 336 | |
| 337 | |
| 338 | /* |
| 339 | Create descriptors of the record flag fields stored in the join buffer |
| 340 | |
| 341 | SYNOPSIS |
| 342 | create_flag_fields() |
| 343 | |
| 344 | DESCRIPTION |
| 345 | The function creates descriptors of the record flag fields stored |
| 346 | in the join buffer. These are descriptors for: |
| 347 | - an optional match flag field, |
| 348 | - table null bitmap fields, |
| 349 | - table null row fields. |
| 350 | The match flag field is created when 'join_tab' is the first inner |
    table of an outer join or a semi-join. A null bitmap field is
| 352 | created for any table whose fields are to be stored in the join |
| 353 | buffer if at least one of these fields is nullable or is a BIT field |
| 354 | whose bits are partially stored with null bits. A null row flag |
| 355 | is created for any table assigned to the cache if it is an inner |
| 356 | table of an outer join. |
| 357 | The descriptor for flag fields are placed one after another at the |
| 358 | beginning of the array of field descriptors 'field_descr' that |
| 359 | contains 'fields' elements. If there is a match flag field the |
| 360 | descriptor for it is always first in the sequence of flag fields. |
| 361 | The descriptors for other flag fields can follow in an arbitrary |
| 362 | order. |
| 363 | The flag field values follow in a record stored in the join buffer |
| 364 | in the same order as field descriptors, with the match flag always |
| 365 | following first. |
| 366 | The function sets the value of 'flag_fields' to the total number |
| 367 | of the descriptors created for the flag fields. |
| 368 | The function sets the value of 'length' to the total length of the |
| 369 | flag fields. |
| 370 | |
| 371 | RETURN VALUE |
| 372 | none |
| 373 | */ |
| 374 | |
| 375 | void JOIN_CACHE::create_flag_fields() |
| 376 | { |
| 377 | CACHE_FIELD *copy; |
| 378 | JOIN_TAB *tab; |
| 379 | |
| 380 | copy= field_descr; |
| 381 | |
| 382 | length=0; |
| 383 | |
| 384 | /* If there is a match flag the first field is always used for this flag */ |
| 385 | if (with_match_flag) |
| 386 | length+= add_flag_field_to_join_cache((uchar*) &join_tab->found, |
| 387 | sizeof(join_tab->found), |
| 388 | ©); |
| 389 | |
| 390 | /* Create fields for all null bitmaps and null row flags that are needed */ |
| 391 | for (tab= start_tab; tab != join_tab; |
| 392 | tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS)) |
| 393 | { |
| 394 | TABLE *table= tab->table; |
| 395 | |
| 396 | /* Create a field for the null bitmap from table if needed */ |
| 397 | if (tab->used_null_fields || tab->used_uneven_bit_fields) |
| 398 | length+= add_flag_field_to_join_cache(table->null_flags, |
| 399 | table->s->null_bytes, |
| 400 | ©); |
| 401 | |
| 402 | /* Create table for the null row flag if needed */ |
| 403 | if (table->maybe_null) |
| 404 | length+= add_flag_field_to_join_cache((uchar*) &table->null_row, |
| 405 | sizeof(table->null_row), |
| 406 | ©); |
| 407 | } |
| 408 | |
| 409 | /* Theoretically the new value of flag_fields can be less than the old one */ |
| 410 | flag_fields= (uint)(copy-field_descr); |
| 411 | } |
| 412 | |
| 413 | |
| 414 | /* |
| 415 | Create descriptors of the fields used to build access keys to the joined table |
| 416 | |
| 417 | SYNOPSIS |
| 418 | create_key_arg_fields() |
| 419 | |
| 420 | DESCRIPTION |
| 421 | The function creates descriptors of the record fields stored in the join |
| 422 | buffer that are used to build access keys to the joined table. These |
| 423 | fields are put into the buffer ahead of other records fields stored in |
| 424 | the buffer. Such placement helps to optimize construction of access keys. |
| 425 | For each field that is used to build access keys to the joined table but |
| 426 | is stored in some other join cache buffer the function saves a pointer |
    to the field descriptor. The array of such pointers is placed in the
| 428 | the join cache structure just before the array of pointers to the |
| 429 | blob fields blob_ptr. |
| 430 | Any field stored in a join cache buffer that is used to construct keys |
| 431 | to access tables associated with other join caches is called a referenced |
| 432 | field. It receives a unique number that is saved by the function in the |
| 433 | member 'referenced_field_no' of the CACHE_FIELD descriptor for the field. |
| 434 | This number is used as index to the array of offsets to the referenced |
| 435 | fields that are saved and put in the join cache buffer after all record |
| 436 | fields. |
    The function also finds out whether the keys to access join_tab
| 438 | can be considered as embedded and, if so, sets the flag 'use_emb_key' in |
| 439 | this join cache appropriately. |
| 440 | |
| 441 | NOTES. |
| 442 | When a key to access the joined table 'join_tab' is constructed the array |
| 443 | of pointers to the field descriptors for the external fields is looked |
| 444 | through. For each of this pointers we find out in what previous key cache |
| 445 | the referenced field is stored. The value of 'referenced_field_no' |
| 446 | provides us with the index into the array of offsets for referenced |
    fields stored in the join cache. The offset read by the index allows
| 448 | us to read the field without reading all other fields of the record |
| 449 | stored the join cache buffer. This optimizes the construction of keys |
| 450 | to access 'join_tab' when some key arguments are stored in the previous |
| 451 | join caches. |
| 452 | |
| 453 | NOTES |
| 454 | The function does not do anything if no key is used to join the records |
| 455 | from join_tab. |
| 456 | |
| 457 | RETURN VALUE |
| 458 | none |
| 459 | */ |
void JOIN_CACHE::create_key_arg_fields()
{
  JOIN_TAB *tab;
  JOIN_CACHE *cache;

  /* Nothing to do unless join_tab is accessed through a key */
  if (!is_key_access())
    return;

  /*
    Save pointers to the cache fields in previous caches
    that are used to build keys for this key access.
  */
  cache= this;
  uint ext_key_arg_cnt= external_key_arg_fields;
  CACHE_FIELD *copy;
  CACHE_FIELD **copy_ptr= blob_ptr;
  /* Stop once all external key-argument fields have been located */
  while (ext_key_arg_cnt)
  {
    cache= cache->prev_cache;
    for (tab= cache->start_tab; tab != cache->join_tab;
         tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
    {
      CACHE_FIELD *copy_end;
      MY_BITMAP *key_read_set= &tab->table->tmp_set;
      /* key_read_set contains the bitmap of tab's fields referenced by ref */
      if (bitmap_is_clear_all(key_read_set))
        continue;
      /* Scan only the data fields: flag fields never serve as key args */
      copy_end= cache->field_descr+cache->fields;
      for (copy= cache->field_descr+cache->flag_fields; copy < copy_end; copy++)
      {
        /*
          (1) - when we store rowids for DuplicateWeedout, they have
          copy->field==NULL
        */
        if (copy->field &&  // (1)
            copy->field->table == tab->table &&
            bitmap_is_set(key_read_set, copy->field->field_index))
        {
          *copy_ptr++= copy;
          ext_key_arg_cnt--;
          if (!copy->referenced_field_no)
          {
            /*
              Register the referenced field 'copy':
              - set the offset number in copy->referenced_field_no,
              - adjust the value of the flag 'with_length',
              - adjust the values of 'pack_length' and
                of 'pack_length_with_blob_ptrs'.
            */
            copy->referenced_field_no= ++cache->referenced_fields;
            if (!cache->with_length)
            {
              /* Referenced fields require record lengths in their cache */
              cache->with_length= TRUE;
              uint sz= cache->get_size_of_rec_length();
              cache->base_prefix_length+= sz;
              cache->pack_length+= sz;
              cache->pack_length_with_blob_ptrs+= sz;
            }
            cache->pack_length+= cache->get_size_of_fld_offset();
            cache->pack_length_with_blob_ptrs+= cache->get_size_of_fld_offset();
          }
        }
      }
    }
  }
  /* After this 'blob_ptr' shall not be changed */
  blob_ptr= copy_ptr;

  /* Now create local fields that are used to build ref for this key access */
  copy= field_descr+flag_fields;
  for (tab= start_tab; tab != join_tab;
       tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
  {
    /* tmp_set still marks the key-argument fields of tab (see above) */
    length+= add_table_data_fields_to_join_cache(tab, &tab->table->tmp_set,
                                                 &data_field_count, &copy,
                                                 &data_field_ptr_count,
                                                 &copy_ptr);
  }

  use_emb_key= check_emb_key_usage();

  return;
}
| 543 | |
| 544 | |
| 545 | /* |
| 546 | Create descriptors of all remaining data fields stored in the join buffer |
| 547 | |
| 548 | SYNOPSIS |
| 549 | create_remaining_fields() |
| 550 | |
| 551 | DESCRIPTION |
| 552 | The function creates descriptors for all remaining data fields of a |
| 553 | record from the join buffer. If the value returned by is_key_access() is |
| 554 | false the function creates fields for all read record fields that |
| 555 | comprise the partial join record joined with join_tab. Otherwise, |
| 556 | for each table tab, the set of the read fields for which the descriptors |
| 557 | have to be added is determined as the difference between all read fields |
    and those for which the descriptors have been already created.
| 559 | The latter are supposed to be marked in the bitmap tab->table->tmp_set. |
    The function increases the value of 'length' by the total length of
| 561 | the added fields. |
| 562 | |
| 563 | NOTES |
| 564 | If is_key_access() returns true the function modifies the value of |
| 565 | tab->table->tmp_set for a each table whose fields are stored in the cache. |
| 566 | The function calls the method Field::fill_cache_field to figure out |
| 567 | the type of the cache field and the maximal length of its representation |
| 568 | in the join buffer. If this is a blob field then additionally a pointer |
| 569 | to this field is added as an element of the array blob_ptr. For a blob |
| 570 | field only the size of the length of the blob data is taken into account. |
| 571 | It is assumed that 'data_field_count' contains the number of descriptors |
| 572 | for data fields that have been already created and 'data_field_ptr_count' |
| 573 | contains the number of the pointers to such descriptors having been |
| 574 | stored up to the moment. |
| 575 | |
| 576 | RETURN VALUE |
| 577 | none |
| 578 | */ |
| 579 | |
void JOIN_CACHE::create_remaining_fields()
{
  JOIN_TAB *tab;
  /* With no key access every read field still needs a descriptor */
  bool all_read_fields= !is_key_access();
  /* Continue right after the descriptors created so far */
  CACHE_FIELD *copy= field_descr+flag_fields+data_field_count;
  CACHE_FIELD **copy_ptr= blob_ptr+data_field_ptr_count;

  for (tab= start_tab; tab != join_tab;
       tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
  {
    MY_BITMAP *rem_field_set;
    TABLE *table= tab->table;

    if (all_read_fields)
      rem_field_set= table->read_set;
    else
    {
      /*
        tmp_set marks the key-argument fields already described by
        create_key_arg_fields(); the remaining fields are those of
        read_set that are NOT in tmp_set, computed in place here.
      */
      bitmap_invert(&table->tmp_set);
      bitmap_intersect(&table->tmp_set, table->read_set);
      rem_field_set= &table->tmp_set;
    }

    length+= add_table_data_fields_to_join_cache(tab, rem_field_set,
                                                 &data_field_count, &copy,
                                                 &data_field_ptr_count,
                                                 &copy_ptr);

    /* SemiJoinDuplicateElimination: allocate space for rowid if needed */
    if (tab->keep_current_rowid)
    {
      copy->str= table->file->ref;
      if (copy->str)
        copy->length= table->file->ref_length;
      else
      {
        /* This may happen only for materialized derived tables and views */
        copy->length= 0;
        copy->str= (uchar *) table;
      }
      copy->type= CACHE_ROWID;
      copy->field= 0;
      copy->referenced_field_no= 0;
      /*
        Note: this may seem odd, but at this point we have
        table->file->ref==NULL while table->file->ref_length is already set
        to correct value.
      */
      length += table->file->ref_length;
      data_field_count++;
      copy++;
    }
  }
}
| 633 | |
| 634 | |
| 635 | |
| 636 | /* |
| 637 | Calculate and set all cache constants |
| 638 | |
| 639 | SYNOPSIS |
| 640 | set_constants() |
| 641 | |
| 642 | DESCRIPTION |
| 643 | The function calculates and set all precomputed constants that are used |
| 644 | when writing records into the join buffer and reading them from it. |
| 645 | It calculates the size of offsets of a record within the join buffer |
| 646 | and of a field within a record. It also calculates the number of bytes |
| 647 | used to store record lengths. |
| 648 | The function also calculates the maximal length of the representation |
| 649 | of record in the cache excluding blob_data. This value is used when |
    making a decision whether more records should be added into the join
| 651 | buffer or not. |
| 652 | |
| 653 | RETURN VALUE |
| 654 | none |
| 655 | */ |
| 656 | |
void JOIN_CACHE::set_constants()
{
  /*
    Any record from a BKA cache is prepended with the record length.
    We use the record length when reading the buffer and building key values
    for each record. The length allows us not to read the fields that are
    not needed for keys.
    If a record has match flag it also may be skipped when the match flag
    is on. It happens if the cache is used for a semi-join operation or
    for outer join when the 'not exist' optimization can be applied.
    If some of the fields are referenced from other caches then
    the record length allows us to easily reach the saved offsets for
    these fields since the offsets are stored at the very end of the record.
    However at this moment we don't know whether we have referenced fields for
    the cache or not. Later when a referenced field is registered for the cache
    we adjust the value of the flag 'with_length'.
  */
  with_length= is_key_access() ||
               join_tab->is_inner_table_of_semi_join_with_first_match() ||
               join_tab->is_inner_table_of_outer_join();
  /*
    At this moment we don't know yet the value of 'referenced_fields',
    but in any case it can't be greater than the value of 'fields'.
    'len' is therefore an upper bound of the in-buffer record size.
  */
  uint len= length + fields*sizeof(uint)+blobs*sizeof(uchar *) +
            (prev_cache ? prev_cache->get_size_of_rec_offset() : 0) +
            sizeof(ulong);
  /*
    The values of size_of_rec_ofs, size_of_rec_len, size_of_fld_ofs,
    base_prefix_length, pack_length, pack_length_with_blob_ptrs
    will be recalculated later in this function when we get the estimate
    for the actual value of the join buffer size.
  */
  size_of_rec_ofs= size_of_rec_len= size_of_fld_ofs= 4;
  base_prefix_length= (with_length ? size_of_rec_len : 0) +
                      (prev_cache ? prev_cache->get_size_of_rec_offset() : 0);
  pack_length= (with_length ? size_of_rec_len : 0) +
               (prev_cache ? prev_cache->get_size_of_rec_offset() : 0) +
               length + fields*sizeof(uint);
  pack_length_with_blob_ptrs= pack_length + blobs*sizeof(uchar *);
  min_buff_size= 0;
  min_records= 1;
  /* The buffer must fit at least the minimal number of records */
  buff_size= (size_t)MY_MAX(join->thd->variables.join_buff_size,
                            get_min_join_buffer_size());
  /* Offset/length sizes are now recomputed for the actual buffer size */
  size_of_rec_ofs= offset_size(buff_size);
  size_of_rec_len= blobs ? size_of_rec_ofs : offset_size(len);
  size_of_fld_ofs= size_of_rec_len;
  base_prefix_length= (with_length ? size_of_rec_len : 0) +
                      (prev_cache ? prev_cache->get_size_of_rec_offset() : 0);
  /*
    The size of the offsets for referenced fields will be added later.
    The values of 'pack_length' and 'pack_length_with_blob_ptrs' are adjusted
    every time when the first reference to the referenced field is registered.
  */
  pack_length= (with_length ? size_of_rec_len : 0) +
               (prev_cache ? prev_cache->get_size_of_rec_offset() : 0) +
               length;
  pack_length_with_blob_ptrs= pack_length + blobs*sizeof(uchar *);
}
| 716 | |
| 717 | |
| 718 | /* |
| 719 | Get maximum total length of all affixes of a record in the join cache buffer |
| 720 | |
| 721 | SYNOPSIS |
| 722 | get_record_max_affix_length() |
| 723 | |
| 724 | DESCRIPTION |
| 725 | The function calculates the maximum possible total length of all affixes |
| 726 | of a record in the join cache buffer, that is made of: |
| 727 | - the length of all prefixes used in this cache, |
| 728 | - the length of the match flag if it's needed |
| 729 | - the total length of the maximum possible offsets to the fields of |
| 730 | a record in the buffer. |
| 731 | |
| 732 | RETURN VALUE |
| 733 | The maximum total length of all affixes of a record in the join buffer |
| 734 | */ |
| 735 | |
| 736 | uint JOIN_CACHE::get_record_max_affix_length() |
| 737 | { |
| 738 | uint len= get_prefix_length() + |
| 739 | MY_TEST(with_match_flag) + |
| 740 | size_of_fld_ofs * data_field_count; |
| 741 | return len; |
| 742 | } |
| 743 | |
| 744 | |
| 745 | /* |
| 746 | Get the minimum possible size of the cache join buffer |
| 747 | |
| 748 | SYNOPSIS |
| 749 | get_min_join_buffer_size() |
| 750 | |
| 751 | DESCRIPTION |
    At its first invocation for the cache the function calculates the
| 753 | minimum possible size of the join buffer of the cache. This value depends |
| 754 | on the minimal number of records 'min_records' to be stored in the join |
| 755 | buffer. The number is supposed to be determined by the procedure that |
| 756 | chooses the best access path to the joined table join_tab in the execution |
| 757 | plan. After the calculation of the interesting size the function saves it |
| 758 | in the field 'min_buff_size' in order to use it directly at the next |
| 759 | invocations of the function. |
| 760 | |
| 761 | NOTES |
| 762 | Currently the number of minimal records is just set to 1. |
| 763 | |
| 764 | RETURN VALUE |
| 765 | The minimal possible size of the join buffer of this cache |
| 766 | */ |
| 767 | |
size_t JOIN_CACHE::get_min_join_buffer_size()
{
  /* The value is computed once and cached in min_buff_size */
  if (!min_buff_size)
  {
    size_t len= 0;       /* worst-case record length */
    size_t len_last= 0;  /* average record length (used for the last record) */
    for (JOIN_TAB *tab= start_tab; tab != join_tab;
         tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
    {
      len+= tab->get_max_used_fieldlength();
      len_last+= tab->get_used_fieldlength();
    }
    /* Per-record overhead: affixes plus key construction space */
    size_t len_addon= get_record_max_affix_length() +
                      get_max_key_addon_space_per_record();
    len+= len_addon;
    len_last+= len_addon;
    /* Worst-case space for all records but the last, average for the last */
    size_t min_sz= len*(min_records-1) + len_last;
    min_sz+= pack_length_with_blob_ptrs;
    size_t add_sz= 0;
    /* Auxiliary buffer space (e.g. for MRR) needed for min_records records */
    for (uint i=0; i < min_records; i++)
      add_sz+= join_tab_scan->aux_buffer_incr(i+1);
    avg_aux_buffer_incr= add_sz/min_records;
    min_sz+= add_sz;
    set_if_bigger(min_sz, 1);
    min_buff_size= min_sz;
  }
  return min_buff_size;
}
| 796 | |
| 797 | |
| 798 | /* |
| 799 | Get the maximum possible size of the cache join buffer |
| 800 | |
| 801 | SYNOPSIS |
| 802 | get_max_join_buffer_size() |
| 803 | |
| 804 | optimize_buff_size FALSE <-> do not take more memory than needed for |
| 805 | the estimated number of records in the partial join |
| 806 | |
| 807 | DESCRIPTION |
    At its first invocation for the cache the function calculates the
| 809 | maximum possible size of join buffer for the cache. If the parameter |
| 810 | optimize_buff_size true then this value does not exceed the size of the |
| 811 | space needed for the estimated number of records 'max_records' in the |
| 812 | partial join that joins tables from the first one through join_tab. This |
| 813 | value is also capped off by the value of join_tab->join_buffer_size_limit, |
| 814 | if it has been set a to non-zero value, and by the value of the system |
| 815 | parameter join_buffer_size - otherwise. After the calculation of the |
| 816 | interesting size the function saves the value in the field 'max_buff_size' |
| 817 | in order to use it directly at the next invocations of the function. |
| 818 | |
| 819 | NOTES |
| 820 | Currently the value of join_tab->join_buffer_size_limit is initialized |
| 821 | to 0 and is never reset. |
| 822 | |
| 823 | RETURN VALUE |
| 824 | The maximum possible size of the join buffer of this cache |
| 825 | */ |
| 826 | |
| 827 | size_t JOIN_CACHE::get_max_join_buffer_size(bool optimize_buff_size) |
| 828 | { |
| 829 | if (!max_buff_size) |
| 830 | { |
| 831 | size_t max_sz; |
| 832 | size_t min_sz= get_min_join_buffer_size(); |
| 833 | size_t len= 0; |
| 834 | for (JOIN_TAB *tab= start_tab; tab != join_tab; |
| 835 | tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS)) |
| 836 | { |
| 837 | len+= tab->get_used_fieldlength(); |
| 838 | } |
| 839 | len+= get_record_max_affix_length(); |
| 840 | avg_record_length= len; |
| 841 | len+= get_max_key_addon_space_per_record() + avg_aux_buffer_incr; |
| 842 | space_per_record= len; |
| 843 | |
| 844 | size_t limit_sz= (size_t)join->thd->variables.join_buff_size; |
| 845 | if (join_tab->join_buffer_size_limit) |
| 846 | set_if_smaller(limit_sz, join_tab->join_buffer_size_limit); |
| 847 | if (!optimize_buff_size) |
| 848 | max_sz= limit_sz; |
| 849 | else |
| 850 | { |
| 851 | if (limit_sz / max_records > space_per_record) |
| 852 | max_sz= space_per_record * max_records; |
| 853 | else |
| 854 | max_sz= limit_sz; |
| 855 | max_sz+= pack_length_with_blob_ptrs; |
| 856 | set_if_smaller(max_sz, limit_sz); |
| 857 | } |
| 858 | set_if_bigger(max_sz, min_sz); |
| 859 | max_buff_size= max_sz; |
| 860 | } |
| 861 | return max_buff_size; |
| 862 | } |
| 863 | |
| 864 | |
| 865 | /* |
| 866 | Allocate memory for a join buffer |
| 867 | |
| 868 | SYNOPSIS |
| 869 | alloc_buffer() |
| 870 | |
| 871 | DESCRIPTION |
| 872 | The function allocates a lump of memory for the cache join buffer. |
| 873 | Initially the function sets the size of the buffer buff_size equal to |
| 874 | the value returned by get_max_join_buffer_size(). If the total size of |
| 875 | the space intended to be used for the join buffers employed by the |
| 876 | tables from the first one through join_tab exceeds the value of the |
| 877 | system parameter join_buff_space_limit, then the function first tries |
| 878 | to shrink the used buffers to make the occupied space fit the maximum |
| 879 | memory allowed to be used for all join buffers in total. After |
| 880 | this the function tries to allocate a join buffer for join_tab. |
| 881 | If it fails to do so, it decrements the requested size of the join |
| 882 | buffer, shrinks proportionally the join buffers used for the previous |
| 883 | tables and tries to allocate a buffer for join_tab. In the case of a |
| 884 | failure the function repeats its attempts with smaller and smaller |
| 885 | requested sizes of the buffer, but not more than 4 times. |
| 886 | |
| 887 | RETURN VALUE |
| 888 | 0 if the memory has been successfully allocated |
| 889 | 1 otherwise |
| 890 | */ |
| 891 | |
int JOIN_CACHE::alloc_buffer()
{
  JOIN_TAB *tab;
  JOIN_CACHE *cache;
  ulonglong curr_buff_space_sz= 0;       /* space used by all join buffers */
  ulonglong curr_min_buff_space_sz= 0;   /* minimal space they could shrink to */
  ulonglong join_buff_space_limit=
    join->thd->variables.join_buff_space_limit;
  bool optimize_buff_size=
         optimizer_flag(join->thd, OPTIMIZER_SWITCH_OPTIMIZE_JOIN_BUFFER_SIZE);
  /* Cardinality of the partial join before join_tab (join_tab-1 is valid here) */
  double partial_join_cardinality= (join_tab-1)->get_partial_join_cardinality();
  buff= NULL;
  min_buff_size= 0;
  max_buff_size= 0;
  min_records= 1;
  /*
    Cap the expected record count by the space limit; both get_min/max
    size calculations below depend on min_records/max_records.
  */
  max_records= (size_t) (partial_join_cardinality <= join_buff_space_limit ?
                 (ulonglong) partial_join_cardinality : join_buff_space_limit);
  set_if_bigger(max_records, 10);
  min_buff_size= get_min_join_buffer_size();
  buff_size= get_max_join_buffer_size(optimize_buff_size);

  /* Sum the sizes of the join buffers of all preceding tables */
  for (tab= start_tab; tab!= join_tab;
       tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
  {
    cache= tab->cache;
    if (cache)
    {
      curr_min_buff_space_sz+= cache->get_min_join_buffer_size();
      curr_buff_space_sz+= cache->get_join_buffer_size();
    }
  }
  curr_min_buff_space_sz+= min_buff_size;
  curr_buff_space_sz+= buff_size;

  /*
    Fail if even the minimal sizes do not fit into join_buff_space_limit,
    or if the current sizes exceed it and we either may not shrink the
    earlier buffers or shrinking them fails.
  */
  if (curr_min_buff_space_sz > join_buff_space_limit ||
      (curr_buff_space_sz > join_buff_space_limit &&
       (!optimize_buff_size ||
        join->shrink_join_buffers(join_tab, curr_buff_space_sz,
                                  join_buff_space_limit))))
    goto fail;

  /* For EXPLAIN no real buffer is needed; the size calculations suffice */
  if (for_explain_only)
    return 0;

  /*
    Try to allocate the buffer; on each malloc failure shrink the requested
    size by a quarter of the allowed span (and the preceding buffers
    proportionally) and retry, down to min_buff_size at most.
  */
  for (size_t buff_size_decr= (buff_size-min_buff_size)/4 + 1; ; )
  {
    size_t next_buff_size;

    if ((buff= (uchar*) my_malloc(buff_size, MYF(MY_THREAD_SPECIFIC))))
      break;

    next_buff_size= buff_size > buff_size_decr ? buff_size-buff_size_decr : 0;
    if (next_buff_size < min_buff_size ||
        join->shrink_join_buffers(join_tab, curr_buff_space_sz,
                                  curr_buff_space_sz-buff_size_decr))
      goto fail;
    buff_size= next_buff_size;

    /* Recompute the total space after the shrink before the next attempt */
    curr_buff_space_sz= 0;
    for (tab= join->join_tab+join->const_tables; tab <= join_tab; tab++)
    {
      cache= tab->cache;
      if (cache)
        curr_buff_space_sz+= cache->get_join_buffer_size();
    }
  }
  return 0;

fail:
  buff_size= 0;
  return 1;
}
| 964 | |
| 965 | |
| 966 | /* |
  Shrink the size of the cache join buffer in a given ratio

  SYNOPSIS
    shrink_join_buffer_in_ratio()
      n           numerator of the ratio to shrink the buffer by
      d           denominator of the ratio
| 973 | |
| 974 | DESCRIPTION |
| 975 | The function first deallocates the join buffer of the cache. Then |
| 976 | it allocates a buffer that is (n/d) times smaller. |
| 977 | |
| 978 | RETURN VALUE |
| 979 | FALSE on success with allocation of the smaller join buffer |
| 980 | TRUE otherwise |
| 981 | */ |
| 982 | |
| 983 | bool JOIN_CACHE::shrink_join_buffer_in_ratio(ulonglong n, ulonglong d) |
| 984 | { |
| 985 | size_t next_buff_size; |
| 986 | if (n < d) |
| 987 | return FALSE; |
| 988 | next_buff_size= (size_t) ((double) buff_size / n * d); |
| 989 | set_if_bigger(next_buff_size, min_buff_size); |
| 990 | buff_size= next_buff_size; |
| 991 | return realloc_buffer(); |
| 992 | } |
| 993 | |
| 994 | |
| 995 | /* |
| 996 | Reallocate the join buffer of a join cache |
| 997 | |
| 998 | SYNOPSIS |
| 999 | realloc_buffer() |
| 1000 | |
  DESCRIPTION
| 1002 | The function reallocates the join buffer of the join cache. After this |
| 1003 | it resets the buffer for writing. |
| 1004 | |
| 1005 | NOTES |
| 1006 | The function assumes that buff_size contains the new value for the join |
| 1007 | buffer size. |
| 1008 | |
| 1009 | RETURN VALUE |
| 1010 | 0 if the buffer has been successfully reallocated |
| 1011 | 1 otherwise |
| 1012 | */ |
| 1013 | |
| 1014 | int JOIN_CACHE::realloc_buffer() |
| 1015 | { |
| 1016 | int rc; |
| 1017 | free(); |
| 1018 | rc= MY_TEST(!(buff= (uchar*) my_malloc(buff_size, MYF(MY_THREAD_SPECIFIC)))); |
| 1019 | reset(TRUE); |
| 1020 | return rc; |
| 1021 | } |
| 1022 | |
| 1023 | |
| 1024 | /* |
| 1025 | Initialize a join cache |
| 1026 | |
| 1027 | SYNOPSIS |
| 1028 | init() |
| 1029 | for_explain join buffer is initialized for explain only |
| 1030 | |
| 1031 | DESCRIPTION |
    The function initializes the join cache structure. It is supposed to be
    called
| 1033 | by init methods for classes derived from the JOIN_CACHE. |
| 1034 | The function allocates memory for the join buffer and for descriptors of |
| 1035 | the record fields stored in the buffer. |
| 1036 | |
| 1037 | NOTES |
| 1038 | The code of this function should have been included into the constructor |
| 1039 | code itself. However the new operator for the class JOIN_CACHE would |
| 1040 | never fail while memory allocation for the join buffer is not absolutely |
| 1041 | unlikely to fail. That's why this memory allocation has to be placed in a |
| 1042 | separate function that is called in a couple with a cache constructor. |
| 1043 | It is quite natural to put almost all other constructor actions into |
| 1044 | this function. |
| 1045 | |
| 1046 | RETURN VALUE |
| 1047 | 0 initialization with buffer allocations has been succeeded |
| 1048 | 1 otherwise |
| 1049 | */ |
| 1050 | |
| 1051 | int JOIN_CACHE::init(bool for_explain) |
| 1052 | { |
| 1053 | DBUG_ENTER("JOIN_CACHE::init" ); |
| 1054 | |
| 1055 | for_explain_only= for_explain; |
| 1056 | |
| 1057 | calc_record_fields(); |
| 1058 | |
| 1059 | collect_info_on_key_args(); |
| 1060 | |
| 1061 | if (alloc_fields()) |
| 1062 | DBUG_RETURN(1); |
| 1063 | |
| 1064 | create_flag_fields(); |
| 1065 | |
| 1066 | create_key_arg_fields(); |
| 1067 | |
| 1068 | create_remaining_fields(); |
| 1069 | |
| 1070 | set_constants(); |
| 1071 | |
| 1072 | if (alloc_buffer()) |
| 1073 | DBUG_RETURN(1); |
| 1074 | |
| 1075 | reset(TRUE); |
| 1076 | |
| 1077 | DBUG_RETURN(0); |
| 1078 | } |
| 1079 | |
| 1080 | |
| 1081 | /* |
| 1082 | Check the possibility to read the access keys directly from the join buffer |
| 1083 | SYNOPSIS |
| 1084 | check_emb_key_usage() |
| 1085 | |
| 1086 | DESCRIPTION |
| 1087 | The function checks some conditions at which the key values can be read |
| 1088 | directly from the join buffer. This is possible when the key values can be |
| 1089 | composed by concatenation of the record fields stored in the join buffer. |
| 1090 | Sometimes when the access key is multi-component the function has to re-order |
| 1091 | the fields written into the join buffer to make keys embedded. If key |
| 1092 | values for the key access are detected as embedded then 'use_emb_key' |
| 1093 | is set to TRUE. |
| 1094 | |
| 1095 | EXAMPLE |
| 1096 | Let table t2 has an index defined on the columns a,b . Let's assume also |
| 1097 | that the columns t2.a, t2.b as well as the columns t1.a, t1.b are all |
| 1098 | of the integer type. Then if the query |
| 1099 | SELECT COUNT(*) FROM t1, t2 WHERE t1.a=t2.a and t1.b=t2.b |
| 1100 | is executed with a join cache in such a way that t1 is the driving |
| 1101 | table then the key values to access table t2 can be read directly |
| 1102 | from the join buffer. |
| 1103 | |
| 1104 | NOTES |
    In some cases key values could be read directly from the join buffer but
    we still do not consider them embedded. In the future we'll expand the
    class of keys which we identify as embedded.
| 1108 | |
| 1109 | NOTES |
| 1110 | The function returns FALSE if no key is used to join the records |
| 1111 | from join_tab. |
| 1112 | |
| 1113 | RETURN VALUE |
| 1114 | TRUE key values will be considered as embedded, |
| 1115 | FALSE otherwise. |
| 1116 | */ |
| 1117 | |
| 1118 | bool JOIN_CACHE::check_emb_key_usage() |
| 1119 | { |
| 1120 | |
| 1121 | if (!is_key_access()) |
| 1122 | return FALSE; |
| 1123 | |
| 1124 | uint i; |
| 1125 | Item *item; |
| 1126 | KEY_PART_INFO *key_part; |
| 1127 | CACHE_FIELD *copy; |
| 1128 | CACHE_FIELD *copy_end; |
| 1129 | uint len= 0; |
| 1130 | TABLE_REF *ref= &join_tab->ref; |
| 1131 | KEY *keyinfo= join_tab->get_keyinfo_by_key_no(ref->key); |
| 1132 | |
| 1133 | /* |
| 1134 | If some of the key arguments are not from the local cache the key |
| 1135 | is not considered as embedded. |
| 1136 | TODO: |
| 1137 | Expand it to the case when ref->key_parts=1 and local_key_arg_fields=0. |
| 1138 | */ |
| 1139 | if (external_key_arg_fields != 0) |
| 1140 | return FALSE; |
| 1141 | /* |
| 1142 | If the number of the local key arguments is not equal to the number |
| 1143 | of key parts the key value cannot be read directly from the join buffer. |
| 1144 | */ |
| 1145 | if (local_key_arg_fields != ref->key_parts) |
| 1146 | return FALSE; |
| 1147 | |
| 1148 | /* |
| 1149 | A key is not considered embedded if one of the following is true: |
| 1150 | - one of its key parts is not equal to a field |
| 1151 | - it is a partial key |
| 1152 | - definition of the argument field does not coincide with the |
| 1153 | definition of the corresponding key component |
| 1154 | - some of the key components are nullable |
| 1155 | */ |
| 1156 | for (i=0; i < ref->key_parts; i++) |
| 1157 | { |
| 1158 | item= ref->items[i]->real_item(); |
| 1159 | if (item->type() != Item::FIELD_ITEM) |
| 1160 | return FALSE; |
| 1161 | key_part= keyinfo->key_part+i; |
| 1162 | if (key_part->key_part_flag & HA_PART_KEY_SEG) |
| 1163 | return FALSE; |
| 1164 | if (!key_part->field->eq_def(((Item_field *) item)->field)) |
| 1165 | return FALSE; |
| 1166 | if (key_part->field->maybe_null()) |
| 1167 | return FALSE; |
| 1168 | } |
| 1169 | |
| 1170 | copy= field_descr+flag_fields; |
| 1171 | copy_end= copy+local_key_arg_fields; |
| 1172 | for ( ; copy < copy_end; copy++) |
| 1173 | { |
| 1174 | /* |
| 1175 | If some of the key arguments are of variable length the key |
| 1176 | is not considered as embedded. |
| 1177 | */ |
| 1178 | if (copy->type != 0) |
| 1179 | return FALSE; |
| 1180 | /* |
| 1181 | If some of the key arguments are bit fields whose bits are partially |
| 1182 | stored with null bits the key is not considered as embedded. |
| 1183 | */ |
| 1184 | if (copy->field->type() == MYSQL_TYPE_BIT && |
| 1185 | ((Field_bit*) (copy->field))->bit_len) |
| 1186 | return FALSE; |
| 1187 | len+= copy->length; |
| 1188 | } |
| 1189 | |
| 1190 | emb_key_length= len; |
| 1191 | |
| 1192 | /* |
| 1193 | Make sure that key fields follow the order of the corresponding |
| 1194 | key components these fields are equal to. For this the descriptors |
| 1195 | of the fields that comprise the key might be re-ordered. |
| 1196 | */ |
| 1197 | for (i= 0; i < ref->key_parts; i++) |
| 1198 | { |
| 1199 | uint j; |
| 1200 | Item *item= ref->items[i]->real_item(); |
| 1201 | Field *fld= ((Item_field *) item)->field; |
| 1202 | CACHE_FIELD *init_copy= field_descr+flag_fields+i; |
| 1203 | for (j= i, copy= init_copy; i < local_key_arg_fields; i++, copy++) |
| 1204 | { |
| 1205 | if (fld->eq(copy->field)) |
| 1206 | { |
| 1207 | if (j != i) |
| 1208 | { |
| 1209 | CACHE_FIELD key_part_copy= *copy; |
| 1210 | *copy= *init_copy; |
| 1211 | *init_copy= key_part_copy; |
| 1212 | } |
| 1213 | break; |
| 1214 | } |
| 1215 | } |
| 1216 | } |
| 1217 | |
| 1218 | return TRUE; |
| 1219 | } |
| 1220 | |
| 1221 | |
| 1222 | /* |
| 1223 | Write record fields and their required offsets into the join cache buffer |
| 1224 | |
| 1225 | SYNOPSIS |
| 1226 | write_record_data() |
| 1227 | link a reference to the associated info in the previous cache |
| 1228 | is_full OUT true if it has been decided that no more records will be |
| 1229 | added to the join buffer |
| 1230 | |
| 1231 | DESCRIPTION |
| 1232 | This function put into the cache buffer the following info that it reads |
| 1233 | from the join record buffers or computes somehow: |
| 1234 | (1) the length of all fields written for the record (optional) |
| 1235 | (2) an offset to the associated info in the previous cache (if there is any) |
| 1236 | determined by the link parameter |
| 1237 | (3) all flag fields of the tables whose data field are put into the cache: |
| 1238 | - match flag (optional), |
| 1239 | - null bitmaps for all tables, |
| 1240 | - null row flags for all tables |
| 1241 | (4) values of all data fields including |
      - full images of those fixed length data fields that cannot have
        trailing spaces
      - significant part of fixed length fields that can have trailing spaces
        with the prepended length
      - data of non-blob variable length fields with the prepended data length
      - blob data from blob fields with the prepended data length
| 1248 | (5) record offset values for the data fields that are referred to from |
| 1249 | other caches |
| 1250 | |
| 1251 | The record is written at the current position stored in the field 'pos'. |
| 1252 | At the end of the function 'pos' points at the position right after the |
| 1253 | written record data. |
| 1254 | The function increments the number of records in the cache that is stored |
| 1255 | in the 'records' field by 1. The function also modifies the values of |
| 1256 | 'curr_rec_pos' and 'last_rec_pos' to point to the written record. |
| 1257 | The 'end_pos' cursor is modified accordingly. |
| 1258 | The 'last_rec_blob_data_is_in_rec_buff' is set on if the blob data |
| 1259 | remains in the record buffers and not copied to the join buffer. It may |
| 1260 | happen only to the blob data from the last record added into the cache. |
| 1261 | If on_precond is attached to join_tab and it is not evaluated to TRUE |
| 1262 | then MATCH_IMPOSSIBLE is placed in the match flag field of the record |
| 1263 | written into the join buffer. |
| 1264 | |
| 1265 | RETURN VALUE |
| 1266 | length of the written record data |
| 1267 | */ |
| 1268 | |
| 1269 | uint JOIN_CACHE::write_record_data(uchar * link, bool *is_full) |
| 1270 | { |
| 1271 | uint len; |
| 1272 | bool last_record; |
| 1273 | CACHE_FIELD *copy; |
| 1274 | CACHE_FIELD *copy_end; |
| 1275 | uchar *flags_pos; |
| 1276 | uchar *cp= pos; |
| 1277 | uchar *init_pos= cp; |
| 1278 | uchar *rec_len_ptr= 0; |
| 1279 | uint = extra_key_length(); |
| 1280 | |
| 1281 | records++; /* Increment the counter of records in the cache */ |
| 1282 | |
| 1283 | len= pack_length + key_extra; |
| 1284 | |
| 1285 | /* Make an adjustment for the size of the auxiliary buffer if there is any */ |
| 1286 | uint incr= aux_buffer_incr(records); |
| 1287 | size_t rem= rem_space(); |
| 1288 | aux_buff_size+= len+incr < rem ? incr : rem; |
| 1289 | |
| 1290 | /* |
| 1291 | For each blob to be put into cache save its length and a pointer |
| 1292 | to the value in the corresponding element of the blob_ptr array. |
| 1293 | Blobs with null values are skipped. |
| 1294 | Increment 'len' by the total length of all these blobs. |
| 1295 | */ |
| 1296 | if (blobs) |
| 1297 | { |
| 1298 | CACHE_FIELD **copy_ptr= blob_ptr; |
| 1299 | CACHE_FIELD **copy_ptr_end= copy_ptr+blobs; |
| 1300 | for ( ; copy_ptr < copy_ptr_end; copy_ptr++) |
| 1301 | { |
| 1302 | Field_blob *blob_field= (Field_blob *) (*copy_ptr)->field; |
| 1303 | if (!blob_field->is_null()) |
| 1304 | { |
| 1305 | uint blob_len= blob_field->get_length(); |
| 1306 | (*copy_ptr)->blob_length= blob_len; |
| 1307 | len+= blob_len; |
| 1308 | (*copy_ptr)->str= blob_field->get_ptr(); |
| 1309 | } |
| 1310 | } |
| 1311 | } |
| 1312 | |
| 1313 | /* |
| 1314 | Check whether we won't be able to add any new record into the cache after |
| 1315 | this one because the cache will be full. Set last_record to TRUE if it's so. |
| 1316 | The assume that the cache will be full after the record has been written |
| 1317 | into it if either the remaining space of the cache is not big enough for the |
| 1318 | record's blob values or if there is a chance that not all non-blob fields |
| 1319 | of the next record can be placed there. |
| 1320 | This function is called only in the case when there is enough space left in |
| 1321 | the cache to store at least non-blob parts of the current record. |
| 1322 | */ |
| 1323 | last_record= (len+pack_length_with_blob_ptrs+key_extra) > rem_space(); |
| 1324 | |
| 1325 | /* |
| 1326 | Save the position for the length of the record in the cache if it's needed. |
| 1327 | The length of the record will be inserted here when all fields of the record |
| 1328 | are put into the cache. |
| 1329 | */ |
| 1330 | if (with_length) |
| 1331 | { |
| 1332 | rec_len_ptr= cp; |
| 1333 | DBUG_ASSERT(cp + size_of_rec_len <= buff + buff_size); |
| 1334 | cp+= size_of_rec_len; |
| 1335 | } |
| 1336 | |
| 1337 | /* |
| 1338 | Put a reference to the fields of the record that are stored in the previous |
| 1339 | cache if there is any. This reference is passed by the 'link' parameter. |
| 1340 | */ |
| 1341 | if (prev_cache) |
| 1342 | { |
| 1343 | DBUG_ASSERT(cp + prev_cache->get_size_of_rec_offset() <= buff + buff_size); |
| 1344 | cp+= prev_cache->get_size_of_rec_offset(); |
| 1345 | prev_cache->store_rec_ref(cp, link); |
| 1346 | } |
| 1347 | |
| 1348 | curr_rec_pos= cp; |
| 1349 | |
| 1350 | /* If the there is a match flag set its value to 0 */ |
| 1351 | copy= field_descr; |
| 1352 | if (with_match_flag) |
| 1353 | *copy[0].str= 0; |
| 1354 | |
| 1355 | /* First put into the cache the values of all flag fields */ |
| 1356 | copy_end= field_descr+flag_fields; |
| 1357 | flags_pos= cp; |
| 1358 | for ( ; copy < copy_end; copy++) |
| 1359 | { |
| 1360 | DBUG_ASSERT(cp + copy->length <= buff + buff_size); |
| 1361 | memcpy(cp, copy->str, copy->length); |
| 1362 | cp+= copy->length; |
| 1363 | } |
| 1364 | |
| 1365 | /* Now put the values of the remaining fields as soon as they are not nulls */ |
| 1366 | copy_end= field_descr+fields; |
| 1367 | for ( ; copy < copy_end; copy++) |
| 1368 | { |
| 1369 | Field *field= copy->field; |
| 1370 | if (field && field->maybe_null() && field->is_null()) |
| 1371 | { |
| 1372 | if (copy->referenced_field_no) |
| 1373 | copy->offset= 0; |
| 1374 | continue; |
| 1375 | } |
| 1376 | /* Save the offset of the field to put it later at the end of the record */ |
| 1377 | if (copy->referenced_field_no) |
| 1378 | copy->offset= (uint)(cp-curr_rec_pos); |
| 1379 | |
| 1380 | switch (copy->type) { |
| 1381 | case CACHE_BLOB: |
| 1382 | { |
| 1383 | Field_blob *blob_field= (Field_blob *) copy->field; |
| 1384 | if (last_record) |
| 1385 | { |
| 1386 | last_rec_blob_data_is_in_rec_buff= 1; |
| 1387 | /* Put down the length of the blob and the pointer to the data */ |
| 1388 | DBUG_ASSERT(cp + copy->length + sizeof(char*) <= buff + buff_size); |
| 1389 | blob_field->get_image(cp, copy->length+sizeof(char*), |
| 1390 | blob_field->charset()); |
| 1391 | cp+= copy->length+sizeof(char*); |
| 1392 | } |
| 1393 | else |
| 1394 | { |
| 1395 | /* First put down the length of the blob and then copy the data */ |
| 1396 | blob_field->get_image(cp, copy->length, |
| 1397 | blob_field->charset()); |
| 1398 | DBUG_ASSERT(cp + copy->length + copy->blob_length <= buff + buff_size); |
| 1399 | memcpy(cp+copy->length, copy->str, copy->blob_length); |
| 1400 | cp+= copy->length+copy->blob_length; |
| 1401 | } |
| 1402 | break; |
| 1403 | } |
| 1404 | case CACHE_VARSTR1: |
| 1405 | /* Copy the significant part of the short varstring field */ |
| 1406 | len= (uint) copy->str[0] + 1; |
| 1407 | DBUG_ASSERT(cp + len <= buff + buff_size); |
| 1408 | memcpy(cp, copy->str, len); |
| 1409 | cp+= len; |
| 1410 | break; |
| 1411 | case CACHE_VARSTR2: |
| 1412 | /* Copy the significant part of the long varstring field */ |
| 1413 | len= uint2korr(copy->str) + 2; |
| 1414 | DBUG_ASSERT(cp + len <= buff + buff_size); |
| 1415 | memcpy(cp, copy->str, len); |
| 1416 | cp+= len; |
| 1417 | break; |
| 1418 | case CACHE_STRIPPED: |
| 1419 | { |
| 1420 | /* |
| 1421 | Put down the field value stripping all trailing spaces off. |
| 1422 | After this insert the length of the written sequence of bytes. |
| 1423 | */ |
| 1424 | uchar *str, *end; |
| 1425 | for (str= copy->str, end= str+copy->length; |
| 1426 | end > str && end[-1] == ' '; |
| 1427 | end--) ; |
| 1428 | len=(uint) (end-str); |
| 1429 | DBUG_ASSERT(cp + len + 2 <= buff + buff_size); |
| 1430 | int2store(cp, len); |
| 1431 | memcpy(cp+2, str, len); |
| 1432 | cp+= len+2; |
| 1433 | break; |
| 1434 | } |
| 1435 | case CACHE_ROWID: |
| 1436 | if (!copy->length) |
| 1437 | { |
| 1438 | /* |
| 1439 | This may happen only for ROWID fields of materialized |
| 1440 | derived tables and views. |
| 1441 | */ |
| 1442 | TABLE *table= (TABLE *) copy->str; |
| 1443 | copy->str= table->file->ref; |
| 1444 | copy->length= table->file->ref_length; |
| 1445 | if (!copy->str) |
| 1446 | { |
| 1447 | /* |
| 1448 | If table is an empty inner table of an outer join and it is |
| 1449 | a materialized derived table then table->file->ref == NULL. |
| 1450 | */ |
| 1451 | cp+= copy->length; |
| 1452 | break; |
| 1453 | } |
| 1454 | } |
| 1455 | /* fall through */ |
| 1456 | default: |
| 1457 | /* Copy the entire image of the field from the record buffer */ |
| 1458 | DBUG_ASSERT(cp + copy->length <= buff + buff_size); |
| 1459 | if (copy->str) |
| 1460 | memcpy(cp, copy->str, copy->length); |
| 1461 | cp+= copy->length; |
| 1462 | } |
| 1463 | } |
| 1464 | |
| 1465 | /* Add the offsets of the fields that are referenced from other caches */ |
| 1466 | if (referenced_fields) |
| 1467 | { |
| 1468 | uint cnt= 0; |
| 1469 | for (copy= field_descr+flag_fields; copy < copy_end ; copy++) |
| 1470 | { |
| 1471 | if (copy->referenced_field_no) |
| 1472 | { |
| 1473 | store_fld_offset(cp+size_of_fld_ofs*(copy->referenced_field_no-1), |
| 1474 | copy->offset); |
| 1475 | cnt++; |
| 1476 | } |
| 1477 | } |
| 1478 | DBUG_ASSERT(cp + size_of_fld_ofs*cnt <= buff + buff_size); |
| 1479 | cp+= size_of_fld_ofs*cnt; |
| 1480 | } |
| 1481 | |
| 1482 | if (rec_len_ptr) |
| 1483 | store_rec_length(rec_len_ptr, (ulong) (cp-rec_len_ptr-size_of_rec_len)); |
| 1484 | last_rec_pos= curr_rec_pos; |
| 1485 | end_pos= pos= cp; |
| 1486 | *is_full= last_record; |
| 1487 | |
| 1488 | last_written_is_null_compl= 0; |
| 1489 | if (!join_tab->first_unmatched && join_tab->on_precond) |
| 1490 | { |
| 1491 | join_tab->found= 0; |
| 1492 | join_tab->not_null_compl= 1; |
| 1493 | if (!join_tab->on_precond->val_int()) |
| 1494 | { |
| 1495 | flags_pos[0]= MATCH_IMPOSSIBLE; |
| 1496 | last_written_is_null_compl= 1; |
| 1497 | } |
| 1498 | } |
| 1499 | |
| 1500 | return (uint) (cp-init_pos); |
| 1501 | } |
| 1502 | |
| 1503 | |
| 1504 | /* |
| 1505 | Reset the join buffer for reading/writing: default implementation |
| 1506 | |
| 1507 | SYNOPSIS |
| 1508 | reset() |
| 1509 | for_writing if it's TRUE the function reset the buffer for writing |
| 1510 | |
| 1511 | DESCRIPTION |
| 1512 | This default implementation of the virtual function reset() resets |
| 1513 | the join buffer for reading or writing. |
| 1514 | If the buffer is reset for reading only the 'pos' value is reset |
| 1515 | to point to the very beginning of the join buffer. If the buffer is |
| 1516 | reset for writing additionally: |
| 1517 | - the counter of the records in the buffer is set to 0, |
    - the value of 'last_rec_pos' gets pointing at the position just
| 1519 | before the buffer, |
| 1520 | - 'end_pos' is set to point to the beginning of the join buffer, |
| 1521 | - the size of the auxiliary buffer is reset to 0, |
| 1522 | - the flag 'last_rec_blob_data_is_in_rec_buff' is set to 0. |
| 1523 | |
| 1524 | RETURN VALUE |
| 1525 | none |
| 1526 | */ |
| 1527 | void JOIN_CACHE::reset(bool for_writing) |
| 1528 | { |
| 1529 | pos= buff; |
| 1530 | curr_rec_link= 0; |
| 1531 | if (for_writing) |
| 1532 | { |
| 1533 | records= 0; |
| 1534 | last_rec_pos= buff; |
| 1535 | aux_buff_size= 0; |
| 1536 | end_pos= pos; |
| 1537 | last_rec_blob_data_is_in_rec_buff= 0; |
| 1538 | } |
| 1539 | } |
| 1540 | |
| 1541 | |
| 1542 | /* |
| 1543 | Add a record into the join buffer: the default implementation |
| 1544 | |
| 1545 | SYNOPSIS |
| 1546 | put_record() |
| 1547 | |
| 1548 | DESCRIPTION |
| 1549 | This default implementation of the virtual function put_record writes |
| 1550 | the next matching record into the join buffer. |
| 1551 | It also links the record having been written into the join buffer with |
| 1552 | the matched record in the previous cache if there is any. |
| 1553 | The implementation assumes that the function get_curr_link() |
| 1554 | will return exactly the pointer to this matched record. |
| 1555 | |
| 1556 | RETURN VALUE |
| 1557 | TRUE if it has been decided that it should be the last record |
| 1558 | in the join buffer, |
| 1559 | FALSE otherwise |
| 1560 | */ |
| 1561 | |
| 1562 | bool JOIN_CACHE::put_record() |
| 1563 | { |
| 1564 | bool is_full; |
| 1565 | uchar *link= 0; |
| 1566 | if (prev_cache) |
| 1567 | link= prev_cache->get_curr_rec_link(); |
| 1568 | write_record_data(link, &is_full); |
| 1569 | return is_full; |
| 1570 | } |
| 1571 | |
| 1572 | |
| 1573 | /* |
| 1574 | Read the next record from the join buffer: the default implementation |
| 1575 | |
| 1576 | SYNOPSIS |
| 1577 | get_record() |
| 1578 | |
| 1579 | DESCRIPTION |
| 1580 | This default implementation of the virtual function get_record |
| 1581 | reads fields of the next record from the join buffer of this cache. |
| 1582 | The function also reads all other fields associated with this record |
    from the join buffers of the previous caches. The fields are read
| 1584 | into the corresponding record buffers. |
| 1585 | It is supposed that 'pos' points to the position in the buffer |
| 1586 | right after the previous record when the function is called. |
| 1587 | When the function returns the 'pos' values is updated to point |
| 1588 | to the position after the read record. |
| 1589 | The value of 'curr_rec_pos' is also updated by the function to |
| 1590 | point to the beginning of the first field of the record in the |
| 1591 | join buffer. |
| 1592 | |
| 1593 | RETURN VALUE |
| 1594 | TRUE there are no more records to read from the join buffer |
| 1595 | FALSE otherwise |
| 1596 | */ |
| 1597 | |
| 1598 | bool JOIN_CACHE::get_record() |
| 1599 | { |
| 1600 | bool res; |
| 1601 | uchar *prev_rec_ptr= 0; |
| 1602 | if (with_length) |
| 1603 | pos+= size_of_rec_len; |
| 1604 | if (prev_cache) |
| 1605 | { |
| 1606 | pos+= prev_cache->get_size_of_rec_offset(); |
| 1607 | prev_rec_ptr= prev_cache->get_rec_ref(pos); |
| 1608 | } |
| 1609 | curr_rec_pos= pos; |
| 1610 | if (!(res= read_all_record_fields() == NO_MORE_RECORDS_IN_BUFFER)) |
| 1611 | { |
| 1612 | pos+= referenced_fields*size_of_fld_ofs; |
| 1613 | if (prev_cache) |
| 1614 | prev_cache->get_record_by_pos(prev_rec_ptr); |
| 1615 | } |
| 1616 | return res; |
| 1617 | } |
| 1618 | |
| 1619 | |
| 1620 | /* |
| 1621 | Read a positioned record from the join buffer: the default implementation |
| 1622 | |
| 1623 | SYNOPSIS |
| 1624 | get_record_by_pos() |
| 1625 | rec_ptr position of the first field of the record in the join buffer |
| 1626 | |
| 1627 | DESCRIPTION |
| 1628 | This default implementation of the virtual function get_record_pos |
| 1629 | reads the fields of the record positioned at 'rec_ptr' from the join buffer. |
| 1630 | The function also reads all other fields associated with this record |
    from the join buffers of the previous caches. The fields are read
| 1632 | into the corresponding record buffers. |
| 1633 | |
| 1634 | RETURN VALUE |
| 1635 | none |
| 1636 | */ |
| 1637 | |
| 1638 | void JOIN_CACHE::get_record_by_pos(uchar *rec_ptr) |
| 1639 | { |
| 1640 | uchar *save_pos= pos; |
| 1641 | pos= rec_ptr; |
| 1642 | read_all_record_fields(); |
| 1643 | pos= save_pos; |
| 1644 | if (prev_cache) |
| 1645 | { |
| 1646 | uchar *prev_rec_ptr= prev_cache->get_rec_ref(rec_ptr); |
| 1647 | prev_cache->get_record_by_pos(prev_rec_ptr); |
| 1648 | } |
| 1649 | } |
| 1650 | |
| 1651 | |
| 1652 | /* |
| 1653 | Get the match flag from the referenced record: the default implementation |
| 1654 | |
| 1655 | SYNOPSIS |
| 1656 | get_match_flag_by_pos() |
| 1657 | rec_ptr position of the first field of the record in the join buffer |
| 1658 | |
| 1659 | DESCRIPTION |
| 1660 | This default implementation of the virtual function get_match_flag_by_pos |
| 1661 | get the match flag for the record pointed by the reference at the position |
| 1662 | rec_ptr. If the match flag is placed in one of the previous buffers the |
| 1663 | function first reaches the linked record fields in this buffer. |
| 1664 | |
| 1665 | RETURN VALUE |
| 1666 | match flag for the record at the position rec_ptr |
| 1667 | */ |
| 1668 | |
| 1669 | enum JOIN_CACHE::Match_flag JOIN_CACHE::get_match_flag_by_pos(uchar *rec_ptr) |
| 1670 | { |
| 1671 | Match_flag match_fl= MATCH_NOT_FOUND; |
| 1672 | if (with_match_flag) |
| 1673 | { |
| 1674 | match_fl= (enum Match_flag) rec_ptr[0]; |
| 1675 | return match_fl; |
| 1676 | } |
| 1677 | if (prev_cache) |
| 1678 | { |
| 1679 | uchar *prev_rec_ptr= prev_cache->get_rec_ref(rec_ptr); |
| 1680 | return prev_cache->get_match_flag_by_pos(prev_rec_ptr); |
| 1681 | } |
| 1682 | DBUG_ASSERT(0); |
| 1683 | return match_fl; |
| 1684 | } |
| 1685 | |
| 1686 | |
| 1687 | /* |
| 1688 | Calculate the increment of the auxiliary buffer for a record write |
| 1689 | |
| 1690 | SYNOPSIS |
| 1691 | aux_buffer_incr() |
| 1692 | recno the number of the record the increment to be calculated for |
| 1693 | |
| 1694 | DESCRIPTION |
| 1695 | This function calls the aux_buffer_incr the method of the |
| 1696 | companion member join_tab_scan to calculate the growth of the |
| 1697 | auxiliary buffer when the recno-th record is added to the |
| 1698 | join_buffer of this cache. |
| 1699 | |
| 1700 | RETURN VALUE |
| 1701 | the number of bytes in the increment |
| 1702 | */ |
| 1703 | |
uint JOIN_CACHE::aux_buffer_incr(size_t recno)
{
  /* Delegate the calculation to the companion join_tab_scan object */
  return join_tab_scan->aux_buffer_incr(recno);
}
| 1708 | |
| 1709 | /* |
| 1710 | Read all flag and data fields of a record from the join buffer |
| 1711 | |
| 1712 | SYNOPSIS |
| 1713 | read_all_record_fields() |
| 1714 | |
| 1715 | DESCRIPTION |
| 1716 | The function reads all flag and data fields of a record from the join |
| 1717 | buffer into the corresponding record buffers. |
| 1718 | The fields are read starting from the position 'pos' which is |
| 1719 | supposed to point to the beginning of the first record field. |
| 1720 | The function increments the value of 'pos' by the length of the |
| 1721 | read data. |
| 1722 | |
| 1723 | RETURN VALUE |
| 1724 | (-1) if there is no more records in the join buffer |
| 1725 | length of the data read from the join buffer - otherwise |
| 1726 | */ |
| 1727 | |
| 1728 | uint JOIN_CACHE::read_all_record_fields() |
| 1729 | { |
| 1730 | uchar *init_pos= pos; |
| 1731 | |
| 1732 | if (pos > last_rec_pos || !records) |
| 1733 | return NO_MORE_RECORDS_IN_BUFFER; |
| 1734 | |
| 1735 | /* First match flag, read null bitmaps and null_row flag for each table */ |
| 1736 | read_flag_fields(); |
| 1737 | |
| 1738 | /* Now read the remaining table fields if needed */ |
| 1739 | CACHE_FIELD *copy= field_descr+flag_fields; |
| 1740 | CACHE_FIELD *copy_end= field_descr+fields; |
| 1741 | bool blob_in_rec_buff= blob_data_is_in_rec_buff(init_pos); |
| 1742 | for ( ; copy < copy_end; copy++) |
| 1743 | read_record_field(copy, blob_in_rec_buff); |
| 1744 | |
| 1745 | return (uint) (pos-init_pos); |
| 1746 | } |
| 1747 | |
| 1748 | |
| 1749 | /* |
| 1750 | Read all flag fields of a record from the join buffer |
| 1751 | |
| 1752 | SYNOPSIS |
| 1753 | read_flag_fields() |
| 1754 | |
| 1755 | DESCRIPTION |
| 1756 | The function reads all flag fields of a record from the join |
| 1757 | buffer into the corresponding record buffers. |
| 1758 | The fields are read starting from the position 'pos'. |
| 1759 | The function increments the value of 'pos' by the length of the |
| 1760 | read data. |
| 1761 | |
| 1762 | RETURN VALUE |
| 1763 | length of the data read from the join buffer |
| 1764 | */ |
| 1765 | |
| 1766 | uint JOIN_CACHE::read_flag_fields() |
| 1767 | { |
| 1768 | uchar *init_pos= pos; |
| 1769 | CACHE_FIELD *copy= field_descr; |
| 1770 | CACHE_FIELD *copy_end= copy+flag_fields; |
| 1771 | if (with_match_flag) |
| 1772 | { |
| 1773 | copy->str[0]= MY_TEST((Match_flag) pos[0] == MATCH_FOUND); |
| 1774 | pos+= copy->length; |
| 1775 | copy++; |
| 1776 | } |
| 1777 | for ( ; copy < copy_end; copy++) |
| 1778 | { |
| 1779 | memcpy(copy->str, pos, copy->length); |
| 1780 | pos+= copy->length; |
| 1781 | } |
| 1782 | return (uint)(pos-init_pos); |
| 1783 | } |
| 1784 | |
| 1785 | |
| 1786 | /* |
| 1787 | Read a data record field from the join buffer |
| 1788 | |
| 1789 | SYNOPSIS |
| 1790 | read_record_field() |
| 1791 | copy the descriptor of the data field to be read |
| 1792 | blob_in_rec_buff indicates whether this is the field from the record |
| 1793 | whose blob data are in record buffers |
| 1794 | |
| 1795 | DESCRIPTION |
| 1796 | The function reads the data field specified by the parameter copy |
| 1797 | from the join buffer into the corresponding record buffer. |
| 1798 | The field is read starting from the position 'pos'. |
| 1799 | The data of blob values is not copied from the join buffer. |
| 1800 | The function increments the value of 'pos' by the length of the |
| 1801 | read data. |
| 1802 | |
| 1803 | RETURN VALUE |
| 1804 | length of the data read from the join buffer |
| 1805 | */ |
| 1806 | |
uint JOIN_CACHE::read_record_field(CACHE_FIELD *copy, bool blob_in_rec_buff)
{
  uint len;
  /* Do not copy the field if its value is null */
  if (copy->field && copy->field->maybe_null() && copy->field->is_null())
    return 0;
  switch (copy->type) {
  case CACHE_BLOB:
    {
      Field_blob *blob_field= (Field_blob *) copy->field;
      /*
        Copy the length and the pointer to data but not the blob data
        itself to the record buffer
      */
      if (blob_in_rec_buff)
      {
        /*
          The blob data resides in the record buffer: restore the saved
          (length, pointer) image of the blob field as it was written.
        */
        blob_field->set_image(pos, copy->length + sizeof(char*),
                              blob_field->charset());
        len= copy->length + sizeof(char*);
      }
      else
      {
        /*
          The blob data follows its length bytes inside the join buffer:
          make the field point straight into the join buffer.
        */
        blob_field->set_ptr(pos, pos+copy->length);
        len= copy->length + blob_field->get_length();
      }
    }
    break;
  case CACHE_VARSTR1:
    /* Copy the significant part of the short varstring field */
    len= (uint) pos[0] + 1;   /* 1 length byte + that many data bytes */
    memcpy(copy->str, pos, len);
    break;
  case CACHE_VARSTR2:
    /* Copy the significant part of the long varstring field */
    len= uint2korr(pos) + 2;  /* 2 length bytes + that many data bytes */
    memcpy(copy->str, pos, len);
    break;
  case CACHE_STRIPPED:
    /* Pad the value by spaces that has been stripped off */
    len= uint2korr(pos);
    memcpy(copy->str, pos+2, len);
    memset(copy->str+len, ' ', copy->length-len);
    len+= 2;                  /* account for the 2 length bytes */
    break;
  case CACHE_ROWID:
    if (!copy->str)
    {
      /* No destination buffer for the rowid: just skip its bytes */
      len= copy->length;
      break;
    }
    /* fall through */
  default:
    /* Copy the entire image of the field from the record buffer */
    len= copy->length;
    memcpy(copy->str, pos, len);
  }
  pos+= len;
  return len;
}
| 1866 | |
| 1867 | |
| 1868 | /* |
| 1869 | Read a referenced field from the join buffer |
| 1870 | |
| 1871 | SYNOPSIS |
| 1872 | read_referenced_field() |
| 1873 | copy pointer to the descriptor of the referenced field |
| 1874 | rec_ptr pointer to the record that may contain this field |
| 1875 | len IN/OUT total length of the record fields |
| 1876 | |
| 1877 | DESCRIPTION |
| 1878 | The function checks whether copy points to a data field descriptor |
| 1879 | for this cache object. If it does not then the function returns |
| 1880 | FALSE. Otherwise the function reads the field of the record in |
| 1881 | the join buffer pointed by 'rec_ptr' into the corresponding record |
| 1882 | buffer and returns TRUE. |
| 1883 | If the value of *len is 0 then the function sets it to the total |
| 1884 | length of the record fields including possible trailing offset |
| 1885 | values. Otherwise *len is supposed to provide this value that |
| 1886 | has been obtained earlier. |
| 1887 | |
| 1888 | NOTE |
| 1889 | If the value of the referenced field is null then the offset |
| 1890 | for the value is set to 0. If the value of a field can be null |
| 1891 | then the value of flag_fields is always positive. So the offset |
| 1892 | for any non-null value cannot be 0 in this case. |
| 1893 | |
| 1894 | RETURN VALUE |
| 1895 | TRUE 'copy' points to a data descriptor of this join cache |
| 1896 | FALSE otherwise |
| 1897 | */ |
| 1898 | |
bool JOIN_CACHE::read_referenced_field(CACHE_FIELD *copy,
                                       uchar *rec_ptr,
                                       uint *len)
{
  uchar *ptr;
  uint offset;
  /* Bail out if 'copy' is not one of this cache's field descriptors */
  if (copy < field_descr || copy >= field_descr+fields)
    return FALSE;
  if (!*len)
  {
    /* Get the total length of the record fields */
    uchar *len_ptr= rec_ptr;
    if (prev_cache)
      len_ptr-= prev_cache->get_size_of_rec_offset();
    *len= get_rec_length(len_ptr-size_of_rec_len);
  }

  /*
    Locate the stored offset of this referenced field: the offsets are
    appended after the record fields, one per referenced field.
  */
  ptr= rec_ptr-(prev_cache ? prev_cache->get_size_of_rec_offset() : 0);
  offset= get_fld_offset(ptr+ *len -
                         size_of_fld_ofs*
                         (referenced_fields+1-copy->referenced_field_no));
  bool is_null= FALSE;
  Field *field= copy->field;
  /* A zero offset encodes a null value (see NOTE in the header comment) */
  if (offset == 0 && flag_fields)
    is_null= TRUE;
  if (is_null)
  {
    field->set_null();
    if (!field->real_maybe_null())
      field->table->null_row= 1;
  }
  else
  {
    /* Temporarily repoint 'pos' at the field, read it, then restore 'pos' */
    uchar *save_pos= pos;
    field->set_notnull();
    if (!field->real_maybe_null())
      field->table->null_row= 0;
    pos= rec_ptr+offset;
    read_record_field(copy, blob_data_is_in_rec_buff(rec_ptr));
    pos= save_pos;
  }
  return TRUE;
}
| 1942 | |
| 1943 | |
| 1944 | /* |
  Skip record from join buffer if it's already matched: default implementation
| 1946 | |
| 1947 | SYNOPSIS |
| 1948 | skip_if_matched() |
| 1949 | |
| 1950 | DESCRIPTION |
| 1951 | This default implementation of the virtual function skip_if_matched |
| 1952 | skips the next record from the join buffer if its match flag is set to |
| 1953 | MATCH_FOUND. |
| 1954 | If the record is skipped the value of 'pos' is set to point to the position |
| 1955 | right after the record. |
| 1956 | |
| 1957 | RETURN VALUE |
| 1958 | TRUE the match flag is set to MATCH_FOUND and the record has been skipped |
| 1959 | FALSE otherwise |
| 1960 | */ |
| 1961 | |
| 1962 | bool JOIN_CACHE::skip_if_matched() |
| 1963 | { |
| 1964 | DBUG_ASSERT(with_length); |
| 1965 | uint offset= size_of_rec_len; |
| 1966 | if (prev_cache) |
| 1967 | offset+= prev_cache->get_size_of_rec_offset(); |
| 1968 | /* Check whether the match flag is MATCH_FOUND */ |
| 1969 | if (get_match_flag_by_pos(pos+offset) == MATCH_FOUND) |
| 1970 | { |
| 1971 | pos+= size_of_rec_len + get_rec_length(pos); |
| 1972 | return TRUE; |
| 1973 | } |
| 1974 | return FALSE; |
| 1975 | } |
| 1976 | |
| 1977 | |
| 1978 | /* |
| 1979 | Skip record from join buffer if the match isn't needed: default implementation |
| 1980 | |
| 1981 | SYNOPSIS |
| 1982 | skip_if_not_needed_match() |
| 1983 | |
| 1984 | DESCRIPTION |
| 1985 | This default implementation of the virtual function skip_if_not_needed_match |
| 1986 | skips the next record from the join buffer if its match flag is not |
| 1987 | MATCH_NOT_FOUND, and, either its value is MATCH_FOUND and join_tab is the |
| 1988 | first inner table of an inner join, or, its value is MATCH_IMPOSSIBLE |
| 1989 | and join_tab is the first inner table of an outer join. |
| 1990 | If the record is skipped the value of 'pos' is set to point to the position |
| 1991 | right after the record. |
| 1992 | |
| 1993 | RETURN VALUE |
| 1994 | TRUE the record has to be skipped |
| 1995 | FALSE otherwise |
| 1996 | */ |
| 1997 | |
| 1998 | bool JOIN_CACHE::skip_if_not_needed_match() |
| 1999 | { |
| 2000 | DBUG_ASSERT(with_length); |
| 2001 | enum Match_flag match_fl; |
| 2002 | uint offset= size_of_rec_len; |
| 2003 | if (prev_cache) |
| 2004 | offset+= prev_cache->get_size_of_rec_offset(); |
| 2005 | |
| 2006 | if ((match_fl= get_match_flag_by_pos(pos+offset)) != MATCH_NOT_FOUND && |
| 2007 | (join_tab->check_only_first_match() == (match_fl == MATCH_FOUND)) ) |
| 2008 | { |
| 2009 | pos+= size_of_rec_len + get_rec_length(pos); |
| 2010 | return TRUE; |
| 2011 | } |
| 2012 | return FALSE; |
| 2013 | } |
| 2014 | |
| 2015 | |
| 2016 | /* |
| 2017 | Restore the fields of the last record from the join buffer |
| 2018 | |
| 2019 | SYNOPSIS |
| 2020 | restore_last_record() |
| 2021 | |
| 2022 | DESCRIPTION |
    This function restores the values of the fields of the last record put
| 2024 | into join buffer in record buffers. The values most probably have been |
| 2025 | overwritten by the field values from other records when they were read |
| 2026 | from the join buffer into the record buffer in order to check pushdown |
| 2027 | predicates. |
| 2028 | |
| 2029 | RETURN |
| 2030 | none |
| 2031 | */ |
| 2032 | |
| 2033 | void JOIN_CACHE::restore_last_record() |
| 2034 | { |
| 2035 | if (records) |
| 2036 | get_record_by_pos(last_rec_pos); |
| 2037 | } |
| 2038 | |
| 2039 | |
| 2040 | /* |
| 2041 | Join records from the join buffer with records from the next join table |
| 2042 | |
| 2043 | SYNOPSIS |
| 2044 | join_records() |
| 2045 | skip_last do not find matches for the last record from the buffer |
| 2046 | |
| 2047 | DESCRIPTION |
    The function extends all records from the join buffer by the matched
| 2049 | records from join_tab. In the case of outer join operation it also |
| 2050 | adds null complementing extensions for the records from the join buffer |
| 2051 | that have no match. |
| 2052 | No extensions are generated for the last record from the buffer if |
| 2053 | skip_last is true. |
| 2054 | |
| 2055 | NOTES |
| 2056 | The function must make sure that if linked join buffers are used then |
| 2057 | a join buffer cannot be refilled again until all extensions in the |
| 2058 | buffers chained to this one are generated. |
| 2059 | Currently an outer join operation with several inner tables always uses |
| 2060 | at least two linked buffers with the match join flags placed in the |
| 2061 | first buffer. Any record composed of rows of the inner tables that |
| 2062 | matches a record in this buffer must refer to the position of the |
| 2063 | corresponding match flag. |
| 2064 | |
| 2065 | IMPLEMENTATION |
| 2066 | When generating extensions for outer tables of an outer join operation |
| 2067 | first we generate all extensions for those records from the join buffer |
| 2068 | that have matches, after which null complementing extension for all |
| 2069 | unmatched records from the join buffer are generated. |
| 2070 | |
| 2071 | RETURN VALUE |
| 2072 | return one of enum_nested_loop_state, except NESTED_LOOP_NO_MORE_ROWS. |
| 2073 | */ |
| 2074 | |
enum_nested_loop_state JOIN_CACHE::join_records(bool skip_last)
{
  JOIN_TAB *tab;
  enum_nested_loop_state rc= NESTED_LOOP_OK;
  bool outer_join_first_inner= join_tab->is_first_inner_for_outer_join();
  DBUG_ENTER("JOIN_CACHE::join_records" );

  /* Not yet in the null-complementing phase for this outer join */
  if (outer_join_first_inner && !join_tab->first_unmatched)
    join_tab->not_null_compl= TRUE;

  if (!join_tab->first_unmatched)
  {
    /* Find all records from join_tab that match records from join buffer */
    rc= join_matching_records(skip_last);
    if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
      goto finish;
    if (outer_join_first_inner)
    {
      if (next_cache && join_tab != join_tab->last_inner)
      {
        /*
          Ensure that all matches for outer records from join buffer are to be
          found. Now we ensure that all full records are found for records from
          join buffer. Generally this is an overkill.
          TODO: Ensure that only matches of the inner table records have to be
          found for the records from join buffer.
        */
        rc= next_cache->join_records(skip_last);
        if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
          goto finish;
      }
      join_tab->not_null_compl= FALSE;
      /* Prepare for generation of null complementing extensions */
      for (tab= join_tab->first_inner; tab <= join_tab->last_inner; tab++)
        tab->first_unmatched= join_tab->first_inner;
    }
  }
  if (join_tab->first_unmatched)
  {
    if (is_key_access())
      restore_last_record();

    /*
      Generate all null complementing extensions for the records from
      join buffer that don't have any matching rows from the inner tables.
    */
    reset(FALSE);
    rc= join_null_complements(skip_last);
    if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
      goto finish;
  }
  if(next_cache)
  {
    /*
      When using linked caches we must ensure the records in the next caches
      that refer to the records in the join buffer are fully extended.
      Otherwise we could have references to the records that have been
      already erased from the join buffer and replaced for new records.
    */
    rc= next_cache->join_records(skip_last);
    if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
      goto finish;
  }

  if (skip_last)
  {
    DBUG_ASSERT(!is_key_access());
    /*
      Restore the last record from the join buffer to generate
      all extensions for it.
    */
    get_record();
  }

finish:
  if (outer_join_first_inner)
  {
    /*
      All null complemented rows have been already generated for all
      outer records from join buffer. Restore the state of the
      first_unmatched values to 0 to avoid another null complementing.
    */
    for (tab= join_tab->first_inner; tab <= join_tab->last_inner; tab++)
      tab->first_unmatched= 0;
  }
  restore_last_record();
  reset(TRUE);
  DBUG_PRINT("exit" , ("rc: %d" , rc));
  DBUG_RETURN(rc);
}
| 2165 | |
| 2166 | |
| 2167 | /* |
| 2168 | Find matches from the next table for records from the join buffer |
| 2169 | |
| 2170 | SYNOPSIS |
| 2171 | join_matching_records() |
| 2172 | skip_last do not look for matches for the last partial join record |
| 2173 | |
| 2174 | DESCRIPTION |
| 2175 | The function retrieves rows of the join_tab table and checks whether they |
| 2176 | match partial join records from the join buffer. If a match is found |
| 2177 | the function will call the sub_select function trying to look for matches |
| 2178 | for the remaining join operations. |
| 2179 | This function currently is called only from the function join_records. |
| 2180 | If the value of skip_last is true the function writes the partial join |
| 2181 | record from the record buffer into the join buffer to save its value for |
| 2182 | the future processing in the caller function. |
| 2183 | |
| 2184 | NOTES |
| 2185 | If employed by BNL or BNLH join algorithms the function performs a full |
| 2186 | scan of join_tab for each refill of the join buffer. If BKA or BKAH |
| 2187 | algorithms are used then the function iterates only over those records |
| 2188 | from join_tab that can be accessed by keys built over records in the join |
| 2189 | buffer. To apply a proper method of iteration the function just calls |
| 2190 | virtual iterator methods (open, next, close) of the member join_tab_scan. |
    The member can be either of the JOIN_TAB_SCAN or JOIN_TAB_SCAN_MRR type.
| 2192 | The class JOIN_TAB_SCAN provides the iterator methods for BNL/BNLH join |
| 2193 | algorithms. The class JOIN_TAB_SCAN_MRR provides the iterator methods |
| 2194 | for BKA/BKAH join algorithms. |
| 2195 | When the function looks for records from the join buffer that would |
| 2196 | match a record from join_tab it iterates either over all records in |
| 2197 | the buffer or only over selected records. If BNL join operation is |
| 2198 | performed all records are checked for the match. If BNLH or BKAH |
| 2199 | algorithm is employed to join join_tab then the function looks only |
| 2200 | through the records with the same join key as the record from join_tab. |
| 2201 | With the BKA join algorithm only one record from the join buffer is checked |
| 2202 | for a match for any record from join_tab. To iterate over the candidates |
| 2203 | for a match the virtual function get_next_candidate_for_match is used, |
| 2204 | while the virtual function prepare_look_for_matches is called to prepare |
| 2205 | for such iteration process. |
| 2206 | |
| 2207 | NOTES |
| 2208 | The function produces all matching extensions for the records in the |
| 2209 | join buffer following the path of the employed blocked algorithm. |
| 2210 | When an outer join operation is performed all unmatched records from |
| 2211 | the join buffer must be extended by null values. The function |
| 2212 | 'join_null_complements' serves this purpose. |
| 2213 | |
| 2214 | RETURN VALUE |
| 2215 | return one of enum_nested_loop_state |
| 2216 | */ |
| 2217 | |
enum_nested_loop_state JOIN_CACHE::join_matching_records(bool skip_last)
{
  int error;
  enum_nested_loop_state rc= NESTED_LOOP_OK;
  join_tab->table->null_row= 0;
  bool check_only_first_match= join_tab->check_only_first_match();
  bool outer_join_first_inner= join_tab->is_first_inner_for_outer_join();
  DBUG_ENTER("JOIN_CACHE::join_matching_records" );

  /* Return at once if there are no records in the join buffer */
  if (!records)
    DBUG_RETURN(NESTED_LOOP_OK);

  /*
    When joining we read records from the join buffer back into record buffers.
    If matches for the last partial join record are found through a call to
    the sub_select function then this partial join record must be saved in the
    join buffer in order to be restored just before the sub_select call.
  */
  if (skip_last)
    put_record();

  if (join_tab->use_quick == 2 && join_tab->select->quick)
  {
    /* A dynamic range access was used last. Clean up after it */
    delete join_tab->select->quick;
    join_tab->select->quick= 0;
  }

  /* Note: 'finish2' skips the error-to-rc translation done at 'finish' */
  if ((rc= join_tab_execution_startup(join_tab)) < 0)
    goto finish2;

  /* Prepare to retrieve all records of the joined table */
  if (unlikely((error= join_tab_scan->open())))
  {
    /*
      TODO: if we get here, we will assert in net_send_statement(). Add test
      coverage and fix.
    */
    goto finish;
  }

  while (!(error= join_tab_scan->next()))
  {
    if (unlikely(join->thd->check_killed()))
    {
      /* The user has aborted the execution of the query */
      join->thd->send_kill_message();
      rc= NESTED_LOOP_KILLED;
      goto finish;
    }

    /* Remember the position of the current row if a later phase needs it */
    if (join_tab->keep_current_rowid)
      join_tab->table->file->position(join_tab->table->record[0]);

    /* Prepare to read matching candidates from the join buffer */
    if (prepare_look_for_matches(skip_last))
      continue;
    join_tab->jbuf_tracker->r_scans++;

    uchar *rec_ptr;
    /* Read each possible candidate from the buffer and look for matches */
    while ((rec_ptr= get_next_candidate_for_match()))
    {
      join_tab->jbuf_tracker->r_rows++;
      /*
        If only the first match is needed, and, it has been already found for
        the next record read from the join buffer, then the record is skipped.
        Also those records that must be null complemented are not considered
        as candidates for matches.
      */
      if ((!check_only_first_match && !outer_join_first_inner) ||
          !skip_next_candidate_for_match(rec_ptr))
      {
        read_next_candidate_for_match(rec_ptr);
        rc= generate_full_extensions(rec_ptr);
        if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
          goto finish;
      }
    }
  }

finish:
  /* A negative 'error' means "no more rows", a positive one a real error */
  if (error)
    rc= error < 0 ? NESTED_LOOP_NO_MORE_ROWS: NESTED_LOOP_ERROR;
finish2:
  join_tab_scan->close();
  DBUG_RETURN(rc);
}
| 2307 | |
| 2308 | |
| 2309 | /* |
| 2310 | Set match flag for a record in join buffer if it has not been set yet |
| 2311 | |
| 2312 | SYNOPSIS |
| 2313 | set_match_flag_if_none() |
| 2314 | first_inner the join table to which this flag is attached to |
| 2315 | rec_ptr pointer to the record in the join buffer |
| 2316 | |
| 2317 | DESCRIPTION |
| 2318 | If the records of the table are accumulated in a join buffer the function |
| 2319 | sets the match flag for the record in the buffer that is referred to by |
| 2320 | the record from this cache positioned at 'rec_ptr'. |
| 2321 | The function also sets the match flag 'found' of the table first inner |
| 2322 | if it has not been set before. |
| 2323 | |
| 2324 | NOTES |
| 2325 | The function assumes that the match flag for any record in any cache |
| 2326 | is placed in the first byte occupied by the record fields. |
| 2327 | |
| 2328 | RETURN VALUE |
| 2329 | TRUE the match flag is set by this call for the first time |
| 2330 | FALSE the match flag has been set before this call |
| 2331 | */ |
| 2332 | |
| 2333 | bool JOIN_CACHE::set_match_flag_if_none(JOIN_TAB *first_inner, |
| 2334 | uchar *rec_ptr) |
| 2335 | { |
| 2336 | if (!first_inner->cache) |
| 2337 | { |
| 2338 | /* |
| 2339 | Records of the first inner table to which the flag is attached to |
| 2340 | are not accumulated in a join buffer. |
| 2341 | */ |
| 2342 | if (first_inner->found) |
| 2343 | return FALSE; |
| 2344 | else |
| 2345 | { |
| 2346 | first_inner->found= 1; |
| 2347 | return TRUE; |
| 2348 | } |
| 2349 | } |
| 2350 | JOIN_CACHE *cache= this; |
| 2351 | while (cache->join_tab != first_inner) |
| 2352 | { |
| 2353 | cache= cache->prev_cache; |
| 2354 | DBUG_ASSERT(cache); |
| 2355 | rec_ptr= cache->get_rec_ref(rec_ptr); |
| 2356 | } |
| 2357 | if ((Match_flag) rec_ptr[0] != MATCH_FOUND) |
| 2358 | { |
| 2359 | rec_ptr[0]= MATCH_FOUND; |
| 2360 | first_inner->found= 1; |
| 2361 | return TRUE; |
| 2362 | } |
| 2363 | return FALSE; |
| 2364 | } |
| 2365 | |
| 2366 | |
| 2367 | /* |
| 2368 | Generate all full extensions for a partial join record in the buffer |
| 2369 | |
| 2370 | SYNOPSIS |
| 2371 | generate_full_extensions() |
| 2372 | rec_ptr pointer to the record from join buffer to generate extensions |
| 2373 | |
| 2374 | DESCRIPTION |
| 2375 | The function first checks whether the current record of 'join_tab' matches |
| 2376 | the partial join record from join buffer located at 'rec_ptr'. If it is the |
| 2377 | case the function calls the join_tab->next_select method to generate |
| 2378 | all full extension for this partial join match. |
| 2379 | |
| 2380 | RETURN VALUE |
| 2381 | return one of enum_nested_loop_state. |
| 2382 | */ |
| 2383 | |
enum_nested_loop_state JOIN_CACHE::generate_full_extensions(uchar *rec_ptr)
{
  enum_nested_loop_state rc= NESTED_LOOP_OK;
  DBUG_ENTER("JOIN_CACHE::generate_full_extensions" );

  /*
    Check whether the extended partial join record meets
    the pushdown conditions.
  */
  if (check_match(rec_ptr))
  {
    int res= 0;

    /* Apply the semi-join duplicate weed-out check, if one is attached */
    if (!join_tab->check_weed_out_table ||
        !(res= join_tab->check_weed_out_table->sj_weedout_check_row(join->thd)))
    {
      set_curr_rec_link(rec_ptr);
      /* Hand the extended record over to the rest of the join plan */
      rc= (join_tab->next_select)(join, join_tab+1, 0);
      if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
      {
        reset(TRUE);
        DBUG_RETURN(rc);
      }
    }
    /* res == -1 signals an error from the weed-out check */
    if (res == -1)
    {
      rc= NESTED_LOOP_ERROR;
      DBUG_RETURN(rc);
    }
  }
  else if (unlikely(join->thd->is_error()))
    rc= NESTED_LOOP_ERROR;
  DBUG_RETURN(rc);
}
| 2418 | |
| 2419 | |
| 2420 | /* |
| 2421 | Check matching to a partial join record from the join buffer |
| 2422 | |
| 2423 | SYNOPSIS |
| 2424 | check_match() |
| 2425 | rec_ptr pointer to the record from join buffer to check matching to |
| 2426 | |
| 2427 | DESCRIPTION |
| 2428 | The function checks whether the current record of 'join_tab' matches |
| 2429 | the partial join record from join buffer located at 'rec_ptr'. If this is |
| 2430 | the case and 'join_tab' is the last inner table of a semi-join or an outer |
| 2431 | join the function turns on the match flag for the 'rec_ptr' record unless |
| 2432 | it has been already set. |
| 2433 | |
| 2434 | NOTES |
| 2435 | Setting the match flag on can trigger re-evaluation of pushdown conditions |
| 2436 | for the record when join_tab is the last inner table of an outer join. |
| 2437 | |
| 2438 | RETURN VALUE |
| 2439 | TRUE there is a match |
| 2440 | FALSE there is no match |
| 2441 | In this case the caller must also check thd->is_error() to see |
| 2442 | if there was a fatal error for the query. |
| 2443 | */ |
| 2444 | |
inline bool JOIN_CACHE::check_match(uchar *rec_ptr)
{
  /* Check whether pushdown conditions are satisfied */
  DBUG_ENTER("JOIN_CACHE:check_match" );

  if (join_tab->select && join_tab->select->skip_record(join->thd) <= 0)
    DBUG_RETURN(FALSE);

  join_tab->jbuf_tracker->r_rows_after_where++;

  /* Inner tables other than the last one need no match-flag handling here */
  if (!join_tab->is_last_inner_table())
    DBUG_RETURN(TRUE);

  /*
    This is the last inner table of an outer join,
    and maybe of other embedding outer joins, or
    this is the last inner table of a semi-join.
  */
  JOIN_TAB *first_inner= join_tab->get_first_inner_table();
  do
  {
    set_match_flag_if_none(first_inner, rec_ptr);
    if (first_inner->check_only_first_match() &&
        !join_tab->first_inner)
      DBUG_RETURN(TRUE);
    /*
      This is the first match for the outer table row.
      The function set_match_flag_if_none has turned the flag
      first_inner->found on. The pushdown predicates for
      inner tables must be re-evaluated with this flag on.
      Note that, if first_inner is the first inner table
      of a semi-join, but is not an inner table of an outer join
      such that 'not exists' optimization can be applied to it,
      the re-evaluation of the pushdown predicates is not needed.
    */
    for (JOIN_TAB *tab= first_inner; tab <= join_tab; tab++)
    {
      if (tab->select && tab->select->skip_record(join->thd) <= 0)
        DBUG_RETURN(FALSE);
    }
  }
  /* Repeat for each embedding outer join that also ends at join_tab */
  while ((first_inner= first_inner->first_upper) &&
         first_inner->last_inner == join_tab);
  DBUG_RETURN(TRUE);
}
| 2490 | |
| 2491 | |
| 2492 | /* |
| 2493 | Add null complements for unmatched outer records from join buffer |
| 2494 | |
| 2495 | SYNOPSIS |
| 2496 | join_null_complements() |
| 2497 | skip_last do not add null complements for the last record |
| 2498 | |
| 2499 | DESCRIPTION |
| 2500 | This function is called only for inner tables of outer joins. |
| 2501 | The function retrieves all rows from the join buffer and adds null |
| 2502 | complements for those of them that do not have matches for outer |
| 2503 | table records. |
| 2504 | If the 'join_tab' is the last inner table of the embedding outer |
| 2505 | join and the null complemented record satisfies the outer join |
    condition then the corresponding match flag is turned on
| 2507 | unless it has been set earlier. This setting may trigger |
| 2508 | re-evaluation of pushdown conditions for the record. |
| 2509 | |
| 2510 | NOTES |
| 2511 | The same implementation of the virtual method join_null_complements |
    is used for the BNL/BNLH/BKA/BKAH join algorithms.
| 2513 | |
| 2514 | RETURN VALUE |
| 2515 | return one of enum_nested_loop_state. |
| 2516 | */ |
| 2517 | |
enum_nested_loop_state JOIN_CACHE::join_null_complements(bool skip_last)
{
  ulonglong cnt;
  enum_nested_loop_state rc= NESTED_LOOP_OK;
  /* TRUE when join_tab is the first inner table of its outer join nest */
  bool is_first_inner= join_tab == join_tab->first_unmatched;
  DBUG_ENTER("JOIN_CACHE::join_null_complements" );

  /* Return at once if there are no records in the join buffer */
  if (!records)
    DBUG_RETURN(NESTED_LOOP_OK);

  /*
    For key-based access (BKA/BKAH) every buffered record is processed;
    otherwise the last record may be excluded when skip_last is set.
  */
  cnt= records - (is_key_access() ? 0 : MY_TEST(skip_last));

  /* This function may be called only for inner tables of outer joins */
  DBUG_ASSERT(join_tab->first_inner);

  for ( ; cnt; cnt--)
  {
    if (unlikely(join->thd->check_killed()))
    {
      /* The user has aborted the execution of the query */
      join->thd->send_kill_message();
      rc= NESTED_LOOP_KILLED;
      goto finish;
    }
    /* Just skip the whole record if a match for it has been already found */
    if (!is_first_inner || !skip_if_matched())
    {
      get_record();
      /* The outer row is complemented by nulls for each inner table */
      restore_record(join_tab->table, s->default_values);
      mark_as_null_row(join_tab->table);
      /* Pass the null-complemented row on to the rest of the join */
      rc= generate_full_extensions(get_curr_rec());
      if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
        goto finish;
    }
  }

finish:
  DBUG_RETURN(rc);
}
| 2559 | |
| 2560 | |
| 2561 | /* |
| 2562 | Save data on the join algorithm employed by the join cache |
| 2563 | |
| 2564 | SYNOPSIS |
| 2565 | save_explain_data() |
      explain  EXPLAIN_BKA_TYPE structure to fill with the join algorithm data
| 2567 | |
| 2568 | DESCRIPTION |
| 2569 | This function puts info about the type of the used join buffer (flat or |
    incremental) and on the type of the employed join algorithm (BNL,
| 2571 | BNLH, BKA or BKAH) to the data structure |
| 2572 | |
| 2573 | RETURN VALUE |
| 2574 | 0 ok |
| 2575 | 1 error |
| 2576 | */ |
| 2577 | |
| 2578 | bool JOIN_CACHE::save_explain_data(EXPLAIN_BKA_TYPE *explain) |
| 2579 | { |
| 2580 | explain->incremental= MY_TEST(prev_cache); |
| 2581 | |
| 2582 | explain->join_buffer_size= get_join_buffer_size(); |
| 2583 | |
| 2584 | switch (get_join_alg()) { |
| 2585 | case BNL_JOIN_ALG: |
| 2586 | explain->join_alg= "BNL" ; |
| 2587 | break; |
| 2588 | case BNLH_JOIN_ALG: |
| 2589 | explain->join_alg= "BNLH" ; |
| 2590 | break; |
| 2591 | case BKA_JOIN_ALG: |
| 2592 | explain->join_alg= "BKA" ; |
| 2593 | break; |
| 2594 | case BKAH_JOIN_ALG: |
| 2595 | explain->join_alg= "BKAH" ; |
| 2596 | break; |
| 2597 | default: |
| 2598 | DBUG_ASSERT(0); |
| 2599 | } |
| 2600 | return 0; |
| 2601 | } |
| 2602 | |
| 2603 | /** |
| 2604 | get thread handle. |
| 2605 | */ |
| 2606 | |
THD *JOIN_CACHE::thd()
{
  /* The THD of the query this join cache serves */
  return join->thd;
}
| 2611 | |
| 2612 | |
| 2613 | static bool add_mrr_explain_info(String *str, uint mrr_mode, handler *file) |
| 2614 | { |
| 2615 | char mrr_str_buf[128]={0}; |
| 2616 | int len; |
| 2617 | len= file->multi_range_read_explain_info(mrr_mode, mrr_str_buf, |
| 2618 | sizeof(mrr_str_buf)); |
| 2619 | if (len > 0) |
| 2620 | { |
| 2621 | if (str->length()) |
| 2622 | { |
| 2623 | if (str->append(STRING_WITH_LEN("; " ))) |
| 2624 | return 1; |
| 2625 | } |
| 2626 | if (str->append(mrr_str_buf, len)) |
| 2627 | return 1; |
| 2628 | } |
| 2629 | return 0; |
| 2630 | } |
| 2631 | |
| 2632 | |
| 2633 | bool JOIN_CACHE_BKA::save_explain_data(EXPLAIN_BKA_TYPE *explain) |
| 2634 | { |
| 2635 | if (JOIN_CACHE::save_explain_data(explain)) |
| 2636 | return 1; |
| 2637 | return add_mrr_explain_info(&explain->mrr_type, mrr_mode, join_tab->table->file); |
| 2638 | } |
| 2639 | |
| 2640 | |
| 2641 | bool JOIN_CACHE_BKAH::save_explain_data(EXPLAIN_BKA_TYPE *explain) |
| 2642 | { |
| 2643 | if (JOIN_CACHE::save_explain_data(explain)) |
| 2644 | return 1; |
| 2645 | return add_mrr_explain_info(&explain->mrr_type, mrr_mode, join_tab->table->file); |
| 2646 | } |
| 2647 | |
| 2648 | |
| 2649 | /* |
| 2650 | Initialize a hashed join cache |
| 2651 | |
| 2652 | SYNOPSIS |
| 2653 | init() |
| 2654 | for_explain join buffer is initialized for explain only |
| 2655 | |
| 2656 | DESCRIPTION |
| 2657 | The function initializes the cache structure with a hash table in it. |
| 2658 | The hash table will be used to store key values for the records from |
| 2659 | the join buffer. |
| 2660 | The function allocates memory for the join buffer and for descriptors of |
| 2661 | the record fields stored in the buffer. |
| 2662 | The function also initializes a hash table for record keys within the join |
| 2663 | buffer space. |
| 2664 | |
  NOTES
| 2666 | The function is supposed to be called by the init methods of the classes |
| 2667 | derived from JOIN_CACHE_HASHED. |
| 2668 | |
| 2669 | RETURN VALUE |
| 2670 | 0 initialization with buffer allocations has been succeeded |
| 2671 | 1 otherwise |
| 2672 | */ |
| 2673 | |
int JOIN_CACHE_HASHED::init(bool for_explain)
{
  int rc= 0;
  TABLE_REF *ref= &join_tab->ref;

  DBUG_ENTER("JOIN_CACHE_HASHED::init" );

  hash_table= 0;
  key_entries= 0;

  /* Lookup keys have the length of the ref access key */
  key_length= ref->key_length;

  /* Base initialization; for EXPLAIN no buffer structures are needed */
  if ((rc= JOIN_CACHE::init(for_explain)) || for_explain)
    DBUG_RETURN (rc);

  /* Buffer used to build lookup key values from record fields */
  if (!(key_buff= (uchar*) join->thd->alloc(key_length)))
    DBUG_RETURN(1);

  /* Take into account a reference to the next record in the key chain */
  pack_length+= get_size_of_rec_offset();
  pack_length_with_blob_ptrs+= get_size_of_rec_offset();

  ref_key_info= join_tab->get_keyinfo_by_key_no(join_tab->ref.key);
  ref_used_key_parts= join_tab->ref.key_parts;

  /*
    Default to the cheap byte-wise hash/compare functions; switch to the
    collation-aware ones below if any key part cannot be compared as
    a plain byte sequence.
  */
  hash_func= &JOIN_CACHE_HASHED::get_hash_idx_simple;
  hash_cmp_func= &JOIN_CACHE_HASHED::equal_keys_simple;

  KEY_PART_INFO *key_part= ref_key_info->key_part;
  KEY_PART_INFO *key_part_end= key_part+ref_used_key_parts;
  for ( ; key_part < key_part_end; key_part++)
  {
    if (!key_part->field->eq_cmp_as_binary())
    {
      hash_func= &JOIN_CACHE_HASHED::get_hash_idx_complex;
      hash_cmp_func= &JOIN_CACHE_HASHED::equal_keys_complex;
      break;
    }
  }

  init_hash_table();

  /* Offset of the data fields within a record stored in the buffer */
  rec_fields_offset= get_size_of_rec_offset()+get_size_of_rec_length()+
                     (prev_cache ? prev_cache->get_size_of_rec_offset() : 0);

  data_fields_offset= 0;
  if (use_emb_key)
  {
    /* With embedded keys the flag fields precede the data fields */
    CACHE_FIELD *copy= field_descr;
    CACHE_FIELD *copy_end= copy+flag_fields;
    for ( ; copy < copy_end; copy++)
      data_fields_offset+= copy->length;
  }

  DBUG_RETURN(0);
}
| 2730 | |
| 2731 | |
| 2732 | /* |
| 2733 | Initialize the hash table of a hashed join cache |
| 2734 | |
| 2735 | SYNOPSIS |
| 2736 | init_hash_table() |
| 2737 | |
| 2738 | DESCRIPTION |
| 2739 | The function estimates the number of hash table entries in the hash |
| 2740 | table to be used and initializes this hash table within the join buffer |
| 2741 | space. |
| 2742 | |
| 2743 | RETURN VALUE |
| 2744 | Currently the function always returns 0; |
| 2745 | */ |
| 2746 | |
int JOIN_CACHE_HASHED::init_hash_table()
{
  hash_table= 0;
  key_entries= 0;

  /* Calculate the minimal possible value of size_of_key_ofs greater than 1 */
  uint max_size_of_key_ofs= MY_MAX(2, get_size_of_rec_offset());
  for (size_of_key_ofs= 2;
       size_of_key_ofs <= max_size_of_key_ofs;
       size_of_key_ofs+= 2)
  {
    key_entry_length= get_size_of_rec_offset() +  // key chain header
                      size_of_key_ofs +           // reference to the next key
                      (use_emb_key ?  get_size_of_rec_offset() : key_length);

    /* Estimated space one buffered record consumes, including its key entry
       and its share of the hash table */
    size_t space_per_rec= avg_record_length +
                          avg_aux_buffer_incr +
                          key_entry_length+size_of_key_ofs;
    size_t n= buff_size / space_per_rec;

    /*
      TODO: Make a better estimate for this upper bound of
      the number of records in in the join buffer.
    */
    size_t max_n= buff_size / (pack_length-length+
                               key_entry_length+size_of_key_ofs);

    /* Target a hash table load factor of at most 0.7 */
    hash_entries= (uint) (n / 0.7);
    set_if_bigger(hash_entries, 1);

    /* Accept this offset size if it can address the whole key entry area */
    if (offset_size((uint)(max_n*key_entry_length)) <=
        size_of_key_ofs)
      break;
  }

  /* Initialize the hash table: it occupies the tail end of the buffer */
  hash_table= buff + (buff_size-hash_entries*size_of_key_ofs);
  cleanup_hash_table();
  curr_key_entry= hash_table;

  return 0;
}
| 2789 | |
| 2790 | |
| 2791 | /* |
| 2792 | Reallocate the join buffer of a hashed join cache |
| 2793 | |
| 2794 | SYNOPSIS |
| 2795 | realloc_buffer() |
| 2796 | |
  DESCRIPTION
| 2798 | The function reallocates the join buffer of the hashed join cache. |
| 2799 | After this it initializes a hash table within the buffer space and |
| 2800 | resets the join cache for writing. |
| 2801 | |
| 2802 | NOTES |
| 2803 | The function assumes that buff_size contains the new value for the join |
| 2804 | buffer size. |
| 2805 | |
| 2806 | RETURN VALUE |
| 2807 | 0 if the buffer has been successfully reallocated |
| 2808 | 1 otherwise |
| 2809 | */ |
| 2810 | |
| 2811 | int JOIN_CACHE_HASHED::realloc_buffer() |
| 2812 | { |
| 2813 | int rc; |
| 2814 | free(); |
| 2815 | rc= MY_TEST(!(buff= (uchar*) my_malloc(buff_size, MYF(MY_THREAD_SPECIFIC)))); |
| 2816 | init_hash_table(); |
| 2817 | reset(TRUE); |
| 2818 | return rc; |
| 2819 | } |
| 2820 | |
| 2821 | |
| 2822 | /* |
| 2823 | Get maximum size of the additional space per record used for record keys |
| 2824 | |
| 2825 | SYNOPSYS |
| 2826 | get_max_key_addon_space_per_record() |
| 2827 | |
| 2828 | DESCRIPTION |
| 2829 | The function returns the size of the space occupied by one key entry |
| 2830 | and one hash table entry. |
| 2831 | |
| 2832 | RETURN VALUE |
| 2833 | maximum size of the additional space per record that is used to store |
| 2834 | record keys in the hash table |
| 2835 | */ |
| 2836 | |
| 2837 | uint JOIN_CACHE_HASHED::get_max_key_addon_space_per_record() |
| 2838 | { |
| 2839 | ulong len; |
| 2840 | TABLE_REF *ref= &join_tab->ref; |
| 2841 | /* |
| 2842 | The total number of hash entries in the hash tables is bounded by |
| 2843 | ceiling(N/0.7) where N is the maximum number of records in the buffer. |
| 2844 | That's why the multiplier 2 is used in the formula below. |
| 2845 | */ |
| 2846 | len= (use_emb_key ? get_size_of_rec_offset() : ref->key_length) + |
| 2847 | size_of_rec_ofs + // size of the key chain header |
| 2848 | size_of_rec_ofs + // >= size of the reference to the next key |
| 2849 | 2*size_of_rec_ofs; // >= 2*( size of hash table entry) |
| 2850 | return len; |
| 2851 | } |
| 2852 | |
| 2853 | |
| 2854 | /* |
| 2855 | Reset the buffer of a hashed join cache for reading/writing |
| 2856 | |
| 2857 | SYNOPSIS |
| 2858 | reset() |
| 2859 | for_writing if it's TRUE the function reset the buffer for writing |
| 2860 | |
| 2861 | DESCRIPTION |
| 2862 | This implementation of the virtual function reset() resets the join buffer |
| 2863 | of the JOIN_CACHE_HASHED class for reading or writing. |
| 2864 | Additionally to what the default implementation does this function |
| 2865 | cleans up the hash table allocated within the buffer. |
| 2866 | |
| 2867 | RETURN VALUE |
| 2868 | none |
| 2869 | */ |
| 2870 | |
| 2871 | void JOIN_CACHE_HASHED::reset(bool for_writing) |
| 2872 | { |
| 2873 | this->JOIN_CACHE::reset(for_writing); |
| 2874 | if (for_writing && hash_table) |
| 2875 | cleanup_hash_table(); |
| 2876 | curr_key_entry= hash_table; |
| 2877 | } |
| 2878 | |
| 2879 | |
| 2880 | /* |
| 2881 | Add a record into the buffer of a hashed join cache |
| 2882 | |
| 2883 | SYNOPSIS |
| 2884 | put_record() |
| 2885 | |
| 2886 | DESCRIPTION |
| 2887 | This implementation of the virtual function put_record writes the next |
| 2888 | matching record into the join buffer of the JOIN_CACHE_HASHED class. |
| 2889 | Additionally to what the default implementation does this function |
| 2890 | performs the following. |
| 2891 | It extracts from the record the key value used in lookups for matching |
| 2892 | records and searches for this key in the hash tables from the join cache. |
| 2893 | If it finds the key in the hash table it joins the record to the chain |
| 2894 | of records with this key. If the key is not found in the hash table the |
| 2895 | key is placed into it and a chain containing only the newly added record |
| 2896 | is attached to the key entry. The key value is either placed in the hash |
| 2897 | element added for the key or, if the use_emb_key flag is set, remains in |
| 2898 | the record from the partial join. |
| 2899 | If the match flag field of a record contains MATCH_IMPOSSIBLE the key is |
| 2900 | not created for this record. |
| 2901 | |
| 2902 | RETURN VALUE |
| 2903 | TRUE if it has been decided that it should be the last record |
| 2904 | in the join buffer, |
| 2905 | FALSE otherwise |
| 2906 | */ |
| 2907 | |
bool JOIN_CACHE_HASHED::put_record()
{
  bool is_full;
  uchar *key;
  uint key_len= key_length;
  uchar *key_ref_ptr;
  uchar *link= 0;
  TABLE_REF *ref= &join_tab->ref;
  /* Reserve space for the reference chaining records with equal keys */
  uchar *next_ref_ptr= pos;

  pos+= get_size_of_rec_offset();
  /* Write the record into the join buffer */
  if (prev_cache)
    link= prev_cache->get_curr_rec_link();
  write_record_data(link, &is_full);

  /* Null-complemented records never participate in key lookups */
  if (last_written_is_null_compl)
    return is_full;

  if (use_emb_key)
    key= get_curr_emb_key();
  else
  {
    /* Build the key over the fields read into the record buffers */
    cp_buffer_from_ref(join->thd, join_tab->table, ref);
    key= ref->key_buff;
  }

  /* Look for the key in the hash table */
  if (key_search(key, key_len, &key_ref_ptr))
  {
    uchar *last_next_ref_ptr;
    /*
      The key is found in the hash table.
      Add the record to the circular list of the records attached to this key.
      Below 'rec' is the record to be added into the record chain for the found
      key, 'key_ref' points to a flatten representation of the st_key_entry
      structure that contains the key and the head of the record chain.
    */
    last_next_ref_ptr= get_next_rec_ref(key_ref_ptr+get_size_of_key_offset());
    /* rec->next_rec= key_entry->last_rec->next_rec */
    memcpy(next_ref_ptr, last_next_ref_ptr, get_size_of_rec_offset());
    /* key_entry->last_rec->next_rec= rec */
    store_next_rec_ref(last_next_ref_ptr, next_ref_ptr);
    /* key_entry->last_rec= rec */
    store_next_rec_ref(key_ref_ptr+get_size_of_key_offset(), next_ref_ptr);
  }
  else
  {
    /*
      The key is not found in the hash table.
      Put the key into the join buffer linking it with the keys for the
      corresponding hash entry. Create a circular list with one element
      referencing the record and attach the list to the key in the buffer.
      Key entries grow downwards from last_key_entry towards the records.
    */
    uchar *cp= last_key_entry;
    cp-= get_size_of_rec_offset()+get_size_of_key_offset();
    store_next_key_ref(key_ref_ptr, cp);
    store_null_key_ref(cp);
    /* A one-element circular chain: the record references itself */
    store_next_rec_ref(next_ref_ptr, next_ref_ptr);
    store_next_rec_ref(cp+get_size_of_key_offset(), next_ref_ptr);
    if (use_emb_key)
    {
      /* Store only a reference to the key embedded in the record */
      cp-= get_size_of_rec_offset();
      store_emb_key_ref(cp, key);
    }
    else
    {
      /* Store a copy of the key value itself */
      cp-= key_len;
      memcpy(cp, key, key_len);
    }
    last_key_entry= cp;
    /* The key entry area must not run into the record area */
    DBUG_ASSERT(last_key_entry >= end_pos);
    /* Increment the counter of key_entries in the hash table */
    key_entries++;
  }
  return is_full;
}
| 2986 | |
| 2987 | |
| 2988 | /* |
| 2989 | Read the next record from the buffer of a hashed join cache |
| 2990 | |
| 2991 | SYNOPSIS |
| 2992 | get_record() |
| 2993 | |
| 2994 | DESCRIPTION |
| 2995 | Additionally to what the default implementation of the virtual |
| 2996 | function get_record does this implementation skips the link element |
| 2997 | used to connect the records with the same key into a chain. |
| 2998 | |
| 2999 | RETURN VALUE |
| 3000 | TRUE there are no more records to read from the join buffer |
| 3001 | FALSE otherwise |
| 3002 | */ |
| 3003 | |
| 3004 | bool JOIN_CACHE_HASHED::get_record() |
| 3005 | { |
| 3006 | pos+= get_size_of_rec_offset(); |
| 3007 | return this->JOIN_CACHE::get_record(); |
| 3008 | } |
| 3009 | |
| 3010 | |
| 3011 | /* |
| 3012 | Skip record from a hashed join buffer if its match flag is set to MATCH_FOUND |
| 3013 | |
| 3014 | SYNOPSIS |
| 3015 | skip_if_matched() |
| 3016 | |
| 3017 | DESCRIPTION |
| 3018 | This implementation of the virtual function skip_if_matched does |
| 3019 | the same as the default implementation does, but it takes into account |
| 3020 | the link element used to connect the records with the same key into a chain. |
| 3021 | |
| 3022 | RETURN VALUE |
| 3023 | TRUE the match flag is MATCH_FOUND and the record has been skipped |
| 3024 | FALSE otherwise |
| 3025 | */ |
| 3026 | |
| 3027 | bool JOIN_CACHE_HASHED::skip_if_matched() |
| 3028 | { |
| 3029 | uchar *save_pos= pos; |
| 3030 | pos+= get_size_of_rec_offset(); |
| 3031 | if (!this->JOIN_CACHE::skip_if_matched()) |
| 3032 | { |
| 3033 | pos= save_pos; |
| 3034 | return FALSE; |
| 3035 | } |
| 3036 | return TRUE; |
| 3037 | } |
| 3038 | |
| 3039 | |
| 3040 | /* |
| 3041 | Skip record from a hashed join buffer if its match flag dictates to do so |
| 3042 | |
| 3043 | SYNOPSIS |
    skip_if_not_needed_match()
| 3045 | |
| 3046 | DESCRIPTION |
| 3047 | This implementation of the virtual function skip_if_not_needed_match does |
| 3048 | the same as the default implementation does, but it takes into account |
| 3049 | the link element used to connect the records with the same key into a chain. |
| 3050 | |
| 3051 | RETURN VALUE |
| 3052 | TRUE the match flag dictates to skip the record |
| 3053 | FALSE the match flag is off |
| 3054 | */ |
| 3055 | |
| 3056 | bool JOIN_CACHE_HASHED::skip_if_not_needed_match() |
| 3057 | { |
| 3058 | uchar *save_pos= pos; |
| 3059 | pos+= get_size_of_rec_offset(); |
| 3060 | if (!this->JOIN_CACHE::skip_if_not_needed_match()) |
| 3061 | { |
| 3062 | pos= save_pos; |
| 3063 | return FALSE; |
| 3064 | } |
| 3065 | return TRUE; |
| 3066 | } |
| 3067 | |
| 3068 | |
| 3069 | /* |
| 3070 | Search for a key in the hash table of the join buffer |
| 3071 | |
| 3072 | SYNOPSIS |
| 3073 | key_search() |
| 3074 | key pointer to the key value |
| 3075 | key_len key value length |
| 3076 | key_ref_ptr OUT position of the reference to the next key from |
                      the hash element for the found key, or
                      a position where the reference to the hash
                      element for the key is to be added in the
                      case when the key has not been found
| 3081 | |
| 3082 | DESCRIPTION |
| 3083 | The function looks for a key in the hash table of the join buffer. |
    If the key is found the function returns the position of the reference
    to the next key from the hash element for the given key.
| 3086 | Otherwise the function returns the position where the reference to the |
| 3087 | newly created hash element for the given key is to be added. |
| 3088 | |
| 3089 | RETURN VALUE |
| 3090 | TRUE the key is found in the hash table |
| 3091 | FALSE otherwise |
| 3092 | */ |
| 3093 | |
| 3094 | bool JOIN_CACHE_HASHED::key_search(uchar *key, uint key_len, |
| 3095 | uchar **key_ref_ptr) |
| 3096 | { |
| 3097 | bool is_found= FALSE; |
| 3098 | uint idx= (this->*hash_func)(key, key_length); |
| 3099 | uchar *ref_ptr= hash_table+size_of_key_ofs*idx; |
| 3100 | while (!is_null_key_ref(ref_ptr)) |
| 3101 | { |
| 3102 | uchar *next_key; |
| 3103 | ref_ptr= get_next_key_ref(ref_ptr); |
| 3104 | next_key= use_emb_key ? get_emb_key(ref_ptr-get_size_of_rec_offset()) : |
| 3105 | ref_ptr-key_length; |
| 3106 | |
| 3107 | if ((this->*hash_cmp_func)(next_key, key, key_len)) |
| 3108 | { |
| 3109 | is_found= TRUE; |
| 3110 | break; |
| 3111 | } |
| 3112 | } |
| 3113 | *key_ref_ptr= ref_ptr; |
| 3114 | return is_found; |
| 3115 | } |
| 3116 | |
| 3117 | |
| 3118 | /* |
| 3119 | Hash function that considers a key in the hash table as byte array |
| 3120 | |
| 3121 | SYNOPSIS |
| 3122 | get_hash_idx_simple() |
| 3123 | key pointer to the key value |
| 3124 | key_len key value length |
| 3125 | |
| 3126 | DESCRIPTION |
| 3127 | The function calculates an index of the hash entry in the hash table |
| 3128 | of the join buffer for the given key. It considers the key just as |
| 3129 | a sequence of bytes of the length key_len. |
| 3130 | |
| 3131 | RETURN VALUE |
| 3132 | the calculated index of the hash entry for the given key |
| 3133 | */ |
| 3134 | |
| 3135 | inline |
| 3136 | uint JOIN_CACHE_HASHED::get_hash_idx_simple(uchar* key, uint key_len) |
| 3137 | { |
| 3138 | ulong nr= 1; |
| 3139 | ulong nr2= 4; |
| 3140 | uchar *pos= key; |
| 3141 | uchar *end= key+key_len; |
| 3142 | for (; pos < end ; pos++) |
| 3143 | { |
| 3144 | nr^= (ulong) ((((uint) nr & 63)+nr2)*((uint) *pos))+ (nr << 8); |
| 3145 | nr2+= 3; |
| 3146 | } |
| 3147 | return nr % hash_entries; |
| 3148 | } |
| 3149 | |
| 3150 | |
| 3151 | /* |
| 3152 | Hash function that takes into account collations of the components of the key |
| 3153 | |
| 3154 | SYNOPSIS |
| 3155 | get_hash_idx_complex() |
| 3156 | key pointer to the key value |
| 3157 | key_len key value length |
| 3158 | |
| 3159 | DESCRIPTION |
| 3160 | The function calculates an index of the hash entry in the hash table |
| 3161 | of the join buffer for the given key. It takes into account that the |
| 3162 | components of the key may be of a varchar type with different collations. |
    The function guarantees the same hash value for any two equal
| 3164 | keys that may differ as byte sequences. |
| 3165 | The function takes the info about the components of the key, their |
| 3166 | types and used collations from the class member ref_key_info containing |
| 3167 | a pointer to the descriptor of the index that can be used for the join |
| 3168 | operation. |
| 3169 | |
| 3170 | RETURN VALUE |
| 3171 | the calculated index of the hash entry for the given key |
| 3172 | */ |
| 3173 | |
| 3174 | inline |
| 3175 | uint JOIN_CACHE_HASHED::get_hash_idx_complex(uchar *key, uint key_len) |
| 3176 | { |
| 3177 | return |
| 3178 | (uint) (key_hashnr(ref_key_info, ref_used_key_parts, key) % hash_entries); |
| 3179 | } |
| 3180 | |
| 3181 | |
| 3182 | /* |
| 3183 | Compare two key entries in the hash table as sequence of bytes |
| 3184 | |
| 3185 | SYNOPSIS |
| 3186 | equal_keys_simple() |
| 3187 | key1 pointer to the first key entry |
| 3188 | key2 pointer to the second key entry |
| 3189 | key_len the length of the key values |
| 3190 | |
| 3191 | DESCRIPTION |
| 3192 | The function compares two key entries in the hash table key1 and key2 |
| 3193 | as two sequences bytes of the length key_len |
| 3194 | |
| 3195 | RETURN VALUE |
| 3196 | TRUE key1 coincides with key2 |
| 3197 | FALSE otherwise |
| 3198 | */ |
| 3199 | |
| 3200 | inline |
| 3201 | bool JOIN_CACHE_HASHED::equal_keys_simple(uchar *key1, uchar *key2, |
| 3202 | uint key_len) |
| 3203 | { |
| 3204 | return memcmp(key1, key2, key_len) == 0; |
| 3205 | } |
| 3206 | |
| 3207 | |
| 3208 | /* |
| 3209 | Compare two key entries taking into account the used collation |
| 3210 | |
| 3211 | SYNOPSIS |
| 3212 | equal_keys_complex() |
| 3213 | key1 pointer to the first key entry |
| 3214 | key2 pointer to the second key entry |
| 3215 | key_len the length of the key values |
| 3216 | |
| 3217 | DESCRIPTION |
| 3218 | The function checks whether two key entries in the hash table |
| 3219 | key1 and key2 are equal as, possibly, compound keys of a certain |
| 3220 | structure whose components may be of a varchar type and may |
| 3221 | employ different collations. |
| 3222 | The descriptor of the key structure is taken from the class |
| 3223 | member ref_key_info. |
| 3224 | |
| 3225 | RETURN VALUE |
    TRUE    key1 is equal to key2
| 3227 | FALSE otherwise |
| 3228 | */ |
| 3229 | |
| 3230 | inline |
| 3231 | bool JOIN_CACHE_HASHED::equal_keys_complex(uchar *key1, uchar *key2, |
| 3232 | uint key_len) |
| 3233 | { |
| 3234 | return key_buf_cmp(ref_key_info, ref_used_key_parts, key1, key2) == 0; |
| 3235 | } |
| 3236 | |
| 3237 | |
| 3238 | /* |
| 3239 | Clean up the hash table of the join buffer |
| 3240 | |
| 3241 | SYNOPSIS |
    cleanup_hash_table()
| 3245 | |
| 3246 | DESCRIPTION |
| 3247 | The function cleans up the hash table in the join buffer removing all |
| 3248 | hash elements from the table. |
| 3249 | |
| 3250 | RETURN VALUE |
| 3251 | none |
| 3252 | */ |
| 3253 | |
| 3254 | void JOIN_CACHE_HASHED:: cleanup_hash_table() |
| 3255 | { |
| 3256 | last_key_entry= hash_table; |
| 3257 | bzero(hash_table, (buff+buff_size)-hash_table); |
| 3258 | key_entries= 0; |
| 3259 | } |
| 3260 | |
| 3261 | |
| 3262 | /* |
| 3263 | Check whether all records in a key chain have their match flags set on |
| 3264 | |
| 3265 | SYNOPSIS |
| 3266 | check_all_match_flags_for_key() |
| 3267 | key_chain_ptr |
| 3268 | |
| 3269 | DESCRIPTION |
| 3270 | This function retrieves records in the given circular chain and checks |
| 3271 | whether their match flags are set on. The parameter key_chain_ptr shall |
| 3272 | point to the position in the join buffer storing the reference to the |
| 3273 | last element of this chain. |
| 3274 | |
| 3275 | RETURN VALUE |
| 3276 | TRUE if each retrieved record has its match flag set to MATCH_FOUND |
| 3277 | FALSE otherwise |
| 3278 | */ |
| 3279 | |
| 3280 | bool JOIN_CACHE_HASHED::check_all_match_flags_for_key(uchar *key_chain_ptr) |
| 3281 | { |
| 3282 | uchar *last_rec_ref_ptr= get_next_rec_ref(key_chain_ptr); |
| 3283 | uchar *next_rec_ref_ptr= last_rec_ref_ptr; |
| 3284 | do |
| 3285 | { |
| 3286 | next_rec_ref_ptr= get_next_rec_ref(next_rec_ref_ptr); |
| 3287 | uchar *rec_ptr= next_rec_ref_ptr+rec_fields_offset; |
| 3288 | if (get_match_flag_by_pos(rec_ptr) != MATCH_FOUND) |
| 3289 | return FALSE; |
| 3290 | } |
| 3291 | while (next_rec_ref_ptr != last_rec_ref_ptr); |
| 3292 | return TRUE; |
| 3293 | } |
| 3294 | |
| 3295 | |
| 3296 | /* |
| 3297 | Get the next key built for the records from the buffer of a hashed join cache |
| 3298 | |
| 3299 | SYNOPSIS |
| 3300 | get_next_key() |
| 3301 | key pointer to the buffer where the key value is to be placed |
| 3302 | |
| 3303 | DESCRIPTION |
| 3304 | The function reads the next key value stored in the hash table of the |
| 3305 | join buffer. Depending on the value of the use_emb_key flag of the |
| 3306 | join cache the value is read either from the table itself or from |
| 3307 | the record field where it occurs. |
| 3308 | |
| 3309 | RETURN VALUE |
| 3310 | length of the key value - if the starting value of 'cur_key_entry' refers |
    to the position after that referred to by the value of 'last_key_entry',
| 3312 | 0 - otherwise. |
| 3313 | */ |
| 3314 | |
| 3315 | uint JOIN_CACHE_HASHED::get_next_key(uchar ** key) |
| 3316 | { |
| 3317 | if (curr_key_entry == last_key_entry) |
| 3318 | return 0; |
| 3319 | |
| 3320 | curr_key_entry-= key_entry_length; |
| 3321 | |
| 3322 | *key = use_emb_key ? get_emb_key(curr_key_entry) : curr_key_entry; |
| 3323 | |
| 3324 | DBUG_ASSERT(*key >= buff && *key < hash_table); |
| 3325 | |
| 3326 | return key_length; |
| 3327 | } |
| 3328 | |
| 3329 | |
| 3330 | /* |
| 3331 | Initiate an iteration process over records in the joined table |
| 3332 | |
| 3333 | SYNOPSIS |
| 3334 | open() |
| 3335 | |
| 3336 | DESCRIPTION |
| 3337 | The function initiates the process of iteration over records from the |
| 3338 | joined table recurrently performed by the BNL/BKLH join algorithm. |
| 3339 | |
| 3340 | RETURN VALUE |
| 3341 | 0 the initiation is a success |
| 3342 | error code otherwise |
| 3343 | */ |
| 3344 | |
int JOIN_TAB_SCAN::open()
{
  /* Restore the status of the tables joined by preceding caches */
  save_or_restore_used_tabs(join_tab, FALSE);
  is_first_record= TRUE;
  /* Count this scan of the joined table for the EXPLAIN tracker */
  join_tab->tracker->r_scans++;
  return join_init_read_record(join_tab);
}
| 3352 | |
| 3353 | |
| 3354 | /* |
| 3355 | Read the next record that can match while scanning the joined table |
| 3356 | |
| 3357 | SYNOPSIS |
| 3358 | next() |
| 3359 | |
| 3360 | DESCRIPTION |
| 3361 | The function reads the next record from the joined table that can |
| 3362 | match some records in the buffer of the join cache 'cache'. To do |
| 3363 | this the function calls the function that scans table records and |
| 3364 | looks for the next one that meets the condition pushed to the |
| 3365 | joined table join_tab. |
| 3366 | |
| 3367 | NOTES |
| 3368 | The function catches the signal that kills the query. |
| 3369 | |
| 3370 | RETURN VALUE |
| 3371 | 0 the next record exists and has been successfully read |
| 3372 | error code otherwise |
| 3373 | */ |
| 3374 | |
int JOIN_TAB_SCAN::next()
{
  int err= 0;
  int skip_rc;
  READ_RECORD *info= &join_tab->read_record;
  SQL_SELECT *select= join_tab->cache_select;
  THD *thd= join->thd;

  /* The first record was already fetched by open(); do not read past it */
  if (is_first_record)
    is_first_record= FALSE;
  else
    err= info->read_record();

  if (!err)
  {
    /* Count every retrieved row for the EXPLAIN tracker */
    join_tab->tracker->r_rows++;
  }

  /* skip_record() <= 0: the row fails the pushed condition (0) or an
     error occurred (< 0) */
  while (!err && select && (skip_rc= select->skip_record(thd)) <= 0)
  {
    if (unlikely(thd->check_killed()) || skip_rc < 0)
      return 1;
    /*
      Move to the next record if the last retrieved record does not
      meet the condition pushed to the table join_tab.
    */
    err= info->read_record();
    if (!err)
    {
      join_tab->tracker->r_rows++;
    }
  }

  /* The row passed the pushed condition (or there was none) */
  if (!err)
    join_tab->tracker->r_rows_after_where++;
  return err;
}
| 3412 | |
| 3413 | |
| 3414 | /* |
| 3415 | Walk back in join order from join_tab until we encounter a join tab with |
| 3416 | tab->cache!=NULL, and save/restore tab->table->status along the way. |
| 3417 | |
| 3418 | @param save TRUE save |
| 3419 | FALSE restore |
| 3420 | */ |
| 3421 | |
| 3422 | static void save_or_restore_used_tabs(JOIN_TAB *join_tab, bool save) |
| 3423 | { |
| 3424 | JOIN_TAB *first= join_tab->bush_root_tab? |
| 3425 | join_tab->bush_root_tab->bush_children->start : |
| 3426 | join_tab->join->join_tab + join_tab->join->const_tables; |
| 3427 | |
| 3428 | for (JOIN_TAB *tab= join_tab-1; tab != first && !tab->cache; tab--) |
| 3429 | { |
| 3430 | if (tab->bush_children) |
| 3431 | { |
| 3432 | for (JOIN_TAB *child= tab->bush_children->start; |
| 3433 | child != tab->bush_children->end; |
| 3434 | child++) |
| 3435 | { |
| 3436 | if (save) |
| 3437 | child->table->status= child->status; |
| 3438 | else |
| 3439 | { |
| 3440 | tab->status= tab->table->status; |
| 3441 | tab->table->status= 0; |
| 3442 | } |
| 3443 | } |
| 3444 | } |
| 3445 | |
| 3446 | if (save) |
| 3447 | tab->table->status= tab->status; |
| 3448 | else |
| 3449 | { |
| 3450 | tab->status= tab->table->status; |
| 3451 | tab->table->status= 0; |
| 3452 | } |
| 3453 | } |
| 3454 | } |
| 3455 | |
| 3456 | |
| 3457 | /* |
| 3458 | Perform finalizing actions for a scan over the table records |
| 3459 | |
| 3460 | SYNOPSIS |
| 3461 | close() |
| 3462 | |
| 3463 | DESCRIPTION |
| 3464 | The function performs the necessary restoring actions after |
| 3465 | the table scan over the joined table has been finished. |
| 3466 | |
| 3467 | RETURN VALUE |
| 3468 | none |
| 3469 | */ |
| 3470 | |
void JOIN_TAB_SCAN::close()
{
  /*
    Write the status values remembered by open() back into table->status
    of the tables preceding join_tab (save_or_restore_used_tabs with
    save == TRUE performs the write-back).
  */
  save_or_restore_used_tabs(join_tab, TRUE);
}
| 3475 | |
| 3476 | |
| 3477 | /* |
| 3478 | Prepare to iterate over the BNL join cache buffer to look for matches |
| 3479 | |
| 3480 | SYNOPSIS |
| 3481 | prepare_look_for_matches() |
| 3482 | skip_last <-> ignore the last record in the buffer |
| 3483 | |
| 3484 | DESCRIPTION |
| 3485 | The function prepares the join cache for an iteration over the |
| 3486 | records in the join buffer. The iteration is performed when looking |
| 3487 | for matches for the record from the joined table join_tab that |
| 3488 | has been placed into the record buffer of the joined table. |
| 3489 | If the value of the parameter skip_last is TRUE then the last |
| 3490 | record from the join buffer is ignored. |
| 3491 | The function initializes the counter of the records that have been |
| 3492 | not iterated over yet. |
| 3493 | |
| 3494 | RETURN VALUE |
| 3495 | TRUE there are no records in the buffer to iterate over |
| 3496 | FALSE otherwise |
| 3497 | */ |
| 3498 | |
| 3499 | bool JOIN_CACHE_BNL::prepare_look_for_matches(bool skip_last) |
| 3500 | { |
| 3501 | if (!records) |
| 3502 | return TRUE; |
| 3503 | reset(FALSE); |
| 3504 | rem_records= (uint)records - MY_TEST(skip_last); |
| 3505 | return rem_records == 0; |
| 3506 | } |
| 3507 | |
| 3508 | |
| 3509 | /* |
| 3510 | Get next record from the BNL join cache buffer when looking for matches |
| 3511 | |
| 3512 | SYNOPSIS |
| 3513 | get_next_candidate_for_match |
| 3514 | |
| 3515 | DESCRIPTION |
| 3516 | This method is used for iterations over the records from the join |
| 3517 | cache buffer when looking for matches for records from join_tab. |
    The method performs the necessary preparations to read the next record
| 3519 | from the join buffer into the record buffer by the method |
| 3520 | read_next_candidate_for_match, or, to skip the next record from the join |
| 3521 | buffer by the method skip_recurrent_candidate_for_match. |
| 3522 | This implementation of the virtual method get_next_candidate_for_match |
| 3523 | just decrements the counter of the records that are to be iterated over |
| 3524 | and returns the current value of the cursor 'pos' as the position of |
| 3525 | the record to be processed. |
| 3526 | |
| 3527 | RETURN VALUE |
| 3528 | pointer to the position right after the prefix of the current record |
| 3529 | in the join buffer if the there is another record to iterate over, |
| 3530 | 0 - otherwise. |
| 3531 | */ |
| 3532 | |
| 3533 | uchar *JOIN_CACHE_BNL::get_next_candidate_for_match() |
| 3534 | { |
| 3535 | if (!rem_records) |
| 3536 | return 0; |
| 3537 | rem_records--; |
| 3538 | return pos+base_prefix_length; |
| 3539 | } |
| 3540 | |
| 3541 | |
| 3542 | /* |
| 3543 | Check whether the matching record from the BNL cache is to be skipped |
| 3544 | |
| 3545 | SYNOPSIS |
| 3546 | skip_next_candidate_for_match |
| 3547 | rec_ptr pointer to the position in the join buffer right after the prefix |
| 3548 | of the current record |
| 3549 | |
| 3550 | DESCRIPTION |
| 3551 | This implementation of the virtual function just calls the |
| 3552 | method skip_if_not_needed_match to check whether the record referenced by |
| 3553 | ref_ptr has its match flag set either to MATCH_FOUND and join_tab is the |
| 3554 | first inner table of a semi-join, or it's set to MATCH_IMPOSSIBLE and |
| 3555 | join_tab is the first inner table of an outer join. |
| 3556 | If so, the function just skips this record setting the value of the |
| 3557 | cursor 'pos' to the position right after it. |
| 3558 | |
| 3559 | RETURN VALUE |
| 3560 | TRUE the record referenced by rec_ptr has been skipped |
| 3561 | FALSE otherwise |
| 3562 | */ |
| 3563 | |
bool JOIN_CACHE_BNL::skip_next_candidate_for_match(uchar *rec_ptr)
{
  /* Position the cursor at the prefix of the record referenced by rec_ptr */
  pos= rec_ptr-base_prefix_length;
  /* Let skip_if_not_needed_match decide and, if needed, advance 'pos' */
  return skip_if_not_needed_match();
}
| 3569 | |
| 3570 | |
| 3571 | /* |
| 3572 | Read next record from the BNL join cache buffer when looking for matches |
| 3573 | |
| 3574 | SYNOPSIS |
| 3575 | read_next_candidate_for_match |
| 3576 | rec_ptr pointer to the position in the join buffer right after the prefix |
| 3577 | the current record. |
| 3578 | |
| 3579 | DESCRIPTION |
| 3580 | This implementation of the virtual method read_next_candidate_for_match |
| 3581 | calls the method get_record to read the record referenced by rec_ptr from |
| 3582 | the join buffer into the record buffer. If this record refers to the |
| 3583 | fields in the other join buffers the call of get_record ensures that |
| 3584 | these fields are read into the corresponding record buffers as well. |
| 3585 | This function is supposed to be called after a successful call of |
| 3586 | the method get_next_candidate_for_match. |
| 3587 | |
| 3588 | RETURN VALUE |
| 3589 | none |
| 3590 | */ |
| 3591 | |
void JOIN_CACHE_BNL::read_next_candidate_for_match(uchar *rec_ptr)
{
  /* Rewind the cursor to the prefix of the record, then read it */
  pos= rec_ptr-base_prefix_length;
  get_record();
}
| 3597 | |
| 3598 | |
| 3599 | /* |
| 3600 | Initialize the BNL join cache |
| 3601 | |
| 3602 | SYNOPSIS |
| 3603 | init |
| 3604 | for_explain join buffer is initialized for explain only |
| 3605 | |
| 3606 | DESCRIPTION |
| 3607 | The function initializes the cache structure. It is supposed to be called |
| 3608 | right after a constructor for the JOIN_CACHE_BNL. |
| 3609 | |
| 3610 | NOTES |
| 3611 | The function first constructs a companion object of the type JOIN_TAB_SCAN, |
| 3612 | then it calls the init method of the parent class. |
| 3613 | |
| 3614 | RETURN VALUE |
| 3615 | 0 initialization with buffer allocations has been succeeded |
| 3616 | 1 otherwise |
| 3617 | */ |
| 3618 | |
| 3619 | int JOIN_CACHE_BNL::init(bool for_explain) |
| 3620 | { |
| 3621 | DBUG_ENTER("JOIN_CACHE_BNL::init" ); |
| 3622 | |
| 3623 | if (!(join_tab_scan= new JOIN_TAB_SCAN(join, join_tab))) |
| 3624 | DBUG_RETURN(1); |
| 3625 | |
| 3626 | DBUG_RETURN(JOIN_CACHE::init(for_explain)); |
| 3627 | } |
| 3628 | |
| 3629 | |
| 3630 | /* |
| 3631 | Get the chain of records from buffer matching the current candidate for join |
| 3632 | |
| 3633 | SYNOPSIS |
| 3634 | get_matching_chain_by_join_key() |
| 3635 | |
| 3636 | DESCRIPTION |
| 3637 | This function first build a join key for the record of join_tab that |
| 3638 | currently is in the join buffer for this table. Then it looks for |
| 3639 | the key entry with this key in the hash table of the join cache. |
| 3640 | If such a key entry is found the function returns the pointer to |
| 3641 | the head of the chain of records in the join_buffer that match this |
| 3642 | key. |
| 3643 | |
| 3644 | RETURN VALUE |
| 3645 | The pointer to the corresponding circular list of records if |
| 3646 | the key entry with the join key is found, 0 - otherwise. |
| 3647 | */ |
| 3648 | |
| 3649 | uchar *JOIN_CACHE_BNLH::get_matching_chain_by_join_key() |
| 3650 | { |
| 3651 | uchar *key_ref_ptr; |
| 3652 | TABLE *table= join_tab->table; |
| 3653 | TABLE_REF *ref= &join_tab->ref; |
| 3654 | KEY *keyinfo= join_tab->get_keyinfo_by_key_no(ref->key); |
| 3655 | /* Build the join key value out of the record in the record buffer */ |
| 3656 | key_copy(key_buff, table->record[0], keyinfo, key_length, TRUE); |
| 3657 | /* Look for this key in the join buffer */ |
| 3658 | if (!key_search(key_buff, key_length, &key_ref_ptr)) |
| 3659 | return 0; |
| 3660 | return key_ref_ptr+get_size_of_key_offset(); |
| 3661 | } |
| 3662 | |
| 3663 | |
| 3664 | /* |
| 3665 | Prepare to iterate over the BNLH join cache buffer to look for matches |
| 3666 | |
| 3667 | SYNOPSIS |
| 3668 | prepare_look_for_matches() |
| 3669 | skip_last <-> ignore the last record in the buffer |
| 3670 | |
| 3671 | DESCRIPTION |
| 3672 | The function prepares the join cache for an iteration over the |
| 3673 | records in the join buffer. The iteration is performed when looking |
| 3674 | for matches for the record from the joined table join_tab that |
| 3675 | has been placed into the record buffer of the joined table. |
| 3676 | If the value of the parameter skip_last is TRUE then the last |
| 3677 | record from the join buffer is ignored. |
| 3678 | The function builds the hashed key from the join fields of join_tab |
| 3679 | and uses this key to look in the hash table of the join cache for |
    the chain of matching records in the join buffer. If it finds
    such a chain it sets the member last_rec_ref_ptr to point to the
    last link of the chain while setting the member next_rec_ref_ptr to 0.
| 3683 | |
| 3684 | RETURN VALUE |
| 3685 | TRUE there are no matching records in the buffer to iterate over |
| 3686 | FALSE otherwise |
| 3687 | */ |
| 3688 | |
| 3689 | bool JOIN_CACHE_BNLH::prepare_look_for_matches(bool skip_last) |
| 3690 | { |
| 3691 | uchar *curr_matching_chain; |
| 3692 | last_matching_rec_ref_ptr= next_matching_rec_ref_ptr= 0; |
| 3693 | if (!(curr_matching_chain= get_matching_chain_by_join_key())) |
| 3694 | return 1; |
| 3695 | last_matching_rec_ref_ptr= get_next_rec_ref(curr_matching_chain); |
| 3696 | return 0; |
| 3697 | } |
| 3698 | |
| 3699 | |
| 3700 | /* |
| 3701 | Get next record from the BNLH join cache buffer when looking for matches |
| 3702 | |
| 3703 | SYNOPSIS |
| 3704 | get_next_candidate_for_match |
| 3705 | |
| 3706 | DESCRIPTION |
| 3707 | This method is used for iterations over the records from the join |
| 3708 | cache buffer when looking for matches for records from join_tab. |
    The method performs the necessary preparations to read the next record
| 3710 | from the join buffer into the record buffer by the method |
| 3711 | read_next_candidate_for_match, or, to skip the next record from the join |
| 3712 | buffer by the method skip_next_candidate_for_match. |
| 3713 | This implementation of the virtual method moves to the next record |
| 3714 | in the chain of all records from the join buffer that are to be |
| 3715 | equi-joined with the current record from join_tab. |
| 3716 | |
| 3717 | RETURN VALUE |
| 3718 | pointer to the beginning of the record fields in the join buffer |
| 3719 | if the there is another record to iterate over, 0 - otherwise. |
| 3720 | */ |
| 3721 | |
| 3722 | uchar *JOIN_CACHE_BNLH::get_next_candidate_for_match() |
| 3723 | { |
| 3724 | if (next_matching_rec_ref_ptr == last_matching_rec_ref_ptr) |
| 3725 | return 0; |
| 3726 | next_matching_rec_ref_ptr= get_next_rec_ref(next_matching_rec_ref_ptr ? |
| 3727 | next_matching_rec_ref_ptr : |
| 3728 | last_matching_rec_ref_ptr); |
| 3729 | return next_matching_rec_ref_ptr+rec_fields_offset; |
| 3730 | } |
| 3731 | |
| 3732 | |
| 3733 | /* |
| 3734 | Check whether the matching record from the BNLH cache is to be skipped |
| 3735 | |
| 3736 | SYNOPSIS |
| 3737 | skip_next_candidate_for_match |
| 3738 | rec_ptr pointer to the position in the join buffer right after |
| 3739 | the previous record |
| 3740 | |
| 3741 | DESCRIPTION |
| 3742 | This implementation of the virtual function just calls the |
| 3743 | method get_match_flag_by_pos to check whether the record referenced |
| 3744 | by ref_ptr has its match flag set to MATCH_FOUND. |
| 3745 | |
| 3746 | RETURN VALUE |
| 3747 | TRUE the record referenced by rec_ptr has its match flag set to |
| 3748 | MATCH_FOUND |
| 3749 | FALSE otherwise |
| 3750 | */ |
| 3751 | |
| 3752 | bool JOIN_CACHE_BNLH::skip_next_candidate_for_match(uchar *rec_ptr) |
| 3753 | { |
| 3754 | return join_tab->check_only_first_match() && |
| 3755 | (get_match_flag_by_pos(rec_ptr) == MATCH_FOUND); |
| 3756 | } |
| 3757 | |
| 3758 | |
| 3759 | /* |
| 3760 | Read next record from the BNLH join cache buffer when looking for matches |
| 3761 | |
| 3762 | SYNOPSIS |
| 3763 | read_next_candidate_for_match |
| 3764 | rec_ptr pointer to the position in the join buffer right after |
| 3765 | the previous record |
| 3766 | |
| 3767 | DESCRIPTION |
| 3768 | This implementation of the virtual method read_next_candidate_for_match |
| 3769 | calls the method get_record_by_pos to read the record referenced by rec_ptr |
| 3770 | from the join buffer into the record buffer. If this record refers to |
    fields in the other join buffers the call of get_record_by_pos ensures that
| 3772 | these fields are read into the corresponding record buffers as well. |
| 3773 | This function is supposed to be called after a successful call of |
| 3774 | the method get_next_candidate_for_match. |
| 3775 | |
| 3776 | RETURN VALUE |
| 3777 | none |
| 3778 | */ |
| 3779 | |
void JOIN_CACHE_BNLH::read_next_candidate_for_match(uchar *rec_ptr)
{
  /*
    Copy the record referenced by rec_ptr from the join buffer into
    the record buffer(s); to be called after a successful
    get_next_candidate_for_match().
  */
  get_record_by_pos(rec_ptr);
}
| 3784 | |
| 3785 | |
| 3786 | /* |
| 3787 | Initialize the BNLH join cache |
| 3788 | |
| 3789 | SYNOPSIS |
| 3790 | init |
| 3791 | for_explain join buffer is initialized for explain only |
| 3792 | |
| 3793 | DESCRIPTION |
| 3794 | The function initializes the cache structure. It is supposed to be called |
| 3795 | right after a constructor for the JOIN_CACHE_BNLH. |
| 3796 | |
| 3797 | NOTES |
| 3798 | The function first constructs a companion object of the type JOIN_TAB_SCAN, |
| 3799 | then it calls the init method of the parent class. |
| 3800 | |
| 3801 | RETURN VALUE |
| 3802 | 0 initialization with buffer allocations has been succeeded |
| 3803 | 1 otherwise |
| 3804 | */ |
| 3805 | |
| 3806 | int JOIN_CACHE_BNLH::init(bool for_explain) |
| 3807 | { |
| 3808 | DBUG_ENTER("JOIN_CACHE_BNLH::init" ); |
| 3809 | |
| 3810 | if (!(join_tab_scan= new JOIN_TAB_SCAN(join, join_tab))) |
| 3811 | DBUG_RETURN(1); |
| 3812 | |
| 3813 | DBUG_RETURN(JOIN_CACHE_HASHED::init(for_explain)); |
| 3814 | } |
| 3815 | |
| 3816 | |
| 3817 | /* |
| 3818 | Calculate the increment of the MRR buffer for a record write |
| 3819 | |
| 3820 | SYNOPSIS |
| 3821 | aux_buffer_incr() |
| 3822 | |
| 3823 | DESCRIPTION |
| 3824 | This implementation of the virtual function aux_buffer_incr determines |
| 3825 | for how much the size of the MRR buffer should be increased when another |
| 3826 | record is added to the cache. |
| 3827 | |
| 3828 | RETURN VALUE |
| 3829 | the increment of the size of the MRR buffer for the next record |
| 3830 | */ |
| 3831 | |
| 3832 | uint JOIN_TAB_SCAN_MRR::aux_buffer_incr(size_t recno) |
| 3833 | { |
| 3834 | uint incr= 0; |
| 3835 | TABLE_REF *ref= &join_tab->ref; |
| 3836 | TABLE *tab= join_tab->table; |
| 3837 | ha_rows rec_per_key= |
| 3838 | (ha_rows) tab->key_info[ref->key].actual_rec_per_key(ref->key_parts-1); |
| 3839 | set_if_bigger(rec_per_key, 1); |
| 3840 | if (recno == 1) |
| 3841 | incr= ref->key_length + tab->file->ref_length; |
| 3842 | incr+= (uint)(tab->file->stats.mrr_length_per_rec * rec_per_key); |
| 3843 | return incr; |
| 3844 | } |
| 3845 | |
| 3846 | |
| 3847 | /* |
| 3848 | Initiate iteration over records returned by MRR for the current join buffer |
| 3849 | |
| 3850 | SYNOPSIS |
| 3851 | open() |
| 3852 | |
| 3853 | DESCRIPTION |
| 3854 | The function initiates the process of iteration over the records from |
| 3855 | join_tab returned by the MRR interface functions for records from |
| 3856 | the join buffer. Such an iteration is performed by the BKA/BKAH join |
| 3857 | algorithm for each new refill of the join buffer. |
| 3858 | The function calls the MRR handler function multi_range_read_init to |
| 3859 | initiate this process. |
| 3860 | |
| 3861 | RETURN VALUE |
| 3862 | 0 the initiation is a success |
| 3863 | error code otherwise |
| 3864 | */ |
| 3865 | |
int JOIN_TAB_SCAN_MRR::open()
{
  handler *file= join_tab->table->file;

  /* Make sure a stale NULL-complemented row flag does not leak in */
  join_tab->table->null_row= 0;


  /* Dynamic range access is never used with BKA */
  DBUG_ASSERT(join_tab->use_quick != 2);

  join_tab->tracker->r_scans++;
  /* Snapshot and clear table->status of the preceding tables */
  save_or_restore_used_tabs(join_tab, FALSE);

  init_mrr_buff();

  /*
    Prepare to iterate over keys from the join buffer and to get
    matching candidates obtained with MRR handler functions.
  */
  if (!file->inited)
    file->ha_index_init(join_tab->ref.key, 1);
  ranges= cache->get_number_of_ranges_for_mrr();
  /* Without a pushed index condition the skip_index_tuple callback is moot */
  if (!join_tab->cache_idx_cond)
    range_seq_funcs.skip_index_tuple= 0;
  return file->multi_range_read_init(&range_seq_funcs, (void*) cache,
                                     ranges, mrr_mode, &mrr_buff);
}
| 3893 | |
| 3894 | |
| 3895 | /* |
| 3896 | Read the next record returned by MRR for the current join buffer |
| 3897 | |
| 3898 | SYNOPSIS |
| 3899 | next() |
| 3900 | |
| 3901 | DESCRIPTION |
| 3902 | The function reads the next record from the joined table join_tab |
| 3903 | returned by the MRR handler function multi_range_read_next for |
| 3904 | the current refill of the join buffer. The record is read into |
| 3905 | the record buffer used for join_tab records in join operations. |
| 3906 | |
| 3907 | RETURN VALUE |
| 3908 | 0 the next record exists and has been successfully read |
| 3909 | error code otherwise |
| 3910 | */ |
| 3911 | |
int JOIN_TAB_SCAN_MRR::next()
{
  /* The association slot receives the cache position of the matched key */
  char **ptr= (char **) cache->get_curr_association_ptr();

  DBUG_ASSERT(sizeof(range_id_t) == sizeof(*ptr));
  /* Any MRR error is collapsed into -1; 0 means a row was fetched */
  int rc= join_tab->table->file->multi_range_read_next((range_id_t*)ptr) ? -1 : 0;
  if (!rc)
  {
    join_tab->tracker->r_rows++;
    join_tab->tracker->r_rows_after_where++;
    /*
      If a record in an incremental cache contains no fields then the
      association for the last record in cache will be equal to cache->end_pos
    */
    /*
      psergey: this makes no sense where HA_MRR_NO_ASSOC is used.
      DBUG_ASSERT(cache->buff <= (uchar *) (*ptr) &&
                  (uchar *) (*ptr) <= cache->end_pos);
    */
  }
  return rc;
}
| 3934 | |
| 3935 | |
| 3936 | static |
| 3937 | void bka_range_seq_key_info(void *init_params, uint *length, |
| 3938 | key_part_map *map) |
| 3939 | { |
| 3940 | TABLE_REF *ref= &(((JOIN_CACHE*)init_params)->join_tab->ref); |
| 3941 | *length= ref->key_length; |
| 3942 | *map= (key_part_map(1) << ref->key_parts) - 1; |
| 3943 | } |
| 3944 | |
| 3945 | |
| 3946 | /* |
| 3947 | Initialize retrieval of range sequence for BKA join algorithm |
| 3948 | |
| 3949 | SYNOPSIS |
| 3950 | bka_range_seq_init() |
| 3951 | init_params pointer to the BKA join cache object |
| 3952 | n_ranges the number of ranges obtained |
| 3953 | flags combination of MRR flags |
| 3954 | |
| 3955 | DESCRIPTION |
| 3956 | The function interprets init_param as a pointer to a JOIN_CACHE_BKA |
| 3957 | object. The function prepares for an iteration over the join keys |
| 3958 | built for all records from the cache join buffer. |
| 3959 | |
| 3960 | NOTE |
    This function is used only as a callback function.
| 3962 | |
| 3963 | RETURN VALUE |
| 3964 | init_param value that is to be used as a parameter of bka_range_seq_next() |
| 3965 | */ |
| 3966 | |
| 3967 | static |
| 3968 | range_seq_t bka_range_seq_init(void *init_param, uint n_ranges, uint flags) |
| 3969 | { |
| 3970 | DBUG_ENTER("bka_range_seq_init" ); |
| 3971 | JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) init_param; |
| 3972 | cache->reset(0); |
| 3973 | DBUG_RETURN((range_seq_t) init_param); |
| 3974 | } |
| 3975 | |
| 3976 | |
| 3977 | /* |
| 3978 | Get the next range/key over records from the join buffer used by a BKA cache |
| 3979 | |
| 3980 | SYNOPSIS |
| 3981 | bka_range_seq_next() |
| 3982 | seq the value returned by bka_range_seq_init |
| 3983 | range OUT reference to the next range |
| 3984 | |
| 3985 | DESCRIPTION |
| 3986 | The function interprets seq as a pointer to a JOIN_CACHE_BKA |
| 3987 | object. The function returns a pointer to the range descriptor |
| 3988 | for the key built over the next record from the join buffer. |
| 3989 | |
| 3990 | NOTE |
    This function is used only as a callback function.
| 3992 | |
| 3993 | RETURN VALUE |
| 3994 | FALSE ok, the range structure filled with info about the next range/key |
| 3995 | TRUE no more ranges |
| 3996 | */ |
| 3997 | |
| 3998 | static |
| 3999 | bool bka_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range) |
| 4000 | { |
| 4001 | DBUG_ENTER("bka_range_seq_next" ); |
| 4002 | JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq; |
| 4003 | TABLE_REF *ref= &cache->join_tab->ref; |
| 4004 | key_range *start_key= &range->start_key; |
| 4005 | if ((start_key->length= cache->get_next_key((uchar **) &start_key->key))) |
| 4006 | { |
| 4007 | start_key->keypart_map= (1 << ref->key_parts) - 1; |
| 4008 | start_key->flag= HA_READ_KEY_EXACT; |
| 4009 | range->end_key= *start_key; |
| 4010 | range->end_key.flag= HA_READ_AFTER_KEY; |
| 4011 | range->ptr= (char *) cache->get_curr_rec(); |
| 4012 | range->range_flag= EQ_RANGE; |
| 4013 | DBUG_RETURN(0); |
| 4014 | } |
| 4015 | DBUG_RETURN(1); |
| 4016 | } |
| 4017 | |
| 4018 | |
| 4019 | /* |
| 4020 | Check whether range_info orders to skip the next record from BKA buffer |
| 4021 | |
| 4022 | SYNOPSIS |
| 4023 | bka_range_seq_skip_record() |
| 4024 | seq value returned by bka_range_seq_init() |
| 4025 | range_info information about the next range |
| 4026 | rowid [NOT USED] rowid of the record to be checked |
| 4027 | |
| 4028 | |
| 4029 | DESCRIPTION |
| 4030 | The function interprets seq as a pointer to a JOIN_CACHE_BKA object. |
| 4031 | The function returns TRUE if the record with this range_info |
| 4032 | is to be filtered out from the stream of records returned by |
| 4033 | multi_range_read_next(). |
| 4034 | |
| 4035 | NOTE |
    This function is used only as a callback function.
| 4037 | |
| 4038 | RETURN VALUE |
| 4039 | 1 record with this range_info is to be filtered out from the stream |
| 4040 | of records returned by multi_range_read_next() |
| 4041 | 0 the record is to be left in the stream |
| 4042 | */ |
| 4043 | |
| 4044 | static |
| 4045 | bool bka_range_seq_skip_record(range_seq_t rseq, range_id_t range_info, uchar *rowid) |
| 4046 | { |
| 4047 | DBUG_ENTER("bka_range_seq_skip_record" ); |
| 4048 | JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq; |
| 4049 | bool res= cache->get_match_flag_by_pos((uchar *) range_info) == |
| 4050 | JOIN_CACHE::MATCH_FOUND; |
| 4051 | DBUG_RETURN(res); |
| 4052 | } |
| 4053 | |
| 4054 | |
| 4055 | /* |
| 4056 | Check if the record combination from BKA cache matches the index condition |
| 4057 | |
| 4058 | SYNOPSIS |
| 4059 | bka_skip_index_tuple() |
| 4060 | rseq value returned by bka_range_seq_init() |
| 4061 | range_info record chain for the next range/key returned by MRR |
| 4062 | |
| 4063 | DESCRIPTION |
| 4064 | This is wrapper for JOIN_CACHE_BKA::skip_index_tuple method, |
| 4065 | see comments there. |
| 4066 | |
| 4067 | NOTE |
| 4068 | This function is used as a RANGE_SEQ_IF::skip_index_tuple callback. |
| 4069 | |
| 4070 | RETURN VALUE |
| 4071 | 0 The record combination satisfies the index condition |
| 4072 | 1 Otherwise |
| 4073 | */ |
| 4074 | |
| 4075 | static |
| 4076 | bool bka_skip_index_tuple(range_seq_t rseq, range_id_t range_info) |
| 4077 | { |
| 4078 | DBUG_ENTER("bka_skip_index_tuple" ); |
| 4079 | JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq; |
| 4080 | THD *thd= cache->thd(); |
| 4081 | bool res; |
| 4082 | status_var_increment(thd->status_var.ha_icp_attempts); |
| 4083 | if (!(res= cache->skip_index_tuple(range_info))) |
| 4084 | status_var_increment(thd->status_var.ha_icp_match); |
| 4085 | DBUG_RETURN(res); |
| 4086 | } |
| 4087 | |
| 4088 | |
| 4089 | /* |
| 4090 | Prepare to read the record from BKA cache matching the current joined record |
| 4091 | |
| 4092 | SYNOPSIS |
| 4093 | prepare_look_for_matches() |
| 4094 | skip_last <-> ignore the last record in the buffer (always unused here) |
| 4095 | |
| 4096 | DESCRIPTION |
| 4097 | The function prepares to iterate over records in the join cache buffer |
| 4098 | matching the record loaded into the record buffer for join_tab when |
| 4099 | performing join operation by BKA join algorithm. With BKA algorithms the |
| 4100 | record loaded into the record buffer for join_tab always has a direct |
| 4101 | reference to the matching records from the join buffer. When the regular |
| 4102 | BKA join algorithm is employed the record from join_tab can refer to |
| 4103 | only one such record. |
| 4104 | The function sets the counter of the remaining records from the cache |
| 4105 | buffer that would match the current join_tab record to 1. |
| 4106 | |
| 4107 | RETURN VALUE |
| 4108 | TRUE there are no records in the buffer to iterate over |
| 4109 | FALSE otherwise |
| 4110 | */ |
| 4111 | |
| 4112 | bool JOIN_CACHE_BKA::prepare_look_for_matches(bool skip_last) |
| 4113 | { |
| 4114 | if (!records) |
| 4115 | return TRUE; |
| 4116 | rem_records= 1; |
| 4117 | return FALSE; |
| 4118 | } |
| 4119 | |
| 4120 | |
| 4121 | /* |
| 4122 | Get the record from the BKA cache matching the current joined record |
| 4123 | |
| 4124 | SYNOPSIS |
| 4125 | get_next_candidate_for_match |
| 4126 | |
| 4127 | DESCRIPTION |
| 4128 | This method is used for iterations over the records from the join |
| 4129 | cache buffer when looking for matches for records from join_tab. |
| 4130 | The method performs the necessary preparations to read the next record |
| 4131 | from the join buffer into the record buffer by the method |
| 4132 | read_next_candidate_for_match, or, to skip the next record from the join |
| 4133 | buffer by the method skip_if_not_needed_match. |
| 4134 | This implementation of the virtual method get_next_candidate_for_match |
| 4135 | just decrements the counter of the records that are to be iterated over |
| 4136 | and returns the value of curr_association as a reference to the position |
| 4137 | of the beginning of the record fields in the buffer. |
| 4138 | |
| 4139 | RETURN VALUE |
| 4140 | pointer to the start of the record fields in the join buffer |
| 4141 | if the there is another record to iterate over, 0 - otherwise. |
| 4142 | */ |
| 4143 | |
| 4144 | uchar *JOIN_CACHE_BKA::get_next_candidate_for_match() |
| 4145 | { |
| 4146 | if (!rem_records) |
| 4147 | return 0; |
| 4148 | rem_records--; |
| 4149 | return curr_association; |
| 4150 | } |
| 4151 | |
| 4152 | |
| 4153 | /* |
| 4154 | Check whether the matching record from the BKA cache is to be skipped |
| 4155 | |
| 4156 | SYNOPSIS |
| 4157 | skip_next_candidate_for_match |
| 4158 | rec_ptr pointer to the position in the join buffer right after |
| 4159 | the previous record |
| 4160 | |
| 4161 | DESCRIPTION |
| 4162 | This implementation of the virtual function just calls the |
| 4163 | method get_match_flag_by_pos to check whether the record referenced |
| 4164 | by ref_ptr has its match flag set to MATCH_FOUND. |
| 4165 | |
| 4166 | RETURN VALUE |
| 4167 | TRUE the record referenced by rec_ptr has its match flag set to |
| 4168 | MATCH_FOUND |
| 4169 | FALSE otherwise |
| 4170 | */ |
| 4171 | |
| 4172 | bool JOIN_CACHE_BKA::skip_next_candidate_for_match(uchar *rec_ptr) |
| 4173 | { |
| 4174 | return join_tab->check_only_first_match() && |
| 4175 | (get_match_flag_by_pos(rec_ptr) == MATCH_FOUND); |
| 4176 | } |
| 4177 | |
| 4178 | |
| 4179 | /* |
| 4180 | Read the next record from the BKA join cache buffer when looking for matches |
| 4181 | |
| 4182 | SYNOPSIS |
| 4183 | read_next_candidate_for_match |
| 4184 | rec_ptr pointer to the position in the join buffer right after |
| 4185 | the previous record |
| 4186 | |
| 4187 | DESCRIPTION |
| 4188 | This implementation of the virtual method read_next_candidate_for_match |
| 4189 | calls the method get_record_by_pos to read the record referenced by rec_ptr |
| 4190 | from the join buffer into the record buffer. If this record refers to |
    fields in the other join buffers the call of get_record_by_pos ensures that
| 4192 | these fields are read into the corresponding record buffers as well. |
| 4193 | This function is supposed to be called after a successful call of |
| 4194 | the method get_next_candidate_for_match. |
| 4195 | |
| 4196 | RETURN VALUE |
| 4197 | none |
| 4198 | */ |
| 4199 | |
void JOIN_CACHE_BKA::read_next_candidate_for_match(uchar *rec_ptr)
{
  /*
    Copy the record referenced by rec_ptr from the join buffer into
    the record buffer(s); to be called after a successful
    get_next_candidate_for_match().
  */
  get_record_by_pos(rec_ptr);
}
| 4204 | |
| 4205 | |
| 4206 | /* |
| 4207 | Initialize the BKA join cache |
| 4208 | |
| 4209 | SYNOPSIS |
| 4210 | init |
| 4211 | for_explain join buffer is initialized for explain only |
| 4212 | |
| 4213 | |
| 4214 | DESCRIPTION |
| 4215 | The function initializes the cache structure. It is supposed to be called |
| 4216 | right after a constructor for the JOIN_CACHE_BKA. |
| 4217 | |
| 4218 | NOTES |
| 4219 | The function first constructs a companion object of the type |
| 4220 | JOIN_TAB_SCAN_MRR, then it calls the init method of the parent class. |
| 4221 | |
| 4222 | RETURN VALUE |
| 4223 | 0 initialization with buffer allocations has been succeeded |
| 4224 | 1 otherwise |
| 4225 | */ |
| 4226 | |
int JOIN_CACHE_BKA::init(bool for_explain)
{
  int res;
  bool check_only_first_match= join_tab->check_only_first_match();

  /*
    Callback table handed to the MRR interface; the skip_record callback
    is installed only for first-match (semi-join) tables, where
    already-matched records can be filtered out early.
  */
  RANGE_SEQ_IF rs_funcs= { bka_range_seq_key_info,
                           bka_range_seq_init,
                           bka_range_seq_next,
                           check_only_first_match ? bka_range_seq_skip_record : 0,
                           bka_skip_index_tuple };

  DBUG_ENTER("JOIN_CACHE_BKA::init" );

  /* Companion scanner that drives the joined table through MRR */
  JOIN_TAB_SCAN_MRR *jsm;
  if (!(join_tab_scan= jsm= new JOIN_TAB_SCAN_MRR(join, join_tab,
                                                  mrr_mode, rs_funcs)))
    DBUG_RETURN(1);

  if ((res= JOIN_CACHE::init(for_explain)))
    DBUG_RETURN(res);

  /* With embedded keys the MRR layer can use key values in place */
  if (use_emb_key)
    jsm->mrr_mode |= HA_MRR_MATERIALIZED_KEYS;

  DBUG_RETURN(0);
}
| 4253 | |
| 4254 | |
| 4255 | /* |
| 4256 | Get the key built over the next record from BKA join buffer |
| 4257 | |
| 4258 | SYNOPSIS |
| 4259 | get_next_key() |
| 4260 | key pointer to the buffer where the key value is to be placed |
| 4261 | |
| 4262 | DESCRIPTION |
| 4263 | The function reads key fields from the current record in the join buffer. |
| 4264 | and builds the key value out of these fields that will be used to access |
| 4265 | the 'join_tab' table. Some of key fields may belong to previous caches. |
| 4266 | They are accessed via record references to the record parts stored in the |
| 4267 | previous join buffers. The other key fields always are placed right after |
| 4268 | the flag fields of the record. |
| 4269 | If the key is embedded, which means that its value can be read directly |
| 4270 | from the join buffer, then *key is set to the beginning of the key in |
| 4271 | this buffer. Otherwise the key is built in the join_tab->ref->key_buff. |
  The function returns the length of the key if it succeeds to read it.
  It is assumed that the function starts reading at the position of
| 4274 | the record length which is provided for each records in a BKA cache. |
| 4275 | After the key is built the 'pos' value points to the first position after |
| 4276 | the current record. |
| 4277 | The function just skips the records with MATCH_IMPOSSIBLE in the |
| 4278 | match flag field if there is any. |
| 4279 | The function returns 0 if the initial position is after the beginning |
| 4280 | of the record fields for last record from the join buffer. |
| 4281 | |
| 4282 | RETURN VALUE |
| 4283 | length of the key value - if the starting value of 'pos' points to |
| 4284 | the position before the fields for the last record, |
| 4285 | 0 - otherwise. |
| 4286 | */ |
| 4287 | |
| 4288 | uint JOIN_CACHE_BKA::get_next_key(uchar ** key) |
| 4289 | { |
| 4290 | uint len; |
| 4291 | uint32 rec_len; |
| 4292 | uchar *init_pos; |
| 4293 | JOIN_CACHE *cache; |
| 4294 | |
| 4295 | start: |
| 4296 | |
| 4297 | /* Any record in a BKA cache is prepended with its length */ |
| 4298 | DBUG_ASSERT(with_length); |
| 4299 | |
| 4300 | if ((pos+size_of_rec_len) > last_rec_pos || !records) |
| 4301 | return 0; |
| 4302 | |
| 4303 | /* Read the length of the record */ |
| 4304 | rec_len= get_rec_length(pos); |
| 4305 | pos+= size_of_rec_len; |
| 4306 | init_pos= pos; |
| 4307 | |
| 4308 | /* Read a reference to the previous cache if any */ |
| 4309 | if (prev_cache) |
| 4310 | pos+= prev_cache->get_size_of_rec_offset(); |
| 4311 | |
| 4312 | curr_rec_pos= pos; |
| 4313 | |
| 4314 | /* Read all flag fields of the record */ |
| 4315 | read_flag_fields(); |
| 4316 | |
| 4317 | if (with_match_flag && |
| 4318 | (Match_flag) curr_rec_pos[0] == MATCH_IMPOSSIBLE ) |
| 4319 | { |
| 4320 | pos= init_pos+rec_len; |
| 4321 | goto start; |
| 4322 | } |
| 4323 | |
| 4324 | if (use_emb_key) |
| 4325 | { |
| 4326 | /* An embedded key is taken directly from the join buffer */ |
| 4327 | *key= pos; |
| 4328 | len= emb_key_length; |
| 4329 | } |
| 4330 | else |
| 4331 | { |
| 4332 | /* Read key arguments from previous caches if there are any such fields */ |
| 4333 | if (external_key_arg_fields) |
| 4334 | { |
| 4335 | uchar *rec_ptr= curr_rec_pos; |
| 4336 | uint key_arg_count= external_key_arg_fields; |
| 4337 | CACHE_FIELD **copy_ptr= blob_ptr-key_arg_count; |
| 4338 | for (cache= prev_cache; key_arg_count; cache= cache->prev_cache) |
| 4339 | { |
| 4340 | uint len= 0; |
| 4341 | DBUG_ASSERT(cache); |
| 4342 | rec_ptr= cache->get_rec_ref(rec_ptr); |
| 4343 | while (!cache->referenced_fields) |
| 4344 | { |
| 4345 | cache= cache->prev_cache; |
| 4346 | DBUG_ASSERT(cache); |
| 4347 | rec_ptr= cache->get_rec_ref(rec_ptr); |
| 4348 | } |
| 4349 | while (key_arg_count && |
| 4350 | cache->read_referenced_field(*copy_ptr, rec_ptr, &len)) |
| 4351 | { |
| 4352 | copy_ptr++; |
| 4353 | --key_arg_count; |
| 4354 | } |
| 4355 | } |
| 4356 | } |
| 4357 | |
| 4358 | /* |
| 4359 | Read the other key arguments from the current record. The fields for |
| 4360 | these arguments are always first in the sequence of the record's fields. |
| 4361 | */ |
| 4362 | CACHE_FIELD *copy= field_descr+flag_fields; |
| 4363 | CACHE_FIELD *copy_end= copy+local_key_arg_fields; |
| 4364 | bool blob_in_rec_buff= blob_data_is_in_rec_buff(curr_rec_pos); |
| 4365 | for ( ; copy < copy_end; copy++) |
| 4366 | read_record_field(copy, blob_in_rec_buff); |
| 4367 | |
| 4368 | /* Build the key over the fields read into the record buffers */ |
| 4369 | TABLE_REF *ref= &join_tab->ref; |
| 4370 | cp_buffer_from_ref(join->thd, join_tab->table, ref); |
| 4371 | *key= ref->key_buff; |
| 4372 | len= ref->key_length; |
| 4373 | } |
| 4374 | |
| 4375 | pos= init_pos+rec_len; |
| 4376 | |
| 4377 | return len; |
| 4378 | } |
| 4379 | |
| 4380 | |
| 4381 | /* |
| 4382 | Check the index condition of the joined table for a record from the BKA cache |
| 4383 | |
| 4384 | SYNOPSIS |
| 4385 | skip_index_tuple() |
| 4386 | range_info pointer to the record returned by MRR |
| 4387 | |
| 4388 | DESCRIPTION |
| 4389 | This function is invoked from MRR implementation to check if an index |
| 4390 | tuple matches the index condition. It is used in the case where the index |
| 4391 | condition actually depends on both columns of the used index and columns |
| 4392 | from previous tables. |
| 4393 | |
| 4394 | NOTES |
| 4395 | Accessing columns of the previous tables requires special handling with |
| 4396 | BKA. The idea of BKA is to collect record combinations in a buffer and |
| 4397 | then do a batch of ref access lookups, i.e. by the time we're doing a |
| 4398 | lookup its previous-records-combination is not in prev_table->record[0] |
| 4399 | but somewhere in the join buffer. |
| 4400 | We need to get it from there back into prev_table(s)->record[0] before we |
| 4401 | can evaluate the index condition, and that's why we need this function |
| 4402 | instead of regular IndexConditionPushdown. |
| 4403 | |
| 4404 | NOTES |
| 4405 | Possible optimization: |
| 4406 | Before we unpack the record from a previous table |
| 4407 | check if this table is used in the condition. |
| 4408 | If so then unpack the record otherwise skip the unpacking. |
| 4409 | This should be done by a special virtual method |
| 4410 | get_partial_record_by_pos(). |
| 4411 | |
| 4412 | RETURN VALUE |
    1 the record combination does not satisfy the index condition
| 4414 | 0 otherwise |
| 4415 | */ |
| 4416 | |
| 4417 | bool JOIN_CACHE_BKA::skip_index_tuple(range_id_t range_info) |
| 4418 | { |
| 4419 | DBUG_ENTER("JOIN_CACHE_BKA::skip_index_tuple" ); |
| 4420 | get_record_by_pos((uchar*)range_info); |
| 4421 | DBUG_RETURN(!join_tab->cache_idx_cond->val_int()); |
| 4422 | } |
| 4423 | |
| 4424 | |
| 4425 | |
| 4426 | /* |
| 4427 | Initialize retrieval of range sequence for the BKAH join algorithm |
| 4428 | |
| 4429 | SYNOPSIS |
| 4430 | bkah_range_seq_init() |
| 4431 | init_params pointer to the BKAH join cache object |
| 4432 | n_ranges the number of ranges obtained |
| 4433 | flags combination of MRR flags |
| 4434 | |
| 4435 | DESCRIPTION |
| 4436 | The function interprets init_param as a pointer to a JOIN_CACHE_BKAH |
| 4437 | object. The function prepares for an iteration over distinct join keys |
| 4438 | built over the records from the cache join buffer. |
| 4439 | |
| 4440 | NOTE |
  This function is used only as a callback function.
| 4442 | |
| 4443 | RETURN VALUE |
| 4444 | init_param value that is to be used as a parameter of |
| 4445 | bkah_range_seq_next() |
| 4446 | */ |
| 4447 | |
| 4448 | static |
| 4449 | range_seq_t bkah_range_seq_init(void *init_param, uint n_ranges, uint flags) |
| 4450 | { |
| 4451 | DBUG_ENTER("bkah_range_seq_init" ); |
| 4452 | JOIN_CACHE_BKAH *cache= (JOIN_CACHE_BKAH *) init_param; |
| 4453 | cache->reset(0); |
| 4454 | DBUG_RETURN((range_seq_t) init_param); |
| 4455 | } |
| 4456 | |
| 4457 | |
| 4458 | /* |
| 4459 | Get the next range/key over records from the join buffer of a BKAH cache |
| 4460 | |
| 4461 | SYNOPSIS |
| 4462 | bkah_range_seq_next() |
| 4463 | seq value returned by bkah_range_seq_init() |
| 4464 | range OUT reference to the next range |
| 4465 | |
| 4466 | DESCRIPTION |
| 4467 | The function interprets seq as a pointer to a JOIN_CACHE_BKAH |
| 4468 | object. The function returns a pointer to the range descriptor |
| 4469 | for the next unique key built over records from the join buffer. |
| 4470 | |
| 4471 | NOTE |
  This function is used only as a callback function.
| 4473 | |
| 4474 | RETURN VALUE |
| 4475 | FALSE ok, the range structure filled with info about the next range/key |
| 4476 | TRUE no more ranges |
| 4477 | */ |
| 4478 | |
| 4479 | static |
| 4480 | bool bkah_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range) |
| 4481 | { |
| 4482 | DBUG_ENTER("bkah_range_seq_next" ); |
| 4483 | JOIN_CACHE_BKAH *cache= (JOIN_CACHE_BKAH *) rseq; |
| 4484 | TABLE_REF *ref= &cache->join_tab->ref; |
| 4485 | key_range *start_key= &range->start_key; |
| 4486 | if ((start_key->length= cache->get_next_key((uchar **) &start_key->key))) |
| 4487 | { |
| 4488 | start_key->keypart_map= (1 << ref->key_parts) - 1; |
| 4489 | start_key->flag= HA_READ_KEY_EXACT; |
| 4490 | range->end_key= *start_key; |
| 4491 | range->end_key.flag= HA_READ_AFTER_KEY; |
| 4492 | range->ptr= (char *) cache->get_curr_key_chain(); |
| 4493 | range->range_flag= EQ_RANGE; |
| 4494 | DBUG_RETURN(0); |
| 4495 | } |
| 4496 | DBUG_RETURN(1); |
| 4497 | } |
| 4498 | |
| 4499 | |
| 4500 | /* |
| 4501 | Check whether range_info orders to skip the next record from BKAH join buffer |
| 4502 | |
| 4503 | SYNOPSIS |
| 4504 | bkah_range_seq_skip_record() |
| 4505 | seq value returned by bkah_range_seq_init() |
| 4506 | range_info information about the next range/key returned by MRR |
| 4507 | rowid [NOT USED] rowid of the record to be checked (not used) |
| 4508 | |
| 4509 | DESCRIPTION |
| 4510 | The function interprets seq as a pointer to a JOIN_CACHE_BKAH |
| 4511 | object. The function returns TRUE if the record with this range_info |
| 4512 | is to be filtered out from the stream of records returned by |
| 4513 | multi_range_read_next(). |
| 4514 | |
| 4515 | NOTE |
  This function is used only as a callback function.
| 4517 | |
| 4518 | RETURN VALUE |
| 4519 | 1 record with this range_info is to be filtered out from the stream |
| 4520 | of records returned by multi_range_read_next() |
| 4521 | 0 the record is to be left in the stream |
| 4522 | */ |
| 4523 | |
| 4524 | static |
| 4525 | bool bkah_range_seq_skip_record(range_seq_t rseq, range_id_t range_info, uchar *rowid) |
| 4526 | { |
| 4527 | DBUG_ENTER("bkah_range_seq_skip_record" ); |
| 4528 | JOIN_CACHE_BKAH *cache= (JOIN_CACHE_BKAH *) rseq; |
| 4529 | bool res= cache->check_all_match_flags_for_key((uchar *) range_info); |
| 4530 | DBUG_RETURN(res); |
| 4531 | } |
| 4532 | |
| 4533 | |
| 4534 | /* |
| 4535 | Check if the record combination from BKAH cache matches the index condition |
| 4536 | |
| 4537 | SYNOPSIS |
| 4538 | bkah_skip_index_tuple() |
| 4539 | rseq value returned by bka_range_seq_init() |
| 4540 | range_info record chain for the next range/key returned by MRR |
| 4541 | |
| 4542 | DESCRIPTION |
| 4543 | This is wrapper for JOIN_CACHE_BKA_UNIQUE::skip_index_tuple method, |
| 4544 | see comments there. |
| 4545 | |
| 4546 | NOTE |
| 4547 | This function is used as a RANGE_SEQ_IF::skip_index_tuple callback. |
| 4548 | |
| 4549 | RETURN VALUE |
| 4550 | 0 some records from the chain satisfy the index condition |
| 4551 | 1 otherwise |
| 4552 | */ |
| 4553 | |
| 4554 | static |
| 4555 | bool bkah_skip_index_tuple(range_seq_t rseq, range_id_t range_info) |
| 4556 | { |
| 4557 | DBUG_ENTER("bka_unique_skip_index_tuple" ); |
| 4558 | JOIN_CACHE_BKAH *cache= (JOIN_CACHE_BKAH *) rseq; |
| 4559 | THD *thd= cache->thd(); |
| 4560 | bool res; |
| 4561 | status_var_increment(thd->status_var.ha_icp_attempts); |
| 4562 | if (!(res= cache->skip_index_tuple(range_info))) |
| 4563 | status_var_increment(thd->status_var.ha_icp_match); |
| 4564 | DBUG_RETURN(res); |
| 4565 | } |
| 4566 | |
| 4567 | |
| 4568 | /* |
| 4569 | Prepare to read record from BKAH cache matching the current joined record |
| 4570 | |
| 4571 | SYNOPSIS |
| 4572 | prepare_look_for_matches() |
| 4573 | skip_last <-> ignore the last record in the buffer (always unused here) |
| 4574 | |
| 4575 | DESCRIPTION |
| 4576 | The function prepares to iterate over records in the join cache buffer |
| 4577 | matching the record loaded into the record buffer for join_tab when |
| 4578 | performing join operation by BKAH join algorithm. With BKAH algorithm, if |
| 4579 | association labels are used, then record loaded into the record buffer |
  for join_tab always has a direct reference to the chain of the matching
  records from the join buffer. If association labels are not used then
  the chain of the matching records is obtained by the call of the
| 4583 | get_key_chain_by_join_key function. |
| 4584 | |
| 4585 | RETURN VALUE |
| 4586 | TRUE there are no records in the buffer to iterate over |
| 4587 | FALSE otherwise |
| 4588 | */ |
| 4589 | |
| 4590 | bool JOIN_CACHE_BKAH::prepare_look_for_matches(bool skip_last) |
| 4591 | { |
| 4592 | last_matching_rec_ref_ptr= next_matching_rec_ref_ptr= 0; |
| 4593 | if (no_association && |
| 4594 | !(curr_matching_chain= get_matching_chain_by_join_key())) //psergey: added '!' |
| 4595 | return 1; |
| 4596 | last_matching_rec_ref_ptr= get_next_rec_ref(curr_matching_chain); |
| 4597 | return 0; |
| 4598 | } |
| 4599 | |
| 4600 | /* |
| 4601 | Initialize the BKAH join cache |
| 4602 | |
| 4603 | SYNOPSIS |
| 4604 | init |
| 4605 | for_explain join buffer is initialized for explain only |
| 4606 | |
| 4607 | DESCRIPTION |
| 4608 | The function initializes the cache structure. It is supposed to be called |
| 4609 | right after a constructor for the JOIN_CACHE_BKAH. |
| 4610 | |
| 4611 | NOTES |
| 4612 | The function first constructs a companion object of the type |
| 4613 | JOIN_TAB_SCAN_MRR, then it calls the init method of the parent class. |
| 4614 | |
| 4615 | RETURN VALUE |
| 4616 | 0 initialization with buffer allocations has been succeeded |
| 4617 | 1 otherwise |
| 4618 | */ |
| 4619 | |
| 4620 | int JOIN_CACHE_BKAH::init(bool for_explain) |
| 4621 | { |
| 4622 | bool check_only_first_match= join_tab->check_only_first_match(); |
| 4623 | |
| 4624 | no_association= MY_TEST(mrr_mode & HA_MRR_NO_ASSOCIATION); |
| 4625 | |
| 4626 | RANGE_SEQ_IF rs_funcs= { bka_range_seq_key_info, |
| 4627 | bkah_range_seq_init, |
| 4628 | bkah_range_seq_next, |
| 4629 | check_only_first_match && !no_association ? |
| 4630 | bkah_range_seq_skip_record : 0, |
| 4631 | bkah_skip_index_tuple }; |
| 4632 | |
| 4633 | DBUG_ENTER("JOIN_CACHE_BKAH::init" ); |
| 4634 | |
| 4635 | if (!(join_tab_scan= new JOIN_TAB_SCAN_MRR(join, join_tab, |
| 4636 | mrr_mode, rs_funcs))) |
| 4637 | DBUG_RETURN(1); |
| 4638 | |
| 4639 | DBUG_RETURN(JOIN_CACHE_HASHED::init(for_explain)); |
| 4640 | } |
| 4641 | |
| 4642 | |
| 4643 | /* |
| 4644 | Check the index condition of the joined table for a record from the BKA cache |
| 4645 | |
| 4646 | SYNOPSIS |
| 4647 | skip_index_tuple() |
| 4648 | range_info record chain returned by MRR |
| 4649 | |
| 4650 | DESCRIPTION |
| 4651 | See JOIN_CACHE_BKA::skip_index_tuple(). |
  This function is the variant for use with the class JOIN_CACHE_BKAH.
| 4653 | The difference from JOIN_CACHE_BKA case is that there may be multiple |
| 4654 | previous table record combinations that share the same key(MRR range). |
| 4655 | As a consequence, we need to loop through the chain of all table record |
| 4656 | combinations that match the given MRR range key range_info until we find |
| 4657 | one that satisfies the index condition. |
| 4658 | |
| 4659 | NOTE |
| 4660 | Possible optimization: |
| 4661 | Before we unpack the record from a previous table |
| 4662 | check if this table is used in the condition. |
| 4663 | If so then unpack the record otherwise skip the unpacking. |
| 4664 | This should be done by a special virtual method |
| 4665 | get_partial_record_by_pos(). |
| 4666 | |
| 4667 | RETURN VALUE |
| 4668 | 1 any record combination from the chain referred by range_info |
| 4669 | does not satisfy the index condition |
| 4670 | 0 otherwise |
| 4671 | |
| 4672 | |
| 4673 | */ |
| 4674 | |
| 4675 | bool JOIN_CACHE_BKAH::skip_index_tuple(range_id_t range_info) |
| 4676 | { |
| 4677 | uchar *last_rec_ref_ptr= get_next_rec_ref((uchar*) range_info); |
| 4678 | uchar *next_rec_ref_ptr= last_rec_ref_ptr; |
| 4679 | DBUG_ENTER("JOIN_CACHE_BKAH::skip_index_tuple" ); |
| 4680 | do |
| 4681 | { |
| 4682 | next_rec_ref_ptr= get_next_rec_ref(next_rec_ref_ptr); |
| 4683 | uchar *rec_ptr= next_rec_ref_ptr + rec_fields_offset; |
| 4684 | get_record_by_pos(rec_ptr); |
| 4685 | if (join_tab->cache_idx_cond->val_int()) |
| 4686 | DBUG_RETURN(FALSE); |
| 4687 | } while(next_rec_ref_ptr != last_rec_ref_ptr); |
| 4688 | DBUG_RETURN(TRUE); |
| 4689 | } |
| 4690 | |