opt_subselect.h source code [MariaDB/sql/opt_subselect.h]

1	/*
2	Copyright (c) 2010, 2015, MariaDB
3
4	This program is free software; you can redistribute it and/or modify
5	it under the terms of the GNU General Public License as published by
6	the Free Software Foundation; version 2 of the License.
7
8	This program is distributed in the hope that it will be useful,
9	but WITHOUT ANY WARRANTY; without even the implied warranty of
10	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11	GNU General Public License for more details.
12
13	You should have received a copy of the GNU General Public License
14	along with this program; if not, write to the Free Software
15	Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA /*
16
17	/*
18	Semi-join subquery optimization code definitions
19	*/
20
21	#ifdef USE_PRAGMA_INTERFACE
22	#pragma interface /* gcc class implementation */
23	#endif
24
25	int check_and_do_in_subquery_rewrites(JOIN *join);
26	bool convert_join_subqueries_to_semijoins(JOIN *join);
27	int pull_out_semijoin_tables(JOIN *join);
28	bool optimize_semijoin_nests(JOIN *join, table_map all_table_map);
29	bool setup_jtbm_semi_joins(JOIN join, List<TABLE_LIST> join_list,
30	Item **join_where);
31	void cleanup_empty_jtbm_semi_joins(JOIN join, List<TABLE_LIST> join_list);
32
33	// used by Loose_scan_opt
34	ulonglong get_bound_sj_equalities(TABLE_LIST *sj_nest,
35	table_map remaining_tables);
36
37	/*
38	This is a class for considering possible loose index scan optimizations.
39	It's usage pattern is as follows:
40	best_access_path()
41	{
42	Loose_scan_opt opt;
43
44	opt.init()
45	for each index we can do ref access with
46	{
47	opt.next_ref_key();
48	for each keyuse
49	opt.add_keyuse();
50	opt.check_ref_access();
51	}
52
53	if (some criteria for range scans)
54	opt.check_range_access();
55
56	opt.get_best_option();
57	}
58	*/
59
60	class Loose_scan_opt
61	{
62	/ All methods must check this before doing anything else /
63	bool try_loosescan;
64
65	/*
66	If we consider (oe1, .. oeN) IN (SELECT ie1, .. ieN) then ieK=oeK is
67	called sj-equality. If oeK depends only on preceding tables then such
68	equality is called 'bound'.
69	*/
70	ulonglong bound_sj_equalities;
71
72	/ Accumulated properties of ref access we're now considering: /
73	ulonglong handled_sj_equalities;
74	key_part_map loose_scan_keyparts;
75	uint max_loose_keypart;
76	bool part1_conds_met;
77
78	/*
79	Use of quick select is a special case. Some of its properties:
80	*/
81	uint quick_uses_applicable_index;
82	uint quick_max_loose_keypart;
83
84	/ Best loose scan method so far /
85	uint best_loose_scan_key;
86	double best_loose_scan_cost;
87	double best_loose_scan_records;
88	KEYUSE *best_loose_scan_start_key;
89
90	uint best_max_loose_keypart;
91
92	public:
93	Loose_scan_opt():
94	try_loosescan(FALSE),
95	bound_sj_equalities(`0`),
96	quick_uses_applicable_index(FALSE)
97	{
98	/ Protected by quick_uses_applicable_index /
99	LINT_INIT(quick_max_loose_keypart);
100	/ The following are protected by best_loose_scan_cost!= DBL_MAX /
101	LINT_INIT(best_loose_scan_key);
102	LINT_INIT(best_loose_scan_records);
103	LINT_INIT(best_max_loose_keypart);
104	LINT_INIT(best_loose_scan_start_key);
105	}
106
107	void init(JOIN join, JOIN_TAB s, table_map remaining_tables)
108	{
109	/*
110	Discover the bound equalities. We need to do this if
111	1. The next table is an SJ-inner table, and
112	2. It is the first table from that semijoin, and
113	3. We're not within a semi-join range (i.e. all semi-joins either have
114	all or none of their tables in join_table_map), except
115	s->emb_sj_nest (which we've just entered, see #2).
116	4. All non-IN-equality correlation references from this sj-nest are
117	bound
118	5. But some of the IN-equalities aren't (so this can't be handled by
119	FirstMatch strategy)
120	*/
121	best_loose_scan_cost= DBL_MAX;
122	if (!join->emb_sjm_nest && s->emb_sj_nest && // (1)
123	s->emb_sj_nest->sj_in_exprs < `64` &&
124	((remaining_tables & s->emb_sj_nest->sj_inner_tables) == // (2)
125	s->emb_sj_nest->sj_inner_tables) && // (2)
126	join->cur_sj_inner_tables == `0` && // (3)
127	!(remaining_tables &
128	s->emb_sj_nest->nested_join->sj_corr_tables) && // (4)
129	remaining_tables & s->emb_sj_nest->nested_join->sj_depends_on &&// (5)
130	optimizer_flag(join->thd, OPTIMIZER_SWITCH_LOOSE_SCAN))
131	{
132	/ This table is an LooseScan scan candidate /
133	bound_sj_equalities= get_bound_sj_equalities(s->emb_sj_nest,
134	remaining_tables);
135	try_loosescan= TRUE;
136	DBUG_PRINT("info", ("Will try LooseScan scan, bound_map=%llx",
137	(longlong)bound_sj_equalities));
138	}
139	}
140
141	void next_ref_key()
142	{
143	handled_sj_equalities=`0`;
144	loose_scan_keyparts= `0`;
145	max_loose_keypart= `0`;
146	part1_conds_met= FALSE;
147	}
148
149	void add_keyuse(table_map remaining_tables, KEYUSE *keyuse)
150	{
151	if (try_loosescan && keyuse->sj_pred_no != UINT_MAX &&
152	(keyuse->table->file->index_flags(keyuse->key, `0`, `1` ) & HA_READ_ORDER))
153
154	{
155	if (!(remaining_tables & keyuse->used_tables))
156	{
157	/*
158	This allows to use equality propagation to infer that some
159	sj-equalities are bound.
160	*/
161	bound_sj_equalities \|= `1ULL` << keyuse->sj_pred_no;
162	}
163	else
164	{
165	handled_sj_equalities \|= `1ULL` << keyuse->sj_pred_no;
166	loose_scan_keyparts \|= ((key_part_map)`1`) << keyuse->keypart;
167	set_if_bigger(max_loose_keypart, keyuse->keypart);
168	}
169	}
170	}
171
172	bool have_a_case() { return MY_TEST(handled_sj_equalities); }
173
174	void check_ref_access_part1(JOIN_TAB s, uint key, KEYUSE start_key,
175	table_map found_part)
176	{
177	/*
178	Check if we can use LooseScan semi-join strategy. We can if
179	1. This is the right table at right location
180	2. All IN-equalities are either
181	- "bound", ie. the outer_expr part refers to the preceding tables
182	- "handled", ie. covered by the index we're considering
183	3. Index order allows to enumerate subquery's duplicate groups in
184	order. This happens when the index definition matches this
185	pattern:
186
187	(handled_col\|bound_col) (other_col\|bound_col)*
188
189	*/
190	if (try_loosescan && // (1)
191	(handled_sj_equalities \| bound_sj_equalities) == // (2)
192	PREV_BITS(ulonglong, s->emb_sj_nest->sj_in_exprs) && // (2)
193	(PREV_BITS(key_part_map, max_loose_keypart+`1`) & // (3)
194	(found_part \| loose_scan_keyparts)) == // (3)
195	PREV_BITS(key_part_map, max_loose_keypart+`1`) && // (3)
196	!key_uses_partial_cols(s->table->s, key))
197	{
198	if (s->quick && s->quick->index == key &&
199	s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE)
200	{
201	quick_uses_applicable_index= TRUE;
202	quick_max_loose_keypart= max_loose_keypart;
203	}
204	DBUG_PRINT("info", ("Can use LooseScan scan"));
205
206	if (found_part & `1`)
207	{
208	/ Can use LooseScan on ref access if the first key part is bound /
209	part1_conds_met= TRUE;
210	}
211
212	/*
213	Check if this is a special case where there are no usable bound
214	IN-equalities, i.e. we have
215
216	outer_expr IN (SELECT innertbl.key FROM ...)
217
218	and outer_expr cannot be evaluated yet, so it's actually full
219	index scan and not a ref access.
220	We can do full index scan if it uses index-only.
221	*/
222	if (!(found_part & `1` ) && / no usable ref access for 1st key part /
223	s->table->covering_keys.is_set(key))
224	{
225	part1_conds_met= TRUE;
226	DBUG_PRINT("info", ("Can use full index scan for LooseScan"));
227
228	/ Calculate the cost of complete loose index scan. /
229	double records= rows2double(s->table->file->stats.records);
230
231	/ The cost is entire index scan cost (divided by 2) /
232	double read_time= s->table->file->keyread_time(key, `1`,
233	(ha_rows) records);
234
235	/*
236	Now find out how many different keys we will get (for now we
237	ignore the fact that we have "keypart_i=const" restriction for
238	some key components, that may make us think think that loose
239	scan will produce more distinct records than it actually will)
240	*/
241	ulong rpc;
242	if ((rpc= s->table->key_info[key].rec_per_key[max_loose_keypart]))
243	records= records / rpc;
244
245	// TODO: previous version also did /2
246	if (read_time < best_loose_scan_cost)
247	{
248	best_loose_scan_key= key;
249	best_loose_scan_cost= read_time;
250	best_loose_scan_records= records;
251	best_max_loose_keypart= max_loose_keypart;
252	best_loose_scan_start_key= start_key;
253	}
254	}
255	}
256	}
257
258	void check_ref_access_part2(uint key, KEYUSE start_key, double* records,
259	double read_time)
260	{
261	if (part1_conds_met && read_time < best_loose_scan_cost)
262	{
263	/ TODO use rec-per-key-based fanout calculations /
264	best_loose_scan_key= key;
265	best_loose_scan_cost= read_time;
266	best_loose_scan_records= records;
267	best_max_loose_keypart= max_loose_keypart;
268	best_loose_scan_start_key= start_key;
269	}
270	}
271
272	void check_range_access(JOIN join, uint idx, QUICK_SELECT_I quick)
273	{
274	/ TODO: this the right part restriction: /
275	if (quick_uses_applicable_index && idx == join->const_tables &&
276	quick->read_time < best_loose_scan_cost)
277	{
278	best_loose_scan_key= quick->index;
279	best_loose_scan_cost= quick->read_time;
280	/ this is ok because idx == join->const_tables /
281	best_loose_scan_records= rows2double(quick->records);
282	best_max_loose_keypart= quick_max_loose_keypart;
283	best_loose_scan_start_key= NULL;
284	}
285	}
286
287	void save_to_position(JOIN_TAB tab, POSITION pos)
288	{
289	pos->read_time= best_loose_scan_cost;
290	if (best_loose_scan_cost != DBL_MAX)
291	{
292	pos->records_read= best_loose_scan_records;
293	pos->key= best_loose_scan_start_key;
294	pos->cond_selectivity= `1.0`;
295	pos->loosescan_picker.loosescan_key= best_loose_scan_key;
296	pos->loosescan_picker.loosescan_parts= best_max_loose_keypart + `1`;
297	pos->use_join_buffer= FALSE;
298	pos->table= tab;
299	// todo need ref_depend_map ?
300	DBUG_PRINT("info", ("Produced a LooseScan plan, key %s, %s",
301	tab->table->key_info[best_loose_scan_key].name.str,
302	best_loose_scan_start_key? "(ref access)":
303	"(range/index access)"));
304	}
305	}
306	};
307
308
309	void advance_sj_state(JOIN *join, table_map remaining_tables, uint idx,
310	double current_record_count, double* *current_read_time,
311	POSITION *loose_scan_pos);
312	void restore_prev_sj_state(const table_map remaining_tables,
313	const JOIN_TAB *tab, uint idx);
314
315	void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
316
317	bool setup_sj_materialization_part1(JOIN_TAB *sjm_tab);
318	bool setup_sj_materialization_part2(JOIN_TAB *sjm_tab);
319
320
321	/*
322	Temporary table used by semi-join DuplicateElimination strategy
323
324	This consists of the temptable itself and data needed to put records
325	into it. The table's DDL is as follows:
326
327	CREATE TABLE tmptable (col VARCHAR(n) BINARY, PRIMARY KEY(col));
328
329	where the primary key can be replaced with unique constraint if n exceeds
330	the limit (as it is always done for query execution-time temptables).
331
332	The record value is a concatenation of rowids of tables from the join we're
333	executing. If a join table is on the inner side of the outer join, we
334	assume that its rowid can be NULL and provide means to store this rowid in
335	the tuple.
336	*/
337
338	class SJ_TMP_TABLE : public Sql_alloc
339	{
340	public:
341	/*
342	Array of pointers to tables whose rowids compose the temporary table
343	record.
344	*/
345	class TAB
346	{
347	public:
348	JOIN_TAB *join_tab;
349	uint rowid_offset;
350	ushort null_byte;
351	uchar null_bit;
352	};
353	TAB *tabs;
354	TAB *tabs_end;
355
356	/*
357	is_degenerate==TRUE means this is a special case where the temptable record
358	has zero length (and presence of a unique key means that the temptable can
359	have either 0 or 1 records).
360	In this case we don't create the physical temptable but instead record
361	its state in SJ_TMP_TABLE::have_degenerate_row.
362	*/
363	bool is_degenerate;
364
365	/*
366	When is_degenerate==TRUE: the contents of the table (whether it has the
367	record or not).
368	*/
369	bool have_degenerate_row;
370
371	/ table record parameters /
372	uint null_bits;
373	uint null_bytes;
374	uint rowid_len;
375
376	/ The temporary table itself (NULL means not created yet) /
377	TABLE *tmp_table;
378
379	/*
380	These are the members we got from temptable creation code. We'll need
381	them if we'll need to convert table from HEAP to MyISAM/Maria.
382	*/
383	TMP_ENGINE_COLUMNDEF *start_recinfo;
384	TMP_ENGINE_COLUMNDEF *recinfo;
385
386	SJ_TMP_TABLE *next_flush_table;
387
388	int sj_weedout_delete_rows();
389	int sj_weedout_check_row(THD *thd);
390	bool create_sj_weedout_tmp_table(THD *thd);
391	};
392
393	int setup_semijoin_loosescan(JOIN *join);
394	int setup_semijoin_dups_elimination(JOIN *join, ulonglong options,
395	uint no_jbuf_after);
396	void destroy_sj_tmp_tables(JOIN *join);
397	int clear_sj_tmp_tables(JOIN *join);
398	int rewrite_to_index_subquery_engine(JOIN *join);
399
400
401	void get_delayed_table_estimates(TABLE *table,
402	ha_rows *out_rows,
403	double *scan_time,
404	double *startup_cost);
405
406	enum_nested_loop_state join_tab_execution_startup(JOIN_TAB *tab);
407
408

Browse the source code of MariaDB/sql/opt_subselect.h