row0merge.h source code [MariaDB/storage/innobase/include/row0merge.h]

1	/*****************************************************************************
2
3	Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved.
4	Copyright (c) 2015, 2017, MariaDB Corporation.
5
6	This program is free software; you can redistribute it and/or modify it under
7	the terms of the GNU General Public License as published by the Free Software
8	Foundation; version 2 of the License.
9
10	This program is distributed in the hope that it will be useful, but WITHOUT
11	ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12	FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
14	You should have received a copy of the GNU General Public License along with
15	this program; if not, write to the Free Software Foundation, Inc.,
16	51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
17
18	*****************************************************************************/
19
20	/************************************************//**
21	@file include/row0merge.h
22	Index build routines using a merge sort
23
24	Created 13/06/2005 Jan Lindstrom
25	*******************************************************/
26
27	#ifndef row0merge_h
28	#define row0merge_h
29
30	#include "univ.i"
31	#include "data0data.h"
32	#include "dict0types.h"
33	#include "trx0types.h"
34	#include "que0types.h"
35	#include "mtr0mtr.h"
36	#include "rem0types.h"
37	#include "rem0rec.h"
38	#include "btr0types.h"
39	#include "row0mysql.h"
40	#include "lock0types.h"
41	#include "srv0srv.h"
42	#include "ut0stage.h"
43
44	/ Reserve free space from every block for key_version /
45	#define ROW_MERGE_RESERVE_SIZE 4
46
47	/ Cluster index read task is mandatory /
48	#define COST_READ_CLUSTERED_INDEX 1.0
49
50	/ Basic fixed cost to build all type of index /
51	#define COST_BUILD_INDEX_STATIC 0.5
52	/ Dynamic cost to build all type of index, dynamic cost will be re-distributed based on page count ratio of each index /
53	#define COST_BUILD_INDEX_DYNAMIC 0.5
54
55	/ Sum of below two must be 1.0 /
56	#define PCT_COST_MERGESORT_INDEX 0.4
57	#define PCT_COST_INSERT_INDEX 0.6
58
59	// Forward declaration
60	struct ib_sequence_t;
61
62	/* @brief Block size for I/O operations in merge sort.*
63
64	The minimum is srv_page_size, or page_get_free_space_of_empty()
65	rounded to a power of 2.
66
67	When not creating a PRIMARY KEY that contains column prefixes, this
68	can be set as small as srv_page_size / 2. /*
69	typedef byte row_merge_block_t;
70
71	/* @brief Secondary buffer for I/O operations of merge records.*
72
73	This buffer is used for writing or reading a record that spans two
74	row_merge_block_t. Thus, it must be able to hold one merge record,
75	whose maximum size is the same as the minimum size of
76	row_merge_block_t. /*
77	typedef byte mrec_buf_t[UNIV_PAGE_SIZE_MAX];
78
79	/* @brief Merge record in row_merge_block_t.*
80
81	The format is the same as a record in ROW_FORMAT=COMPACT with the
82	exception that the REC_N_NEW_EXTRA_BYTES are omitted. /*
83	typedef byte mrec_t;
84
85	/* Merge record in row_merge_buf_t /
86	struct mtuple_t {
87	dfield_t* fields; /!< data fields /
88	};
89
90	/* Buffer for sorting in main memory. /
91	struct row_merge_buf_t {
92	mem_heap_t* heap; /!< memory heap where allocated /
93	dict_index_t* index; /!< the index the tuples belong to /
94	ulint total_size; /!< total amount of data bytes /
95	ulint n_tuples; /!< number of data tuples /
96	ulint max_tuples; /!< maximum number of data tuples /
97	mtuple_t* tuples; /!< array of data tuples /
98	mtuple_t* tmp_tuples; /!< temporary copy of tuples,*
99	for sorting /*
100	};
101
102	/* Information about temporary files used in merge sort /
103	struct merge_file_t {
104	pfs_os_file_t fd; /!< file descriptor /
105	ulint offset; /!< file offset (end of file) /
106	ib_uint64_t n_rec; /!< number of records in the file /
107	};
108
109	/* Index field definition /
110	struct index_field_t {
111	ulint col_no; /!< column offset /
112	ulint prefix_len; /!< column prefix length, or 0*
113	if indexing the whole column /*
114	bool is_v_col; /!< whether this is a virtual column /
115	};
116
117	/* Definition of an index being created /
118	struct index_def_t {
119	const char* name; /!< index name /
120	bool rebuild; /!< whether the table is rebuilt /
121	ulint ind_type; /!< 0, DICT_UNIQUE,*
122	or DICT_CLUSTERED /*
123	ulint key_number; /!< MySQL key number,*
124	or ULINT_UNDEFINED if none /*
125	ulint n_fields; /!< number of fields in index /
126	index_field_t* fields; /!< field definitions /
127	st_mysql_ftparser*
128	parser; /!< fulltext parser plugin /
129	};
130
131	/* Structure for reporting duplicate records. /
132	struct row_merge_dup_t {
133	dict_index_t* index; /!< index being sorted /
134	struct TABLE* table; /!< MySQL table object /
135	const ulint* col_map;/!< mapping of column numbers*
136	in table to the rebuilt table
137	(index->table), or NULL if not
138	rebuilding table /*
139	ulint n_dup; /!< number of duplicates /
140	};
141
142	/***********************************************************//**
143	Report a duplicate key. /*
144	void
145	row_merge_dup_report(
146	/=================/
147	row_merge_dup_t* dup, /!< in/out: for reporting duplicates /
148	const dfield_t* entry) /!< in: duplicate index entry /
149	MY_ATTRIBUTE((nonnull));
150
151	/*******************************************************************//**
152	Sets an exclusive lock on a table, for the duration of creating indexes.
153	@return error code or DB_SUCCESS /*
154	dberr_t
155	row_merge_lock_table(
156	/=================/
157	trx_t* trx, /!< in/out: transaction /
158	dict_table_t* table, /!< in: table to lock /
159	enum lock_mode mode) /!< in: LOCK_X or LOCK_S /
160	MY_ATTRIBUTE((nonnull(`1`,`2`), warn_unused_result));
161
162	/*******************************************************************//**
163	Drop indexes that were created before an error occurred.
164	The data dictionary must have been locked exclusively by the caller,
165	because the transaction will not be committed. /*
166	void
167	row_merge_drop_indexes_dict(
168	/========================/
169	trx_t* trx, /!< in/out: dictionary transaction /
170	table_id_t table_id)/!< in: table identifier /
171	MY_ATTRIBUTE((nonnull));
172
173	/*******************************************************************//**
174	Drop those indexes which were created before an error occurred.
175	The data dictionary must have been locked exclusively by the caller,
176	because the transaction will not be committed. /*
177	void
178	row_merge_drop_indexes(
179	/===================/
180	trx_t* trx, /!< in/out: transaction /
181	dict_table_t* table, /!< in/out: table containing the indexes /
182	ibool locked) /!< in: TRUE=table locked,*
183	FALSE=may need to do a lazy drop /*
184	MY_ATTRIBUTE((nonnull));
185
186	/*******************************************************************//**
187	Drop all partially created indexes during crash recovery. /*
188	void
189	row_merge_drop_temp_indexes(void);
190	/=============================/
191
192	/* Create temporary merge files in the given paramater path, and if*
193	UNIV_PFS_IO defined, register the file descriptor with Performance Schema.
194	@param[in] path location for creating temporary merge files, or NULL
195	@return File descriptor /*
196	pfs_os_file_t
197	row_merge_file_create_low(
198	const char* path)
199	MY_ATTRIBUTE((warn_unused_result));
200	/*******************************************************************//**
201	Destroy a merge file. And de-register the file from Performance Schema
202	if UNIV_PFS_IO is defined. /*
203	void
204	row_merge_file_destroy_low(
205	/=======================/
206	const pfs_os_file_t& fd); /!< in: merge file descriptor /
207
208	/*******************************************************************//**
209	Provide a new pathname for a table that is being renamed if it belongs to
210	a file-per-table tablespace. The caller is responsible for freeing the
211	memory allocated for the return value.
212	@return new pathname of tablespace file, or NULL if space = 0 /*
213	char*
214	row_make_new_pathname(
215	/==================/
216	dict_table_t* table, /!< in: table to be renamed /
217	const char* new_name) /!< in: new name /
218	MY_ATTRIBUTE((nonnull, warn_unused_result));
219
220	/*******************************************************************//**
221	Rename the tables in the data dictionary. The data dictionary must
222	have been locked exclusively by the caller, because the transaction
223	will not be committed.
224	@return error code or DB_SUCCESS /*
225	dberr_t
226	row_merge_rename_tables_dict(
227	/=========================/
228	dict_table_t* old_table, /!< in/out: old table, renamed to*
229	tmp_name /*
230	dict_table_t* new_table, /!< in/out: new table, renamed to*
231	old_table->name /*
232	const char* tmp_name, /!< in: new name for old_table /
233	trx_t* trx) /!< in/out: dictionary transaction /
234	MY_ATTRIBUTE((nonnull, warn_unused_result));
235
236	/*******************************************************************//**
237	Rename an index in the dictionary that was created. The data
238	dictionary must have been locked exclusively by the caller, because
239	the transaction will not be committed.
240	@return DB_SUCCESS if all OK /*
241	dberr_t
242	row_merge_rename_index_to_add(
243	/==========================/
244	trx_t* trx, /!< in/out: transaction /
245	table_id_t table_id, /!< in: table identifier /
246	index_id_t index_id) /!< in: index identifier /
247	MY_ATTRIBUTE((nonnull(`1`), warn_unused_result));
248
249	/*******************************************************************//**
250	Rename an index in the dictionary that is to be dropped. The data
251	dictionary must have been locked exclusively by the caller, because
252	the transaction will not be committed.
253	@return DB_SUCCESS if all OK /*
254	dberr_t
255	row_merge_rename_index_to_drop(
256	/===========================/
257	trx_t* trx, /!< in/out: transaction /
258	table_id_t table_id, /!< in: table identifier /
259	index_id_t index_id) /!< in: index identifier /
260	MY_ATTRIBUTE((nonnull(`1`), warn_unused_result));
261
262	/* Create the index and load in to the dictionary.*
263	@param[in,out] table the index is on this table
264	@param[in] index_def the index definition
265	@param[in] add_v new virtual columns added along with add
266	index call
267	@return index, or NULL on error /*
268	dict_index_t*
269	row_merge_create_index(
270	dict_table_t* table,
271	const index_def_t* index_def,
272	const dict_add_v_col_t* add_v)
273	MY_ATTRIBUTE((warn_unused_result));
274
275	/*******************************************************************//**
276	Check if a transaction can use an index.
277	@return whether the index can be used by the transaction /*
278	bool
279	row_merge_is_index_usable(
280	/======================/
281	const trx_t* trx, /!< in: transaction /
282	const dict_index_t* index) /!< in: index to check /
283	MY_ATTRIBUTE((nonnull, warn_unused_result));
284
285	/*******************************************************************//**
286	Drop a table. The caller must have ensured that the background stats
287	thread is not processing the table. This can be done by calling
288	dict_stats_wait_bg_to_stop_using_table() after locking the dictionary and
289	before calling this function.
290	@return DB_SUCCESS or error code /*
291	dberr_t
292	row_merge_drop_table(
293	/=================/
294	trx_t* trx, /!< in: transaction /
295	dict_table_t* table) /!< in: table instance to drop /
296	MY_ATTRIBUTE((nonnull, warn_unused_result));
297
298	/* Build indexes on a table by reading a clustered index, creating a temporary*
299	file containing index entries, merge sorting these index entries and inserting
300	sorted index entries to indexes.
301	@param[in] trx transaction
302	@param[in] old_table table where rows are read from
303	@param[in] new_table table where indexes are created; identical to
304	old_table unless creating a PRIMARY KEY
305	@param[in] online true if creating indexes online
306	@param[in] indexes indexes to be created
307	@param[in] key_numbers MySQL key numbers
308	@param[in] n_indexes size of indexes[]
309	@param[in,out] table MySQL table, for reporting erroneous key value
310	if applicable
311	@param[in] defaults default values of added, changed columns, or NULL
312	@param[in] col_map mapping of old column numbers to new ones, or
313	NULL if old_table == new_table
314	@param[in] add_autoinc number of added AUTO_INCREMENT columns, or
315	ULINT_UNDEFINED if none is added
316	@param[in,out] sequence autoinc sequence
317	@param[in] skip_pk_sort whether the new PRIMARY KEY will follow
318	existing order
319	@param[in,out] stage performance schema accounting object, used by
320	ALTER TABLE. stage->begin_phase_read_pk() will be called at the beginning of
321	this function and it will be passed to other functions for further accounting.
322	@param[in] add_v new virtual columns added along with indexes
323	@param[in] eval_table mysql table used to evaluate virtual column
324	value, see innobase_get_computed_value().
325	@param[in] drop_historical whether to drop historical system rows
326	@return DB_SUCCESS or error code /*
327	dberr_t
328	row_merge_build_indexes(
329	trx_t* trx,
330	dict_table_t* old_table,
331	dict_table_t* new_table,
332	bool online,
333	dict_index_t** indexes,
334	const ulint* key_numbers,
335	ulint n_indexes,
336	struct TABLE* table,
337	const dtuple_t* defaults,
338	const ulint* col_map,
339	ulint add_autoinc,
340	ib_sequence_t& sequence,
341	bool skip_pk_sort,
342	ut_stage_alter_t* stage,
343	const dict_add_v_col_t* add_v,
344	struct TABLE* eval_table,
345	bool drop_historical)
346	MY_ATTRIBUTE((warn_unused_result));
347
348	/******************************************************************//**
349	Write a buffer to a block. /*
350	void
351	row_merge_buf_write(
352	/================/
353	const row_merge_buf_t* buf, /!< in: sorted buffer /
354	const merge_file_t* of, /!< in: output file /
355	row_merge_block_t* block) /!< out: buffer for writing to file /
356	MY_ATTRIBUTE((nonnull));
357
358	/******************************************************************//**
359	Sort a buffer. /*
360	void
361	row_merge_buf_sort(
362	/===============/
363	row_merge_buf_t* buf, /!< in/out: sort buffer /
364	row_merge_dup_t* dup) /!< in/out: reporter of duplicates*
365	(NULL if non-unique index) /*
366	MY_ATTRIBUTE((nonnull(`1`)));
367
368	/******************************************************************//**
369	Write a merge block to the file system.
370	@return whether the request was completed successfully /*
371	UNIV_INTERN
372	bool
373	row_merge_write(
374	/============/
375	const pfs_os_file_t& fd, /!< in: file descriptor /
376	ulint offset, /!< in: offset where to write,*
377	in number of row_merge_block_t elements /*
378	const void* buf, /!< in: data /
379	void* crypt_buf, /!< in: crypt buf or NULL /
380	ulint space) /!< in: space id /
381	MY_ATTRIBUTE((warn_unused_result));
382
383	/******************************************************************//**
384	Empty a sort buffer.
385	@return sort buffer /*
386	row_merge_buf_t*
387	row_merge_buf_empty(
388	/================/
389	row_merge_buf_t* buf) /!< in,own: sort buffer /
390	MY_ATTRIBUTE((warn_unused_result, nonnull));
391
392	/* Create a merge file in the given location.*
393	@param[out] merge_file merge file structure
394	@param[in] path location for creating temporary file, or NULL
395	@return file descriptor, or -1 on failure /*
396	pfs_os_file_t
397	row_merge_file_create(
398	merge_file_t* merge_file,
399	const char* path)
400	MY_ATTRIBUTE((warn_unused_result, nonnull(`1`)));
401
402	/* Merge disk files.*
403	@param[in] trx transaction
404	@param[in] dup descriptor of index being created
405	@param[in,out] file file containing index entries
406	@param[in,out] block 3 buffers
407	@param[in,out] tmpfd temporary file handle
408	@param[in] update_progress true, if we should update progress status
409	@param[in] pct_progress total progress percent until now
410	@param[in] pct_ocst current progress percent
411	@param[in] crypt_block crypt buf or NULL
412	@param[in] space space_id
413	@param[in,out] stage performance schema accounting object, used by
414	ALTER TABLE. If not NULL, stage->begin_phase_sort() will be called initially
415	and then stage->inc() will be called for each record processed.
416	@return DB_SUCCESS or error code /*
417	dberr_t
418	row_merge_sort(
419	/===========/
420	trx_t* trx,
421	const row_merge_dup_t* dup,
422	merge_file_t* file,
423	row_merge_block_t* block,
424	pfs_os_file_t* tmpfd,
425	const bool update_progress,
426	const double pct_progress,
427	const double pct_cost,
428	row_merge_block_t* crypt_block,
429	ulint space,
430	ut_stage_alter_t* stage = NULL)
431	MY_ATTRIBUTE((warn_unused_result));
432
433	/*******************************************************************//**
434	Allocate a sort buffer.
435	@return own: sort buffer /*
436	row_merge_buf_t*
437	row_merge_buf_create(
438	/=================/
439	dict_index_t* index) /!< in: secondary index /
440	MY_ATTRIBUTE((warn_unused_result, nonnull, malloc));
441
442	/*******************************************************************//**
443	Deallocate a sort buffer. /*
444	void
445	row_merge_buf_free(
446	/===============/
447	row_merge_buf_t* buf) /!< in,own: sort buffer to be freed /
448	MY_ATTRIBUTE((nonnull));
449
450	/*******************************************************************//**
451	Destroy a merge file. /*
452	void
453	row_merge_file_destroy(
454	/===================/
455	merge_file_t* merge_file) /!< in/out: merge file structure /
456	MY_ATTRIBUTE((nonnull));
457
458	/* Read a merge block from the file system.*
459	@return whether the request was completed successfully /*
460	bool
461	row_merge_read(
462	/===========/
463	const pfs_os_file_t& fd, /!< in: file descriptor /
464	ulint offset, /!< in: offset where to read*
465	in number of row_merge_block_t
466	elements /*
467	row_merge_block_t* buf, /!< out: data /
468	row_merge_block_t* crypt_buf, /!< in: crypt buf or NULL /
469	ulint space) /!< in: space id /
470	MY_ATTRIBUTE((warn_unused_result));
471
472	/******************************************************************//**
473	Read a merge record.
474	@return pointer to next record, or NULL on I/O error or end of list /*
475	const byte*
476	row_merge_read_rec(
477	/===============/
478	row_merge_block_t* block, /!< in/out: file buffer /
479	mrec_buf_t* buf, /!< in/out: secondary buffer /
480	const byte* b, /!< in: pointer to record /
481	const dict_index_t* index, /!< in: index of the record /
482	const pfs_os_file_t& fd, /!< in: file descriptor /
483	ulint* foffs, /!< in/out: file offset /
484	const mrec_t** mrec, /!< out: pointer to merge record,*
485	or NULL on end of list
486	(non-NULL on I/O error) /*
487	ulint* offsets,/!< out: offsets of mrec /
488	row_merge_block_t* crypt_block, /!< in: crypt buf or NULL /
489	ulint space) /!< in: space id /
490	MY_ATTRIBUTE((warn_unused_result));
491	#endif /* row0merge.h */
492

Browse the source code of MariaDB/storage/innobase/include/row0merge.h