row0ftsort.h source code [MariaDB/storage/innobase/include/row0ftsort.h]

1	/*****************************************************************************
2
3	Copyright (c) 2010, 2016, Oracle and/or its affiliates. All Rights Reserved.
4	Copyright (c) 2015, 2018, MariaDB Corporation.
5
6	This program is free software; you can redistribute it and/or modify it under
7	the terms of the GNU General Public License as published by the Free Software
8	Foundation; version 2 of the License.
9
10	This program is distributed in the hope that it will be useful, but WITHOUT
11	ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12	FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
14	You should have received a copy of the GNU General Public License along with
15	this program; if not, write to the Free Software Foundation, Inc.,
16	51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
17
18	*****************************************************************************/
19
20	/************************************************//**
21	@file include/row0ftsort.h
22	Create Full Text Index with (parallel) merge sort
23
24	Created 10/13/2010 Jimmy Yang
25	*******************************************************/
26
27	#ifndef row0ftsort_h
28	#define row0ftsort_h
29
30	#include "univ.i"
31	#include "data0data.h"
32	#include "dict0types.h"
33	#include "row0mysql.h"
34	#include "fts0fts.h"
35	#include "fts0types.h"
36	#include "fts0priv.h"
37	#include "row0merge.h"
38	#include "btr0bulk.h"
39	#include "os0thread.h"
40
41	/* This structure defineds information the scan thread will fetch*
42	and put to the linked list for parallel tokenization/sort threads
43	to process /*
44	typedef struct fts_doc_item fts_doc_item_t;
45
46	/* Information about temporary files used in merge sort /
47	struct fts_doc_item {
48	dfield_t* field; /!< field contains document string /
49	doc_id_t doc_id; /!< document ID /
50	UT_LIST_NODE_T(fts_doc_item_t) doc_list;
51	/!< list of doc items /
52	};
53
54	/* This defines the list type that scan thread would feed the parallel*
55	tokenization threads and sort threads. /*
56	typedef UT_LIST_BASE_NODE_T(fts_doc_item_t) fts_doc_list_t;
57
58	#define FTS_PLL_MERGE 1
59
60	/* Sort information passed to each individual parallel sort thread /
61	struct fts_psort_t;
62
63	/* Common info passed to each parallel sort thread /
64	struct fts_psort_common_t {
65	row_merge_dup_t* dup; /!< descriptor of FTS index /
66	dict_table_t* new_table; /!< source table /
67	trx_t* trx; /!< transaction /
68	fts_psort_t* all_info; /!< all parallel sort info /
69	os_event_t sort_event; /!< sort event /
70	os_event_t merge_event; /!< merge event /
71	ibool opt_doc_id_size;/!< whether to use 4 bytes*
72	instead of 8 bytes integer to
73	store Doc ID during sort, if
74	Doc ID will not be big enough
75	to use 8 bytes value /*
76	};
77
78	struct fts_psort_t {
79	ulint psort_id; /!< Parallel sort ID /
80	row_merge_buf_t* merge_buf[FTS_NUM_AUX_INDEX];
81	/!< sort buffer /
82	merge_file_t* merge_file[FTS_NUM_AUX_INDEX];
83	/!< sort file /
84	row_merge_block_t* merge_block[FTS_NUM_AUX_INDEX];
85	/!< buffer to write to file /
86	row_merge_block_t* block_alloc[FTS_NUM_AUX_INDEX];
87	/!< buffer to allocated /
88	row_merge_block_t* crypt_block[FTS_NUM_AUX_INDEX];
89	/!< buffer to crypt data /
90	row_merge_block_t* crypt_alloc[FTS_NUM_AUX_INDEX];
91	/!< buffer to allocated /
92	ulint child_status; /!< child thread status /
93	ulint state; /!< parent thread state /
94	fts_doc_list_t fts_doc_list; /!< doc list to process /
95	fts_psort_common_t* psort_common; /!< ptr to all psort info /
96	os_thread_t thread_hdl; /!< thread handler /
97	dberr_t error; /!< db error during psort /
98	ulint memory_used; /!< memory used by fts_doc_list /
99	ib_mutex_t mutex; /!< mutex for fts_doc_list /
100	};
101
102	/* Row fts token for plugin parser /
103	struct row_fts_token_t {
104	fts_string_t* text; /!< token /
105	UT_LIST_NODE_T(row_fts_token_t)
106	token_list; /!< next token link /
107	};
108
109	typedef UT_LIST_BASE_NODE_T(row_fts_token_t) fts_token_list_t;
110
111	/* Structure stores information from string tokenization operation /
112	struct fts_tokenize_ctx {
113	ulint processed_len; /!< processed string length /
114	ulint init_pos; /!< doc start position /
115	ulint buf_used; /!< the sort buffer (ID) when*
116	tokenization stops, which
117	could due to sort buffer full /*
118	ulint rows_added[FTS_NUM_AUX_INDEX];
119	/!< number of rows added for*
120	each FTS index partition /*
121	ib_rbt_t* cached_stopword;/!< in: stopword list /
122	dfield_t sort_field[FTS_NUM_FIELDS_SORT];
123	/!< in: sort field /
124	fts_token_list_t fts_token_list;
125	};
126
127	typedef struct fts_tokenize_ctx fts_tokenize_ctx_t;
128
129	/* Structure stores information needed for the insertion phase of FTS*
130	parallel sort. /*
131	struct fts_psort_insert {
132	CHARSET_INFO* charset; /!< charset info /
133	mem_heap_t* heap; /!< heap /
134	ibool opt_doc_id_size;/!< Whether to use smaller (4 bytes)*
135	integer for Doc ID /*
136	BtrBulk* btr_bulk; /!< Bulk load instance /
137	dtuple_t* tuple; /!< Tuple to insert /
138
139	#ifdef UNIV_DEBUG
140	ulint aux_index_id; /!< Auxiliary index id /
141	#endif
142	};
143
144	typedef struct fts_psort_insert fts_psort_insert_t;
145
146
/** status bit used for communication between parent and child thread */
#define FTS_PARENT_COMPLETE	1
#define FTS_PARENT_EXITING	2
#define FTS_CHILD_COMPLETE	1
#define FTS_CHILD_EXITING	2
152
/** Print some debug information */
#define	FTSORT_PRINT

#ifdef	FTSORT_PRINT
/** Print a timestamp followed by a message to stderr.
The message is written verbatim with fputs(): it is never interpreted as a
printf format, so a '%' in the text cannot trigger undefined behavior.
@param str	NUL-terminated message text */
#define	DEBUG_FTS_SORT_PRINT(str)		\
	do {					\
		ut_print_timestamp(stderr);	\
		fputs((str), stderr);		\
	} while (0)
#else
/** Debug printing disabled: expands to nothing. */
#define DEBUG_FTS_SORT_PRINT(str)
#endif	/* FTSORT_PRINT */
165
166	/***********************************************************//**
167	Create a temporary "fts sort index" used to merge sort the
168	tokenized doc string. The index has three "fields":
169
170	1) Tokenized word,
171	2) Doc ID
172	3) Word's position in original 'doc'.
173
174	@return dict_index_t structure for the fts sort index /*
175	dict_index_t*
176	row_merge_create_fts_sort_index(
177	/============================/
178	dict_index_t* index, /!< in: Original FTS index*
179	based on which this sort index
180	is created /*
181	dict_table_t* table, /!< in,out: table that FTS index*
182	is being created on /*
183	ibool* opt_doc_id_size);
184	/!< out: whether to use 4 bytes*
185	instead of 8 bytes integer to
186	store Doc ID during sort /*
187
188	/******************************************************************//**
189	Initialize FTS parallel sort structures.
190	@return TRUE if all successful /*
191	ibool
192	row_fts_psort_info_init(
193	/====================/
194	trx_t* trx, /!< in: transaction /
195	row_merge_dup_t* dup, /!< in,own: descriptor of*
196	FTS index being created /*
197	const dict_table_t* new_table,/!< in: table where indexes are*
198	created /*
199	ibool opt_doc_id_size,
200	/!< in: whether to use 4 bytes*
201	instead of 8 bytes integer to
202	store Doc ID during sort /*
203	fts_psort_t** psort, /!< out: parallel sort info to be*
204	instantiated /*
205	fts_psort_t** merge) /!< out: parallel merge info*
206	to be instantiated /*
207	MY_ATTRIBUTE((nonnull));
208	/******************************************************************//**
209	Clean up and deallocate FTS parallel sort structures, and close
210	temparary merge sort files /*
211	void
212	row_fts_psort_info_destroy(
213	/=======================/
214	fts_psort_t* psort_info, /!< parallel sort info /
215	fts_psort_t* merge_info); /!< parallel merge info /
216	/******************************************************************//**
217	Free up merge buffers when merge sort is done /*
218	void
219	row_fts_free_pll_merge_buf(
220	/=======================/
221	fts_psort_t* psort_info); /!< in: parallel sort info /
222
223	/*******************************************************************//**
224	Start the parallel tokenization and parallel merge sort /*
225	void
226	row_fts_start_psort(
227	/================/
228	fts_psort_t* psort_info); /!< in: parallel sort info /
229	/*******************************************************************//**
230	Kick off the parallel merge and insert thread /*
231	void
232	row_fts_start_parallel_merge(
233	/=========================/
234	fts_psort_t* merge_info); /!< in: parallel sort info /
235	/******************************************************************//**
236	Propagate a newly added record up one level in the selection tree
237	@return parent where this value propagated to /*
238	int
239	row_merge_fts_sel_propagate(
240	/========================/
241	int propogated, /<! in: tree node propagated /
242	int* sel_tree, /<! in: selection tree /
243	ulint level, /<! in: selection tree level /
244	const mrec_t** mrec, /<! in: sort record /
245	ulint** offsets, /<! in: record offsets /
246	dict_index_t* index); /<! in: FTS index /
247	/******************************************************************//**
248	Read sorted file containing index data tuples and insert these data
249	tuples to the index
250	@return DB_SUCCESS or error number /*
251	dberr_t
252	row_fts_merge_insert(
253	/=================/
254	dict_index_t* index, /!< in: index /
255	dict_table_t* table, /!< in: new table /
256	fts_psort_t* psort_info, /!< parallel sort info /
257	ulint id) / !< in: which auxiliary table's data*
258	to insert to /*
259	MY_ATTRIBUTE((nonnull));
260	#endif /* row0ftsort_h */
261

Browse the source code of MariaDB/storage/innobase/include/row0ftsort.h