1 | /***************************************************************************** |
2 | |
3 | Copyright (c) 2010, 2016, Oracle and/or its affiliates. All Rights Reserved. |
4 | Copyright (c) 2015, 2018, MariaDB Corporation. |
5 | |
6 | This program is free software; you can redistribute it and/or modify it under |
7 | the terms of the GNU General Public License as published by the Free Software |
8 | Foundation; version 2 of the License. |
9 | |
10 | This program is distributed in the hope that it will be useful, but WITHOUT |
11 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
12 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU General Public License along with |
15 | this program; if not, write to the Free Software Foundation, Inc., |
16 | 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA |
17 | |
18 | *****************************************************************************/ |
19 | |
20 | /**************************************************//** |
21 | @file include/row0ftsort.h |
22 | Create Full Text Index with (parallel) merge sort |
23 | |
24 | Created 10/13/2010 Jimmy Yang |
25 | *******************************************************/ |
26 | |
27 | #ifndef row0ftsort_h |
28 | #define row0ftsort_h |
29 | |
30 | #include "univ.i" |
31 | #include "data0data.h" |
32 | #include "dict0types.h" |
33 | #include "row0mysql.h" |
34 | #include "fts0fts.h" |
35 | #include "fts0types.h" |
36 | #include "fts0priv.h" |
37 | #include "row0merge.h" |
38 | #include "btr0bulk.h" |
39 | #include "os0thread.h" |
40 | |
41 | /** This structure defineds information the scan thread will fetch |
42 | and put to the linked list for parallel tokenization/sort threads |
43 | to process */ |
44 | typedef struct fts_doc_item fts_doc_item_t; |
45 | |
46 | /** Information about temporary files used in merge sort */ |
47 | struct fts_doc_item { |
48 | dfield_t* field; /*!< field contains document string */ |
49 | doc_id_t doc_id; /*!< document ID */ |
50 | UT_LIST_NODE_T(fts_doc_item_t) doc_list; |
51 | /*!< list of doc items */ |
52 | }; |
53 | |
54 | /** This defines the list type that scan thread would feed the parallel |
55 | tokenization threads and sort threads. */ |
56 | typedef UT_LIST_BASE_NODE_T(fts_doc_item_t) fts_doc_list_t; |
57 | |
58 | #define FTS_PLL_MERGE 1 |
59 | |
60 | /** Sort information passed to each individual parallel sort thread */ |
61 | struct fts_psort_t; |
62 | |
63 | /** Common info passed to each parallel sort thread */ |
64 | struct fts_psort_common_t { |
65 | row_merge_dup_t* dup; /*!< descriptor of FTS index */ |
66 | dict_table_t* new_table; /*!< source table */ |
67 | trx_t* trx; /*!< transaction */ |
68 | fts_psort_t* all_info; /*!< all parallel sort info */ |
69 | os_event_t sort_event; /*!< sort event */ |
70 | os_event_t merge_event; /*!< merge event */ |
71 | ibool opt_doc_id_size;/*!< whether to use 4 bytes |
72 | instead of 8 bytes integer to |
73 | store Doc ID during sort, if |
74 | Doc ID will not be big enough |
75 | to use 8 bytes value */ |
76 | }; |
77 | |
78 | struct fts_psort_t { |
79 | ulint psort_id; /*!< Parallel sort ID */ |
80 | row_merge_buf_t* merge_buf[FTS_NUM_AUX_INDEX]; |
81 | /*!< sort buffer */ |
82 | merge_file_t* merge_file[FTS_NUM_AUX_INDEX]; |
83 | /*!< sort file */ |
84 | row_merge_block_t* merge_block[FTS_NUM_AUX_INDEX]; |
85 | /*!< buffer to write to file */ |
86 | row_merge_block_t* block_alloc[FTS_NUM_AUX_INDEX]; |
87 | /*!< buffer to allocated */ |
88 | row_merge_block_t* crypt_block[FTS_NUM_AUX_INDEX]; |
89 | /*!< buffer to crypt data */ |
90 | row_merge_block_t* crypt_alloc[FTS_NUM_AUX_INDEX]; |
91 | /*!< buffer to allocated */ |
92 | ulint child_status; /*!< child thread status */ |
93 | ulint state; /*!< parent thread state */ |
94 | fts_doc_list_t fts_doc_list; /*!< doc list to process */ |
95 | fts_psort_common_t* psort_common; /*!< ptr to all psort info */ |
96 | os_thread_t thread_hdl; /*!< thread handler */ |
97 | dberr_t error; /*!< db error during psort */ |
98 | ulint memory_used; /*!< memory used by fts_doc_list */ |
99 | ib_mutex_t mutex; /*!< mutex for fts_doc_list */ |
100 | }; |
101 | |
102 | /** Row fts token for plugin parser */ |
103 | struct row_fts_token_t { |
104 | fts_string_t* text; /*!< token */ |
105 | UT_LIST_NODE_T(row_fts_token_t) |
106 | token_list; /*!< next token link */ |
107 | }; |
108 | |
109 | typedef UT_LIST_BASE_NODE_T(row_fts_token_t) fts_token_list_t; |
110 | |
111 | /** Structure stores information from string tokenization operation */ |
112 | struct fts_tokenize_ctx { |
113 | ulint processed_len; /*!< processed string length */ |
114 | ulint init_pos; /*!< doc start position */ |
115 | ulint buf_used; /*!< the sort buffer (ID) when |
116 | tokenization stops, which |
117 | could due to sort buffer full */ |
118 | ulint rows_added[FTS_NUM_AUX_INDEX]; |
119 | /*!< number of rows added for |
120 | each FTS index partition */ |
121 | ib_rbt_t* cached_stopword;/*!< in: stopword list */ |
122 | dfield_t sort_field[FTS_NUM_FIELDS_SORT]; |
123 | /*!< in: sort field */ |
124 | fts_token_list_t fts_token_list; |
125 | }; |
126 | |
127 | typedef struct fts_tokenize_ctx fts_tokenize_ctx_t; |
128 | |
129 | /** Structure stores information needed for the insertion phase of FTS |
130 | parallel sort. */ |
131 | struct fts_psort_insert { |
132 | CHARSET_INFO* charset; /*!< charset info */ |
133 | mem_heap_t* heap; /*!< heap */ |
134 | ibool opt_doc_id_size;/*!< Whether to use smaller (4 bytes) |
135 | integer for Doc ID */ |
136 | BtrBulk* btr_bulk; /*!< Bulk load instance */ |
137 | dtuple_t* tuple; /*!< Tuple to insert */ |
138 | |
139 | #ifdef UNIV_DEBUG |
140 | ulint aux_index_id; /*!< Auxiliary index id */ |
141 | #endif |
142 | }; |
143 | |
144 | typedef struct fts_psort_insert fts_psort_insert_t; |
145 | |
146 | |
147 | /** status bit used for communication between parent and child thread */ |
148 | #define FTS_PARENT_COMPLETE 1 |
149 | #define FTS_PARENT_EXITING 2 |
150 | #define FTS_CHILD_COMPLETE 1 |
151 | #define FTS_CHILD_EXITING 2 |
152 | |
153 | /** Print some debug information */ |
154 | #define FTSORT_PRINT |
155 | |
156 | #ifdef FTSORT_PRINT |
157 | #define DEBUG_FTS_SORT_PRINT(str) \ |
158 | do { \ |
159 | ut_print_timestamp(stderr); \ |
160 | fprintf(stderr, str); \ |
161 | } while (0) |
162 | #else |
163 | #define DEBUG_FTS_SORT_PRINT(str) |
164 | #endif /* FTSORT_PRINT */ |
165 | |
166 | /*************************************************************//** |
167 | Create a temporary "fts sort index" used to merge sort the |
168 | tokenized doc string. The index has three "fields": |
169 | |
170 | 1) Tokenized word, |
171 | 2) Doc ID |
172 | 3) Word's position in original 'doc'. |
173 | |
174 | @return dict_index_t structure for the fts sort index */ |
175 | dict_index_t* |
176 | row_merge_create_fts_sort_index( |
177 | /*============================*/ |
178 | dict_index_t* index, /*!< in: Original FTS index |
179 | based on which this sort index |
180 | is created */ |
181 | dict_table_t* table, /*!< in,out: table that FTS index |
182 | is being created on */ |
183 | ibool* opt_doc_id_size); |
184 | /*!< out: whether to use 4 bytes |
185 | instead of 8 bytes integer to |
186 | store Doc ID during sort */ |
187 | |
188 | /********************************************************************//** |
189 | Initialize FTS parallel sort structures. |
190 | @return TRUE if all successful */ |
191 | ibool |
192 | row_fts_psort_info_init( |
193 | /*====================*/ |
194 | trx_t* trx, /*!< in: transaction */ |
195 | row_merge_dup_t* dup, /*!< in,own: descriptor of |
196 | FTS index being created */ |
197 | const dict_table_t* new_table,/*!< in: table where indexes are |
198 | created */ |
199 | ibool opt_doc_id_size, |
200 | /*!< in: whether to use 4 bytes |
201 | instead of 8 bytes integer to |
202 | store Doc ID during sort */ |
203 | fts_psort_t** psort, /*!< out: parallel sort info to be |
204 | instantiated */ |
205 | fts_psort_t** merge) /*!< out: parallel merge info |
206 | to be instantiated */ |
207 | MY_ATTRIBUTE((nonnull)); |
208 | /********************************************************************//** |
209 | Clean up and deallocate FTS parallel sort structures, and close |
210 | temparary merge sort files */ |
211 | void |
212 | row_fts_psort_info_destroy( |
213 | /*=======================*/ |
214 | fts_psort_t* psort_info, /*!< parallel sort info */ |
215 | fts_psort_t* merge_info); /*!< parallel merge info */ |
216 | /********************************************************************//** |
217 | Free up merge buffers when merge sort is done */ |
218 | void |
219 | row_fts_free_pll_merge_buf( |
220 | /*=======================*/ |
221 | fts_psort_t* psort_info); /*!< in: parallel sort info */ |
222 | |
223 | /*********************************************************************//** |
224 | Start the parallel tokenization and parallel merge sort */ |
225 | void |
226 | row_fts_start_psort( |
227 | /*================*/ |
228 | fts_psort_t* psort_info); /*!< in: parallel sort info */ |
229 | /*********************************************************************//** |
230 | Kick off the parallel merge and insert thread */ |
231 | void |
232 | row_fts_start_parallel_merge( |
233 | /*=========================*/ |
234 | fts_psort_t* merge_info); /*!< in: parallel sort info */ |
235 | /********************************************************************//** |
236 | Propagate a newly added record up one level in the selection tree |
237 | @return parent where this value propagated to */ |
238 | int |
239 | row_merge_fts_sel_propagate( |
240 | /*========================*/ |
241 | int propogated, /*<! in: tree node propagated */ |
242 | int* sel_tree, /*<! in: selection tree */ |
243 | ulint level, /*<! in: selection tree level */ |
244 | const mrec_t** mrec, /*<! in: sort record */ |
245 | ulint** offsets, /*<! in: record offsets */ |
246 | dict_index_t* index); /*<! in: FTS index */ |
247 | /********************************************************************//** |
248 | Read sorted file containing index data tuples and insert these data |
249 | tuples to the index |
250 | @return DB_SUCCESS or error number */ |
251 | dberr_t |
252 | row_fts_merge_insert( |
253 | /*=================*/ |
254 | dict_index_t* index, /*!< in: index */ |
255 | dict_table_t* table, /*!< in: new table */ |
256 | fts_psort_t* psort_info, /*!< parallel sort info */ |
257 | ulint id) /* !< in: which auxiliary table's data |
258 | to insert to */ |
259 | MY_ATTRIBUTE((nonnull)); |
260 | #endif /* row0ftsort_h */ |
261 | |