1 | /***************************************************************************** |
2 | |
3 | Copyright (c) 2011, 2016, Oracle and/or its affiliates. All Rights Reserved. |
4 | Copyright (c) 2016, 2017, MariaDB Corporation. |
5 | |
6 | This program is free software; you can redistribute it and/or modify it under |
7 | the terms of the GNU General Public License as published by the Free Software |
8 | Foundation; version 2 of the License. |
9 | |
10 | This program is distributed in the hope that it will be useful, but WITHOUT |
11 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
12 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU General Public License along with |
15 | this program; if not, write to the Free Software Foundation, Inc., |
16 | 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA |
17 | |
18 | *****************************************************************************/ |
19 | |
20 | /******************************************************************//** |
21 | @file include/fts0fts.h |
22 | Full text search header file |
23 | |
24 | Created 2011/09/02 Sunny Bains |
25 | ***********************************************************************/ |
26 | |
27 | #ifndef fts0fts_h |
28 | #define fts0fts_h |
29 | |
30 | #include "ha_prototypes.h" |
31 | |
32 | #include "data0type.h" |
33 | #include "data0types.h" |
34 | #include "dict0types.h" |
35 | #include "hash0hash.h" |
36 | #include "mem0mem.h" |
37 | #include "rem0types.h" |
38 | #include "row0types.h" |
39 | #include "trx0types.h" |
40 | #include "ut0vec.h" |
41 | #include "ut0rbt.h" |
42 | #include "ut0wqueue.h" |
43 | #include "que0types.h" |
44 | #include "ft_global.h" |
45 | #include "mysql/plugin_ftparser.h" |
46 | |
/** Document id value that stands for "no document" (NULL). */
#define FTS_NULL_DOC_ID			0

/** Name of the hidden FTS column used to map to and from the row. */
#define FTS_DOC_ID_COL_NAME		"FTS_DOC_ID"

/** Name of the index that FTS creates. */
#define FTS_DOC_ID_INDEX_NAME		"FTS_DOC_ID_INDEX"

/** Length of FTS_DOC_ID_INDEX_NAME in characters, excluding the
terminating NUL. */
#define FTS_DOC_ID_INDEX_NAME_LEN	16

/** Size of a Doc ID in bytes. */
#define FTS_DOC_ID_LEN			8

/** Number of fields sorted when an FT index is built with FIC.
The three fields are (word, doc_id, position). */
#define FTS_NUM_FIELDS_SORT		3

/** Row count limit: for tables with fewer rows than this, the FIC
merge sort is optimized to use a 4 byte Doc ID to reduce the sort size. */
#define MAX_DOC_ID_OPT_VAL		1073741824
68 | |
69 | /** Document id type. */ |
70 | typedef ib_id_t doc_id_t; |
71 | |
72 | /** doc_id_t printf format */ |
73 | #define FTS_DOC_ID_FORMAT IB_ID_FMT |
74 | |
/** Convert a document id to the InnoDB (BIG ENDIAN) storage format.
Both arguments are forwarded unchanged to mach_write_to_8(). */
#define fts_write_doc_id(d, s)		mach_write_to_8(d, s)

/** Read a document id into the internal format.
The argument is forwarded unchanged to mach_read_from_8(). */
#define fts_read_doc_id(s)		mach_read_from_8(s)

/** Bind the doc id to a variable.
All arguments are forwarded unchanged to pars_info_bind_int8_literal(). */
#define fts_bind_doc_id(i, n, v)	pars_info_bind_int8_literal(i, n, v)
83 | |
/** FTS query mode flags. Their values are the same as the ones
defined in the mysql file ft_global.h. */
#define FTS_NL			0
#define FTS_BOOL		1
#define FTS_SORTED		2
#define FTS_EXPAND		4
#define FTS_NO_RANKING		8
#define FTS_PROXIMITY		16
#define FTS_PHRASE		32
#define FTS_OPT_RANKING		64
94 | |
#define FTS_INDEX_TABLE_IND_NAME	"FTS_INDEX_TABLE_IND"

/** Number of FTS index partitions for a fulltext index. */
#define FTS_NUM_AUX_INDEX		6

/** Threshold at which the optimize thread kicks in automatically. */
#define FTS_OPTIMIZE_THRESHOLD		10000000

/** Guard against exhausting doc ids: the difference between two
consecutive doc ids should not exceed FTS_DOC_ID_MAX_STEP. */
#define FTS_DOC_ID_MAX_STEP		65535

/** Maximum possible fulltext word length in bytes (assuming mbmaxlen=4). */
#define FTS_MAX_WORD_LEN		(HA_FT_MAXCHARLEN * 4)

/** Maximum possible fulltext word length in characters. */
#define FTS_MAX_WORD_LEN_IN_CHAR	HA_FT_MAXCHARLEN

/** Number of columns in the FTS auxiliary tables. */
#define FTS_DELETED_TABLE_NUM_COLS	1
#define FTS_CONFIG_TABLE_NUM_COLS	2
#define FTS_AUX_INDEX_TABLE_NUM_COLS	5

/** Column length for DELETED_TABLE(doc_id BIGINT UNSIGNED). */
#define FTS_DELETED_TABLE_COL_LEN	8

/** Column lengths for CONFIG_TABLE(key CHAR(50), value CHAR(200)). */
#define FTS_CONFIG_TABLE_KEY_COL_LEN	50
#define FTS_CONFIG_TABLE_VALUE_COL_LEN	200

#define FTS_INDEX_FIRST_DOC_ID_LEN	8
#define FTS_INDEX_LAST_DOC_ID_LEN	8
#define FTS_INDEX_DOC_COUNT_LEN		4

/** The ilist column is a BLOB; 0 means VARIABLE SIZE. */
#define FTS_INDEX_ILIST_LEN		0
130 | |
131 | /** Variable specifying the FTS parallel sort degree */ |
132 | extern ulong fts_sort_pll_degree; |
133 | |
134 | /** Variable specifying the number of word to optimize for each optimize table |
135 | call */ |
136 | extern ulong fts_num_word_optimize; |
137 | |
138 | /** Variable specifying whether we do additional FTS diagnostic printout |
139 | in the log */ |
140 | extern char fts_enable_diag_print; |
141 | |
142 | /** FTS rank type, which will be between 0 .. 1 inclusive */ |
143 | typedef float fts_rank_t; |
144 | |
/** State of a row during a transaction.

FTS_NOTHING means that, from the FTS system's point of view, the row can
be forgotten. FTS_INVALID is an internal value that marks invalid states.

NOTE: neither the order nor the values of the enumerators may be changed;
fts_trx_row_get_new_state depends on them being exactly as they are. */
enum fts_row_state {
	FTS_INSERT = 0,
	FTS_MODIFY,
	FTS_DELETE,
	FTS_NOTHING,
	FTS_INVALID
};
158 | |
/** Types of FTS auxiliary tables. */
enum fts_table_type_t {
	/** FTS auxiliary table that is specific to a particular FTS
	index on a table */
	FTS_INDEX_TABLE,

	/** FTS auxiliary table that is common for all FTS index on a
	table */
	FTS_COMMON_TABLE
};
168 | |
/* Forward declarations of FTS types; this section only needs them as
incomplete types. */
struct fts_doc_t;
struct fts_cache_t;
struct fts_token_t;
struct fts_doc_ids_t;
struct fts_index_cache_t;
174 | |
175 | |
/** Initialize the "fts_table" for an internal query into the FTS
auxiliary tables of a parent table.

Assigns the suffix, type, table_id, parent and table members of
*fts_table; no other member is written.

The expansion is one statement WITHOUT a trailing semicolon: terminate
the invocation with ';' as for a function call. This keeps the macro
usable as the body of an un-braced if/else. Every argument is
parenthesized in the expansion, so expressions such as "&table" are
accepted; fts_table and m_table are expanded several times and must
therefore be free of side effects.

@param[out]	fts_table	pointer to the fts_table_t to initialize
@param[in]	m_suffix	value stored in fts_table->suffix
@param[in]	m_type		value stored in fts_table->type
@param[in]	m_table		pointer to the parent table; its id and
				name.m_name members are read */
#define FTS_INIT_FTS_TABLE(fts_table, m_suffix, m_type, m_table) \
do { \
	(fts_table)->suffix = (m_suffix); \
	(fts_table)->type = (m_type); \
	(fts_table)->table_id = (m_table)->id; \
	(fts_table)->parent = (m_table)->name.m_name; \
	(fts_table)->table = (m_table); \
} while (0)
186 | |
/** Initialize the "fts_table" for an internal query into the FTS
auxiliary tables of one FTS index.

Assigns the suffix, type, table_id, parent, table and index_id members
of *fts_table; no other member is written.

The expansion is one statement WITHOUT a trailing semicolon: terminate
the invocation with ';' as for a function call. This keeps the macro
usable as the body of an un-braced if/else. Every argument is
parenthesized in the expansion, so expressions such as "&index" are
accepted; fts_table and m_index are expanded several times and must
therefore be free of side effects.

@param[out]	fts_table	pointer to the fts_table_t to initialize
@param[in]	m_suffix	value stored in fts_table->suffix
@param[in]	m_type		value stored in fts_table->type
@param[in]	m_index		pointer to the index; its id member and
				its table (id, name.m_name) are read */
#define FTS_INIT_INDEX_TABLE(fts_table, m_suffix, m_type, m_index) \
do { \
	(fts_table)->suffix = (m_suffix); \
	(fts_table)->type = (m_type); \
	(fts_table)->table_id = (m_index)->table->id; \
	(fts_table)->parent = (m_index)->table->name.m_name; \
	(fts_table)->table = (m_index)->table; \
	(fts_table)->index_id = (m_index)->id; \
} while (0)
196 | |
197 | /** Information about changes in a single transaction affecting |
198 | the FTS system. */ |
199 | struct fts_trx_t { |
200 | trx_t* trx; /*!< InnoDB transaction */ |
201 | |
202 | ib_vector_t* savepoints; /*!< Active savepoints, must have at |
203 | least one element, the implied |
204 | savepoint */ |
205 | ib_vector_t* last_stmt; /*!< last_stmt */ |
206 | |
207 | mem_heap_t* heap; /*!< heap */ |
208 | }; |
209 | |
210 | /** Information required for transaction savepoint handling. */ |
211 | struct fts_savepoint_t { |
212 | char* name; /*!< First entry is always NULL, the |
213 | default instance. Otherwise the name |
214 | of the savepoint */ |
215 | |
216 | ib_rbt_t* tables; /*!< Modified FTS tables */ |
217 | }; |
218 | |
219 | /** Information about changed rows in a transaction for a single table. */ |
220 | struct fts_trx_table_t { |
221 | dict_table_t* table; /*!< table */ |
222 | |
223 | fts_trx_t* fts_trx; /*!< link to parent */ |
224 | |
225 | ib_rbt_t* rows; /*!< rows changed; indexed by doc-id, |
226 | cells are fts_trx_row_t* */ |
227 | |
228 | fts_doc_ids_t* added_doc_ids; /*!< list of added doc ids (NULL until |
229 | the first addition) */ |
230 | |
231 | /*!< for adding doc ids */ |
232 | que_t* docs_added_graph; |
233 | }; |
234 | |
235 | /** Information about one changed row in a transaction. */ |
236 | struct fts_trx_row_t { |
237 | doc_id_t doc_id; /*!< Id of the ins/upd/del document */ |
238 | |
239 | fts_row_state state; /*!< state of the row */ |
240 | |
241 | ib_vector_t* fts_indexes; /*!< The indexes that are affected */ |
242 | }; |
243 | |
244 | /** List of document ids that were added during a transaction. This |
245 | list is passed on to a background 'Add' thread and OPTIMIZE, so it |
246 | needs its own memory heap. */ |
247 | struct fts_doc_ids_t { |
248 | ib_vector_t* doc_ids; /*!< document ids (each element is |
249 | of type doc_id_t). */ |
250 | |
251 | ib_alloc_t* self_heap; /*!< Allocator used to create an |
252 | instance of this type and the |
253 | doc_ids vector */ |
254 | }; |
255 | |
// FIXME: Get rid of this if possible.
/** A single character of the in-memory Unicode format.

MySQL's character set support for Unicode is woefully inadequate (basic
operations like isalpha etc. are supported only for 8-bit characters),
so FTS has to implement its own. The in-memory format is UTF-16 without
surrogate processing. */
typedef unsigned short ib_uc_t;
262 | |
263 | /** An UTF-16 ro UTF-8 string. */ |
264 | struct fts_string_t { |
265 | byte* f_str; /*!< string, not necessary terminated in |
266 | any way */ |
267 | ulint f_len; /*!< Length of the string in bytes */ |
268 | ulint f_n_char; /*!< Number of characters */ |
269 | }; |
270 | |
271 | /** Query ranked doc ids. */ |
272 | struct fts_ranking_t { |
273 | doc_id_t doc_id; /*!< Document id */ |
274 | |
275 | fts_rank_t rank; /*!< Rank is between 0 .. 1 */ |
276 | |
277 | byte* words; /*!< this contains the words |
278 | that were queried |
279 | and found in this document */ |
280 | ulint words_len; /*!< words len */ |
281 | }; |
282 | |
283 | /** Query result. */ |
284 | struct fts_result_t { |
285 | ib_rbt_node_t* current; /*!< Current element */ |
286 | |
287 | ib_rbt_t* rankings_by_id; /*!< RB tree of type fts_ranking_t |
288 | indexed by doc id */ |
289 | ib_rbt_t* rankings_by_rank;/*!< RB tree of type fts_ranking_t |
290 | indexed by rank */ |
291 | }; |
292 | |
293 | /** This is used to generate the FTS auxiliary table name, we need the |
294 | table id and the index id to generate the column specific FTS auxiliary |
295 | table name. */ |
296 | struct fts_table_t { |
297 | const char* parent; /*!< Parent table name, this is |
298 | required only for the database |
299 | name */ |
300 | |
301 | fts_table_type_t |
302 | type; /*!< The auxiliary table type */ |
303 | |
304 | table_id_t table_id; /*!< The table id */ |
305 | |
306 | index_id_t index_id; /*!< The index id */ |
307 | |
308 | const char* suffix; /*!< The suffix of the fts auxiliary |
309 | table name, can be NULL, not used |
310 | everywhere (yet) */ |
311 | const dict_table_t* |
312 | table; /*!< Parent table */ |
313 | CHARSET_INFO* charset; /*!< charset info if it is for FTS |
314 | index auxiliary table */ |
315 | }; |
316 | |
/** FTS status flags. Every enumerator occupies a bit of its own. */
enum fts_status {
	/** The FTS background thread has finished reading the ADDED
	table, meaning more items can be added to the table */
	BG_THREAD_STOP = 1,

	/** The FTS background thread is ready */
	BG_THREAD_READY = 2,

	/** The FTS add thread has started */
	ADD_THREAD_STARTED = 4,

	/** The ADDED table record is sync-ed after crash recovery */
	ADDED_TABLE_SYNCED = 8,

	/** Set if the table has dict_sys->mutex */
	TABLE_DICT_LOCKED = 16
};

typedef enum fts_status fts_status_t;
337 | |
338 | /** The state of the FTS sub system. */ |
339 | class fts_t { |
340 | public: |
341 | /** fts_t constructor. |
342 | @param[in] table table with FTS indexes |
343 | @param[in,out] heap memory heap where 'this' is stored */ |
344 | fts_t( |
345 | const dict_table_t* table, |
346 | mem_heap_t* heap); |
347 | |
348 | /** fts_t destructor. */ |
349 | ~fts_t(); |
350 | |
351 | /** Mutex protecting bg_threads* and fts_add_wq. */ |
352 | ib_mutex_t bg_threads_mutex; |
353 | |
354 | /** Number of background threads accessing this table. */ |
355 | ulint bg_threads; |
356 | |
357 | /** Status bit regarding fts running state. TRUE if background |
358 | threads running should stop themselves. */ |
359 | ulint fts_status; |
360 | |
361 | /** Work queue for scheduling jobs for the FTS 'Add' thread, or NULL |
362 | if the thread has not yet been created. Each work item is a |
363 | fts_trx_doc_ids_t*. */ |
364 | ib_wqueue_t* add_wq; |
365 | |
366 | /** FTS memory buffer for this table, or NULL if the table has no FTS |
367 | index. */ |
368 | fts_cache_t* cache; |
369 | |
370 | /** FTS doc id hidden column number in the CLUSTERED index. */ |
371 | ulint doc_col; |
372 | |
373 | /** Vector of FTS indexes, this is mainly for caching purposes. */ |
374 | ib_vector_t* indexes; |
375 | |
376 | /** Heap for fts_t allocation. */ |
377 | mem_heap_t* fts_heap; |
378 | }; |
379 | |
struct fts_stopword_t;

/** Status bits for the status field of fts_stopword_t. */
#define STOPWORD_NOT_INIT		0x1
#define STOPWORD_OFF			0x2
#define STOPWORD_FROM_DEFAULT		0x4
#define STOPWORD_USER_TABLE		0x8

extern const char*	fts_default_stopword[];
389 | |
390 | /** Variable specifying the maximum FTS cache size for each table */ |
391 | extern ulong fts_max_cache_size; |
392 | |
393 | /** Variable specifying the total memory allocated for FTS cache */ |
394 | extern ulong fts_max_total_cache_size; |
395 | |
396 | /** Variable specifying the FTS result cache limit for each query */ |
397 | extern ulong fts_result_cache_limit; |
398 | |
399 | /** Variable specifying the maximum FTS max token size */ |
400 | extern ulong fts_max_token_size; |
401 | |
402 | /** Variable specifying the minimum FTS max token size */ |
403 | extern ulong fts_min_token_size; |
404 | |
405 | /** Whether the total memory used for FTS cache is exhausted, and we will |
406 | need a sync to free some memory */ |
407 | extern bool fts_need_sync; |
408 | |
409 | /** Variable specifying the table that has Fulltext index to display its |
410 | content through information schema table */ |
411 | extern char* fts_internal_tbl_name; |
412 | extern char* fts_internal_tbl_name2; |
413 | |
/** Free a query graph under dict_sys->mutex: the mutex is entered,
que_graph_free() is called on the graph, and the mutex is exited again.
The expansion is a single statement; terminate the invocation with ';'.
@param graph	query graph, passed on to que_graph_free() */
#define fts_que_graph_free(graph) \
do { \
	mutex_enter(&dict_sys->mutex); \
	que_graph_free(graph); \
	mutex_exit(&dict_sys->mutex); \
} while (0)
420 | |
421 | /******************************************************************//** |
422 | Create a FTS cache. */ |
423 | fts_cache_t* |
424 | fts_cache_create( |
425 | /*=============*/ |
426 | dict_table_t* table); /*!< table owns the FTS cache */ |
427 | |
428 | /******************************************************************//** |
429 | Create a FTS index cache. |
430 | @return Index Cache */ |
431 | fts_index_cache_t* |
432 | fts_cache_index_cache_create( |
433 | /*=========================*/ |
434 | dict_table_t* table, /*!< in: table with FTS index */ |
435 | dict_index_t* index); /*!< in: FTS index */ |
436 | |
437 | /******************************************************************//** |
438 | Get the next available document id. This function creates a new |
439 | transaction to generate the document id. |
440 | @return DB_SUCCESS if OK */ |
441 | dberr_t |
442 | fts_get_next_doc_id( |
443 | /*================*/ |
444 | const dict_table_t* table, /*!< in: table */ |
445 | doc_id_t* doc_id);/*!< out: new document id */ |
446 | /*********************************************************************//** |
447 | Update the next and last Doc ID in the CONFIG table to be the input |
448 | "doc_id" value (+ 1). We would do so after each FTS index build or |
449 | table truncate */ |
450 | void |
451 | fts_update_next_doc_id( |
452 | /*===================*/ |
453 | trx_t* trx, /*!< in/out: transaction */ |
454 | const dict_table_t* table, /*!< in: table */ |
455 | const char* table_name, /*!< in: table name, or NULL */ |
456 | doc_id_t doc_id); /*!< in: DOC ID to set */ |
457 | |
458 | /******************************************************************//** |
459 | Create a new fts_doc_ids_t. |
460 | @return new fts_doc_ids_t. */ |
461 | fts_doc_ids_t* |
462 | fts_doc_ids_create(void); |
463 | /*=====================*/ |
464 | |
465 | /******************************************************************//** |
466 | Free a fts_doc_ids_t. */ |
467 | void |
468 | fts_doc_ids_free( |
469 | /*=============*/ |
470 | fts_doc_ids_t* doc_ids); /*!< in: doc_ids to free */ |
471 | |
472 | /******************************************************************//** |
473 | Notify the FTS system about an operation on an FTS-indexed table. */ |
474 | void |
475 | fts_trx_add_op( |
476 | /*===========*/ |
477 | trx_t* trx, /*!< in: InnoDB transaction */ |
478 | dict_table_t* table, /*!< in: table */ |
479 | doc_id_t doc_id, /*!< in: doc id */ |
480 | fts_row_state state, /*!< in: state of the row */ |
481 | ib_vector_t* fts_indexes); /*!< in: FTS indexes affected |
482 | (NULL=all) */ |
483 | |
484 | /******************************************************************//** |
485 | Free an FTS trx. */ |
486 | void |
487 | fts_trx_free( |
488 | /*=========*/ |
489 | fts_trx_t* fts_trx); /*!< in, own: FTS trx */ |
490 | |
491 | /** Creates the common auxiliary tables needed for supporting an FTS index |
492 | on the given table. row_mysql_lock_data_dictionary must have been called |
493 | before this. |
494 | The following tables are created. |
495 | CREATE TABLE $FTS_PREFIX_DELETED |
496 | (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id) |
497 | CREATE TABLE $FTS_PREFIX_DELETED_CACHE |
498 | (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id) |
499 | CREATE TABLE $FTS_PREFIX_BEING_DELETED |
500 | (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id) |
501 | CREATE TABLE $FTS_PREFIX_BEING_DELETED_CACHE |
502 | (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id) |
503 | CREATE TABLE $FTS_PREFIX_CONFIG |
504 | (key CHAR(50), value CHAR(200), UNIQUE CLUSTERED INDEX on key) |
505 | @param[in,out] trx transaction |
506 | @param[in] table table with FTS index |
507 | @param[in] skip_doc_id_index Skip index on doc id |
508 | @return DB_SUCCESS if succeed */ |
509 | dberr_t |
510 | fts_create_common_tables( |
511 | trx_t* trx, |
512 | dict_table_t* table, |
513 | bool skip_doc_id_index) |
514 | MY_ATTRIBUTE((nonnull, warn_unused_result)); |
515 | /** Creates the column specific ancillary tables needed for supporting an |
516 | FTS index on the given table. row_mysql_lock_data_dictionary must have |
517 | been called before this. |
518 | |
519 | All FTS AUX Index tables have the following schema. |
520 | CREAT TABLE $FTS_PREFIX_INDEX_[1-6]( |
521 | word VARCHAR(FTS_MAX_WORD_LEN), |
522 | first_doc_id INT NOT NULL, |
523 | last_doc_id UNSIGNED NOT NULL, |
524 | doc_count UNSIGNED INT NOT NULL, |
525 | ilist VARBINARY NOT NULL, |
526 | UNIQUE CLUSTERED INDEX ON (word, first_doc_id)) |
527 | @param[in,out] trx dictionary transaction |
528 | @param[in] index fulltext index |
529 | @param[in] id table id |
530 | @return DB_SUCCESS or error code */ |
531 | dberr_t |
532 | fts_create_index_tables(trx_t* trx, const dict_index_t* index, table_id_t id) |
533 | MY_ATTRIBUTE((nonnull, warn_unused_result)); |
534 | /******************************************************************//** |
535 | Add the FTS document id hidden column. */ |
536 | void |
537 | fts_add_doc_id_column( |
538 | /*==================*/ |
539 | dict_table_t* table, /*!< in/out: Table with FTS index */ |
540 | mem_heap_t* heap); /*!< in: temporary memory heap, or NULL */ |
541 | |
542 | /*********************************************************************//** |
543 | Drops the ancillary tables needed for supporting an FTS index on the |
544 | given table. row_mysql_lock_data_dictionary must have been called before |
545 | this. |
546 | @return DB_SUCCESS or error code */ |
547 | dberr_t |
548 | fts_drop_tables( |
549 | /*============*/ |
550 | trx_t* trx, /*!< in: transaction */ |
551 | dict_table_t* table); /*!< in: table has the FTS |
552 | index */ |
553 | /******************************************************************//** |
554 | The given transaction is about to be committed; do whatever is necessary |
555 | from the FTS system's POV. |
556 | @return DB_SUCCESS or error code */ |
557 | dberr_t |
558 | fts_commit( |
559 | /*=======*/ |
560 | trx_t* trx) /*!< in: transaction */ |
561 | MY_ATTRIBUTE((warn_unused_result)); |
562 | |
563 | /** FTS Query entry point. |
564 | @param[in] index fts index to search |
565 | @param[in] flags FTS search mode |
566 | @param[in] query_str FTS query |
567 | @param[in] query_len FTS query string len in bytes |
568 | @param[in,out] result result doc ids |
569 | @return DB_SUCCESS if successful otherwise error code */ |
570 | dberr_t |
571 | fts_query( |
572 | dict_index_t* index, |
573 | uint flags, |
574 | const byte* query_str, |
575 | ulint query_len, |
576 | fts_result_t** result) |
577 | MY_ATTRIBUTE((warn_unused_result)); |
578 | |
579 | /******************************************************************//** |
580 | Retrieve the FTS Relevance Ranking result for doc with doc_id |
581 | @return the relevance ranking value. */ |
582 | float |
583 | fts_retrieve_ranking( |
584 | /*=================*/ |
585 | fts_result_t* result, /*!< in: FTS result structure */ |
586 | doc_id_t doc_id); /*!< in: the interested document |
587 | doc_id */ |
588 | |
589 | /******************************************************************//** |
590 | FTS Query sort result, returned by fts_query() on fts_ranking_t::rank. */ |
591 | void |
592 | fts_query_sort_result_on_rank( |
593 | /*==========================*/ |
594 | fts_result_t* result); /*!< out: result instance |
595 | to sort.*/ |
596 | |
597 | /******************************************************************//** |
598 | FTS Query free result, returned by fts_query(). */ |
599 | void |
600 | fts_query_free_result( |
601 | /*==================*/ |
602 | fts_result_t* result); /*!< in: result instance |
603 | to free.*/ |
604 | |
605 | /******************************************************************//** |
606 | Extract the doc id from the FTS hidden column. */ |
607 | doc_id_t |
608 | fts_get_doc_id_from_row( |
609 | /*====================*/ |
610 | dict_table_t* table, /*!< in: table */ |
611 | dtuple_t* row); /*!< in: row whose FTS doc id we |
612 | want to extract.*/ |
613 | |
614 | /** Extract the doc id from the record that belongs to index. |
615 | @param[in] table table |
616 | @param[in] rec record contains FTS_DOC_ID |
617 | @param[in] index index of rec |
618 | @param[in] heap heap memory |
619 | @return doc id that was extracted from rec */ |
620 | doc_id_t |
621 | fts_get_doc_id_from_rec( |
622 | dict_table_t* table, |
623 | const rec_t* rec, |
624 | const dict_index_t* index, |
625 | mem_heap_t* heap); |
626 | |
627 | /** Add new fts doc id to the update vector. |
628 | @param[in] table the table that contains the FTS index. |
629 | @param[in,out] ufield the fts doc id field in the update vector. |
630 | No new memory is allocated for this in this |
631 | function. |
632 | @param[in,out] next_doc_id the fts doc id that has been added to the |
633 | update vector. If 0, a new fts doc id is |
634 | automatically generated. The memory provided |
635 | for this argument will be used by the update |
636 | vector. Ensure that the life time of this |
637 | memory matches that of the update vector. |
638 | @return the fts doc id used in the update vector */ |
639 | doc_id_t |
640 | fts_update_doc_id( |
641 | dict_table_t* table, |
642 | upd_field_t* ufield, |
643 | doc_id_t* next_doc_id); |
644 | |
/** Initialize FTS. */
void
fts_startup(void);
650 | |
#if 0 // TODO: Enable this in WL#6608
/** Signal FTS threads to initiate shutdown.
@param[in]	table	table with FTS indexes
@param[in]	fts	fts instance to shutdown */
void
fts_start_shutdown(
	dict_table_t*	table,
	fts_t*		fts);

/** Wait for FTS threads to shutdown.
@param[in]	table	table with FTS indexes
@param[in]	fts	fts instance to shutdown */
void
fts_shutdown(
	dict_table_t*	table,
	fts_t*		fts);
#endif
672 | |
673 | /******************************************************************//** |
674 | Create an instance of fts_t. |
675 | @return instance of fts_t */ |
676 | fts_t* |
677 | fts_create( |
678 | /*=======*/ |
679 | dict_table_t* table); /*!< out: table with FTS |
680 | indexes */ |
681 | |
682 | /**********************************************************************//** |
683 | Free the FTS resources. */ |
684 | void |
685 | fts_free( |
686 | /*=====*/ |
687 | dict_table_t* table); /*!< in/out: table with |
688 | FTS indexes */ |
689 | |
690 | /*********************************************************************//** |
691 | Run OPTIMIZE on the given table. |
692 | @return DB_SUCCESS if all OK */ |
693 | dberr_t |
694 | fts_optimize_table( |
695 | /*===============*/ |
696 | dict_table_t* table); /*!< in: table to optimiza */ |
697 | |
698 | /**********************************************************************//** |
699 | Startup the optimize thread and create the work queue. */ |
700 | void |
701 | fts_optimize_init(void); |
702 | /*====================*/ |
703 | |
704 | /****************************************************************//** |
705 | Drops index ancillary tables for a FTS index |
706 | @return DB_SUCCESS or error code */ |
707 | dberr_t |
708 | fts_drop_index_tables( |
709 | /*==================*/ |
710 | trx_t* trx, /*!< in: transaction */ |
711 | dict_index_t* index) /*!< in: Index to drop */ |
712 | MY_ATTRIBUTE((warn_unused_result)); |
713 | |
714 | /******************************************************************//** |
715 | Remove the table from the OPTIMIZER's list. We do wait for |
716 | acknowledgement from the consumer of the message. */ |
717 | void |
718 | fts_optimize_remove_table( |
719 | /*======================*/ |
720 | dict_table_t* table); /*!< in: table to remove */ |
721 | |
722 | /** Shutdown fts optimize thread. */ |
723 | void |
724 | fts_optimize_shutdown(); |
725 | |
726 | /** Send sync fts cache for the table. |
727 | @param[in] table table to sync */ |
728 | void |
729 | fts_optimize_request_sync_table( |
730 | dict_table_t* table); |
731 | |
732 | /**********************************************************************//** |
733 | Take a FTS savepoint. */ |
734 | void |
735 | fts_savepoint_take( |
736 | /*===============*/ |
737 | fts_trx_t* fts_trx, /*!< in: fts transaction */ |
738 | const char* name); /*!< in: savepoint name */ |
739 | |
740 | /**********************************************************************//** |
741 | Refresh last statement savepoint. */ |
742 | void |
743 | fts_savepoint_laststmt_refresh( |
744 | /*===========================*/ |
745 | trx_t* trx); /*!< in: transaction */ |
746 | |
747 | /**********************************************************************//** |
748 | Release the savepoint data identified by name. */ |
749 | void |
750 | fts_savepoint_release( |
751 | /*==================*/ |
752 | trx_t* trx, /*!< in: transaction */ |
753 | const char* name); /*!< in: savepoint name */ |
754 | |
755 | /** Clear cache. |
756 | @param[in,out] cache fts cache */ |
757 | void |
758 | fts_cache_clear( |
759 | fts_cache_t* cache); |
760 | |
761 | /*********************************************************************//** |
762 | Initialize things in cache. */ |
763 | void |
764 | fts_cache_init( |
765 | /*===========*/ |
766 | fts_cache_t* cache); /*!< in: cache */ |
767 | |
768 | /*********************************************************************//** |
769 | Rollback to and including savepoint indentified by name. */ |
770 | void |
771 | fts_savepoint_rollback( |
772 | /*===================*/ |
773 | trx_t* trx, /*!< in: transaction */ |
774 | const char* name); /*!< in: savepoint name */ |
775 | |
776 | /*********************************************************************//** |
777 | Rollback to and including savepoint indentified by name. */ |
778 | void |
779 | fts_savepoint_rollback_last_stmt( |
780 | /*=============================*/ |
781 | trx_t* trx); /*!< in: transaction */ |
782 | |
/** Drop all orphaned FTS auxiliary tables, that is, those that have
neither a parent table nor an FTS index defined on them. */
void
fts_drop_orphaned_tables(void);
789 | |
790 | /** Run SYNC on the table, i.e., write out data from the cache to the |
791 | FTS auxiliary INDEX table and clear the cache at the end. |
792 | @param[in,out] table fts table |
793 | @param[in] unlock_cache whether unlock cache when write node |
794 | @param[in] wait whether wait for existing sync to finish |
795 | @param[in] has_dict whether has dict operation lock |
796 | @return DB_SUCCESS on success, error code on failure. */ |
797 | dberr_t |
798 | fts_sync_table( |
799 | dict_table_t* table, |
800 | bool unlock_cache, |
801 | bool wait, |
802 | bool has_dict); |
803 | |
804 | /****************************************************************//** |
805 | Free the query graph but check whether dict_sys->mutex is already |
806 | held */ |
807 | void |
808 | fts_que_graph_free_check_lock( |
809 | /*==========================*/ |
810 | fts_table_t* fts_table, /*!< in: FTS table */ |
811 | const fts_index_cache_t*index_cache, /*!< in: FTS index cache */ |
812 | que_t* graph); /*!< in: query graph */ |
813 | |
814 | /****************************************************************//** |
815 | Create an FTS index cache. */ |
816 | CHARSET_INFO* |
817 | fts_index_get_charset( |
818 | /*==================*/ |
819 | dict_index_t* index); /*!< in: FTS index */ |
820 | |
821 | /*********************************************************************//** |
822 | Get the initial Doc ID by consulting the CONFIG table |
823 | @return initial Doc ID */ |
824 | doc_id_t |
825 | fts_init_doc_id( |
826 | /*============*/ |
827 | const dict_table_t* table); /*!< in: table */ |
828 | |
829 | /* Get parent table name if it's a fts aux table |
830 | @param[in] aux_table_name aux table name |
831 | @param[in] aux_table_len aux table length |
832 | @return parent table name, or NULL */ |
833 | char* |
834 | fts_get_parent_table_name( |
835 | const char* aux_table_name, |
836 | ulint aux_table_len); |
837 | |
838 | /******************************************************************//** |
839 | Compare two character strings according to their charset. */ |
840 | extern |
841 | int |
842 | innobase_fts_text_cmp( |
843 | /*==================*/ |
844 | const void* cs, /*!< in: Character set */ |
845 | const void* p1, /*!< in: key */ |
846 | const void* p2); /*!< in: node */ |
847 | |
848 | /******************************************************************//** |
849 | Make all characters in a string lower case. */ |
850 | extern |
851 | size_t |
852 | innobase_fts_casedn_str( |
853 | /*====================*/ |
854 | CHARSET_INFO* cs, /*!< in: Character set */ |
855 | char* src, /*!< in: string to put in |
856 | lower case */ |
857 | size_t src_len, /*!< in: input string length */ |
858 | char* dst, /*!< out: buffer for result |
859 | string */ |
860 | size_t dst_len); /*!< in: buffer size */ |
861 | |
862 | |
863 | /******************************************************************//** |
864 | Compare two character strings according to their charset (prefix variant, judging by the function name - TODO confirm exact semantics). */ |
865 | extern |
866 | int |
867 | innobase_fts_text_cmp_prefix( |
868 | /*=========================*/ |
869 | const void* cs, /*!< in: Character set */ |
870 | const void* p1, /*!< in: key */ |
871 | const void* p2); /*!< in: node */ |
872 | |
873 | /*************************************************************//** |
874 | Get the next token from the text range [start, end) and store it in *token. */ |
875 | extern |
876 | ulint |
877 | innobase_mysql_fts_get_token( |
878 | /*=========================*/ |
879 | CHARSET_INFO* charset, /*!< in: Character set */ |
880 | const byte* start, /*!< in: start of text */ |
881 | const byte* end, /*!< in: one character past |
882 | end of text */ |
883 | fts_string_t* token); /*!< out: token's text */ |
884 | |
885 | /*************************************************************//** |
886 | Get the character size of a token according to the charset. |
887 | @return the token size in characters */ |
888 | ulint |
889 | fts_get_token_size( |
890 | /*===============*/ |
891 | const CHARSET_INFO* cs, /*!< in: Character set */ |
892 | const char* token, /*!< in: token */ |
893 | ulint len); /*!< in: token length */ |
894 | |
895 | /*************************************************************//** |
896 | Internal FULLTEXT tokenizer for MYSQL_FTPARSER_SIMPLE_MODE. |
897 | @return 0 if tokenized successfully */ |
898 | int |
899 | fts_tokenize_document_internal( |
900 | /*===========================*/ |
901 | MYSQL_FTPARSER_PARAM* param, /*!< in: parser parameter */ |
902 | const char* doc, /*!< in: document to tokenize */ |
903 | int len); /*!< in: document length */ |
904 | |
905 | /*********************************************************************//** |
906 | Fetch COUNT(*) from the specified table. |
907 | @return the number of rows in the table */ |
908 | ulint |
909 | fts_get_rows_count( |
910 | /*===============*/ |
911 | fts_table_t* fts_table); /*!< in: fts table to read */ |
912 | |
913 | /*************************************************************//** |
914 | Get the maximum Doc ID in a table if the index "FTS_DOC_ID_INDEX" exists. |
915 | @return max Doc ID, or 0 if index "FTS_DOC_ID_INDEX" does not exist */ |
916 | doc_id_t |
917 | fts_get_max_doc_id( |
918 | /*===============*/ |
919 | dict_table_t* table); /*!< in: user table */ |
920 | |
921 | /******************************************************************//** |
922 | Check whether the user-supplied stopword table exists and is of |
923 | the right format. |
924 | @return the stopword column charset if the table qualifies */ |
925 | CHARSET_INFO* |
926 | fts_valid_stopword_table( |
927 | /*=====================*/ |
928 | const char* stopword_table_name); /*!< in: Stopword table |
929 | name */ |
930 | /****************************************************************//** |
931 | Load the specified stopwords into the FTS cache. |
932 | @return TRUE on success */ |
933 | ibool |
934 | fts_load_stopword( |
935 | /*==============*/ |
936 | const dict_table_t* |
937 | table, /*!< in: Table with FTS */ |
938 | trx_t* trx, /*!< in: Transaction */ |
939 | const char* global_stopword_table, /*!< in: Global stopword table |
940 | name */ |
941 | const char* session_stopword_table, /*!< in: Session stopword table |
942 | name */ |
943 | ibool stopword_is_on, /*!< in: Whether stopword |
944 | option is turned on/off */ |
945 | ibool reload); /*!< in: Whether it is during |
946 | reload of FTS table */ |
947 | |
948 | /****************************************************************//** |
949 | Read the rows from the FTS index. |
950 | @return DB_SUCCESS if OK */ |
951 | dberr_t |
952 | fts_table_fetch_doc_ids( |
953 | /*====================*/ |
954 | trx_t* trx, /*!< in: transaction */ |
955 | fts_table_t* fts_table, /*!< in: aux table */ |
956 | fts_doc_ids_t* doc_ids); /*!< in/out: For collecting |
957 | doc ids */ |
958 | /****************************************************************//** |
959 | Bring the FTS index in sync when the FTS index is first used. |
960 | There may be documents that were not yet synced to the auxiliary |
961 | tables before the last abnormal server shutdown; such documents |
962 | need to be brought into the FTS cache before any further operations. |
963 | @return TRUE if all OK */ |
964 | ibool |
965 | fts_init_index( |
966 | /*===========*/ |
967 | dict_table_t* table, /*!< in: Table with FTS */ |
968 | ibool has_cache_lock); /*!< in: Whether we already |
969 | have cache lock */ |
970 | /*******************************************************************//** |
971 | Add a newly created index to the FTS cache. */ |
972 | void |
973 | fts_add_index( |
974 | /*==========*/ |
975 | dict_index_t* index, /*!< FTS index to be added */ |
976 | dict_table_t* table); /*!< table */ |
977 | |
978 | /*******************************************************************//** |
979 | Drop the auxiliary tables related to an FTS index. |
980 | @return DB_SUCCESS or error code */ |
981 | dberr_t |
982 | fts_drop_index( |
983 | /*===========*/ |
984 | dict_table_t* table, /*!< in: Table where indexes are dropped */ |
985 | dict_index_t* index, /*!< in: Index to be dropped */ |
986 | trx_t* trx); /*!< in: Transaction for the drop */ |
987 | |
988 | /****************************************************************//** |
989 | Rename the auxiliary tables of all FTS indexes of a table. |
990 | @return DB_SUCCESS or error code */ |
991 | dberr_t |
992 | fts_rename_aux_tables( |
993 | /*==================*/ |
994 | dict_table_t* table, /*!< in: user table */ |
995 | const char* new_name, /*!< in: new table name */ |
996 | trx_t* trx); /*!< in: transaction */ |
997 | |
998 | /*******************************************************************//** |
999 | Check that the indexes in fts->indexes are also present in the index |
1000 | cache and in the table->indexes list. |
1001 | @return TRUE if all indexes match */ |
1002 | ibool |
1003 | fts_check_cached_index( |
1004 | /*===================*/ |
1005 | dict_table_t* table); /*!< in: Table where indexes are dropped */ |
1006 | |
1007 | /** Check if all the auxiliary tables associated with the FTS index are in |
1008 | a consistent state. For now consistency is checked only by ensuring |
1009 | index->page_no != FIL_NULL |
1010 | @param[out] base_table table that hosts the fts index (NOTE(review): direction may need to be [in,out] - confirm against the definition) |
1011 | @param[in,out] trx trx handler */ |
1012 | void |
1013 | fts_check_corrupt( |
1014 | dict_table_t* base_table, |
1015 | trx_t* trx); |
1016 | |
1017 | /** Fetch the document from the tuple, tokenize the text data and |
1018 | insert the text data into the fts auxiliary table and |
1019 | its cache. Moreover, this tuple's fields do not contain any information |
1020 | about externally stored fields. This tuple contains data directly |
1021 | converted from mysql. |
1022 | @param[in] ftt FTS transaction table |
1023 | @param[in] doc_id doc id |
1024 | @param[in] tuple tuple from where data can be retrieved |
1025 | and tuple should be arranged in table |
1026 | schema order. */ |
1027 | void |
1028 | fts_add_doc_from_tuple( |
1029 | fts_trx_table_t*ftt, |
1030 | doc_id_t doc_id, |
1031 | const dtuple_t* tuple); |
1032 | |
1033 | /** Create an FTS transaction. |
1034 | @param[in,out] trx InnoDB transaction |
1035 | @return FTS transaction. */ |
1036 | fts_trx_t* |
1037 | fts_trx_create( |
1038 | trx_t* trx); |
1039 | |
1040 | #endif /* fts0fts_h */ |
1041 | |