1 | /***************************************************************************** |
2 | |
3 | Copyright (c) 2011, 2016, Oracle and/or its affiliates. All Rights Reserved. |
4 | Copyright (c) 2017, MariaDB Corporation. |
5 | |
6 | This program is free software; you can redistribute it and/or modify it under |
7 | the terms of the GNU General Public License as published by the Free Software |
8 | Foundation; version 2 of the License. |
9 | |
10 | This program is distributed in the hope that it will be useful, but WITHOUT |
11 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
12 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU General Public License along with |
15 | this program; if not, write to the Free Software Foundation, Inc., |
16 | 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA |
17 | |
18 | *****************************************************************************/ |
19 | |
20 | /******************************************************************//** |
21 | @file include/fts0priv.h |
22 | Full text search internal header file |
23 | |
24 | Created 2011/09/02 Sunny Bains |
25 | ***********************************************************************/ |
26 | |
27 | #ifndef INNOBASE_FTS0PRIV_H |
28 | #define INNOBASE_FTS0PRIV_H |
29 | |
30 | #include "univ.i" |
31 | #include "dict0dict.h" |
32 | #include "pars0pars.h" |
33 | #include "que0que.h" |
34 | #include "que0types.h" |
35 | #include "fts0types.h" |
36 | |
37 | /** The various states of the FTS subsystem pertaining to a table with |
38 | FTS indexes defined on it. */ |
39 | enum fts_table_state_enum { |
40 | /* FTS_TABLE_STATE_RUNNING must be 0 since we insert |
41 | a hard coded '0' at create time |
42 | to the config table */ |
43 | |
44 | FTS_TABLE_STATE_RUNNING = 0, /*!< Auxiliary tables created OK */ |
45 | |
46 | FTS_TABLE_STATE_OPTIMIZING, /*!< This is a substate of RUNNING */ |
47 | |
48 | FTS_TABLE_STATE_DELETED /*!< All aux tables to be dropped when |
49 | it's safe to do so */ |
50 | }; |
51 | |
52 | typedef enum fts_table_state_enum fts_table_state_t; |
53 | |
54 | /** The default time to wait for the background thread (in microseconds). */ |
55 | #define FTS_MAX_BACKGROUND_THREAD_WAIT 10000 |
56 | |
57 | /** Maximum number of iterations to wait before we complain */ |
58 | #define FTS_BACKGROUND_THREAD_WAIT_COUNT 1000 |
59 | |
60 | /** The maximum length of a config table parameter name in bytes */ |
61 | #define FTS_MAX_CONFIG_NAME_LEN 64 |
62 | |
63 | /** The maximum length of the config table's value column in bytes */ |
64 | #define FTS_MAX_CONFIG_VALUE_LEN 1024 |
65 | |
66 | /** Approx. upper limit of ilist length in bytes. */ |
67 | #define FTS_ILIST_MAX_SIZE (64 * 1024) |
68 | |
69 | /** Names of the parameters stored in the FTS config table */ |
70 | |
71 | /** The number of seconds after which an OPTIMIZE run will stop */ |
72 | #define FTS_OPTIMIZE_LIMIT_IN_SECS "optimize_checkpoint_limit" |
73 | |
74 | /** The next doc id */ |
75 | #define FTS_SYNCED_DOC_ID "synced_doc_id" |
76 | |
77 | /** The last word that was OPTIMIZED */ |
78 | #define FTS_LAST_OPTIMIZED_WORD "last_optimized_word" |
79 | |
80 | /** Total number of documents that have been deleted. The next_doc_id |
81 | minus this count gives us the total number of documents. */ |
82 | #define FTS_TOTAL_DELETED_COUNT "deleted_doc_count" |
83 | |
84 | /** Total number of words parsed from all documents */ |
85 | #define FTS_TOTAL_WORD_COUNT "total_word_count" |
86 | |
87 | /** Start of optimize of an FTS index */ |
88 | #define FTS_OPTIMIZE_START_TIME "optimize_start_time" |
89 | |
90 | /** End of optimize for an FTS index */ |
91 | #define FTS_OPTIMIZE_END_TIME "optimize_end_time" |
92 | |
93 | /** User specified stopword table name */ |
94 | #define FTS_STOPWORD_TABLE_NAME "stopword_table_name" |
95 | |
96 | /** Whether to use stopwords (turn them on/off) */ |
97 | #define FTS_USE_STOPWORD "use_stopword" |
98 | |
99 | /** State of the FTS system for this table. It can be one of |
100 | RUNNING, OPTIMIZING, DELETED (see fts_table_state_t). */ |
101 | #define FTS_TABLE_STATE "table_state" |
102 | |
103 | /** The minimum length of an FTS auxiliary table name's id component, |
104 | e.g., for an auxiliary table name |
105 | |
106 | FTS_<TABLE_ID>_SUFFIX |
107 | |
108 | This constant is the minimum buffer length required to store the <TABLE_ID> |
109 | component. |
110 | */ |
111 | #define FTS_AUX_MIN_TABLE_ID_LENGTH 48 |
112 | |
113 | /** Maximum length of an integer stored in the config table value column. */ |
114 | #define FTS_MAX_INT_LEN 32 |
115 | |
116 | /******************************************************************//** |
117 | Parse an SQL string. %s is replaced with the table's id. |
118 | @return query graph (the result must not be ignored) */ |
119 | que_t* |
120 | fts_parse_sql( |
121 | /*==========*/ |
122 | fts_table_t* fts_table, /*!< in: FTS aux table */ |
123 | pars_info_t* info, /*!< in: info struct, or NULL */ |
124 | const char* sql) /*!< in: SQL string to parse */ |
125 | MY_ATTRIBUTE((warn_unused_result)); |
126 | |
127 | /******************************************************************//** |
128 | Evaluate a parsed SQL statement. |
129 | @return DB_SUCCESS or error code (the result must not be ignored) */ |
130 | dberr_t |
131 | fts_eval_sql( |
132 | /*=========*/ |
133 | trx_t* trx, /*!< in: transaction */ |
134 | que_t* graph) /*!< in: parsed statement (query graph) */ |
135 | MY_ATTRIBUTE((warn_unused_result)); |
136 | |
137 | /******************************************************************//** |
138 | Construct the name of an ancillary FTS table for the given table. |
139 | The caller must allocate enough memory (usually the size of MAX_FULL_NAME_LEN) |
140 | for the parameter 'table_name'. */ |
141 | void |
142 | fts_get_table_name( |
143 | /*===============*/ |
144 | const fts_table_t* |
145 | fts_table, /*!< in: FTS aux table info */ |
146 | char* table_name); /*!< in/out: aux table name */ |
147 | |
148 | /******************************************************************//** |
149 | Construct the column specification part of the SQL string for selecting the |
150 | indexed FTS columns for the given table. Adds the necessary bound |
151 | ids to the given 'info' and returns the SQL string. Examples: |
152 | |
153 | One indexed column named "text": |
154 | |
155 | "$sel0", |
156 | info/ids: sel0 -> "text" |
157 | |
158 | Two indexed columns named "subject" and "content": |
159 | |
160 | "$sel0, $sel1", |
161 | info/ids: sel0 -> "subject", sel1 -> "content" |
162 | @return heap-allocated column specification string */ |
163 | const char* |
164 | fts_get_select_columns_str( |
165 | /*=======================*/ |
166 | dict_index_t* index, /*!< in: FTS index */ |
167 | pars_info_t* info, /*!< in/out: parser info */ |
168 | mem_heap_t* heap) /*!< in: memory heap */ |
169 | MY_ATTRIBUTE((warn_unused_result)); |
170 | |
171 | /** Values for the "option" argument of fts_doc_fetch_by_doc_id(). They select |
172 | whether we want to get the Doc whose ID is equal to, greater than or smaller |
173 | than the supplied ID */ |
174 | #define FTS_FETCH_DOC_BY_ID_EQUAL 1 |
175 | #define FTS_FETCH_DOC_BY_ID_LARGE 2 |
176 | #define FTS_FETCH_DOC_BY_ID_SMALL 3 |
177 | |
178 | /*************************************************************//** |
179 | Fetch document (= a single row's indexed text) with the given |
180 | document id. |
181 | @return DB_SUCCESS if fetch is successful, else error */ |
182 | dberr_t |
183 | fts_doc_fetch_by_doc_id( |
184 | /*====================*/ |
185 | fts_get_doc_t* get_doc, /*!< in: state */ |
186 | doc_id_t doc_id, /*!< in: id of document to fetch */ |
187 | dict_index_t* index_to_use, /*!< in: caller supplied FTS index, |
188 | or NULL */ |
189 | ulint option, /*!< in: search option, one of the |
190 | FTS_FETCH_DOC_BY_ID_* values */ |
191 | fts_sql_callback |
192 | callback, /*!< in: callback to read |
193 | records */ |
194 | void* arg); /*!< in: callback arg */ |
195 | |
196 | /*******************************************************************//** |
197 | Callback function for fetch that stores the text of an FTS document, |
198 | converting each column to UTF-16. |
199 | @return always FALSE */ |
200 | ibool |
201 | fts_query_expansion_fetch_doc( |
202 | /*==========================*/ |
203 | void* row, /*!< in: sel_node_t* passed as void* */ |
204 | void* user_arg); /*!< in: fts_doc_t* passed as void* */ |
205 | |
206 | /******************************************************************//** |
207 | Write out a single word's data as new entry/entries in the INDEX table. |
208 | @return DB_SUCCESS if all OK (the result must not be ignored) */ |
209 | dberr_t |
210 | fts_write_node( |
211 | /*===========*/ |
212 | trx_t* trx, /*!< in: transaction */ |
213 | que_t** graph, /*!< in: query graph */ |
214 | fts_table_t* fts_table, /*!< in: the FTS aux index */ |
215 | fts_string_t* word, /*!< in: word in UTF-8 */ |
216 | fts_node_t* node) /*!< in: node columns */ |
217 | MY_ATTRIBUTE((warn_unused_result)); |
218 | |
219 | /** Check if an FTS token is a stopword, or shorter than fts_min_token_size, |
220 | or longer than fts_max_token_size. |
221 | @param[in] token token string |
222 | @param[in] stopwords stopwords rb tree |
223 | @param[in] cs token charset |
224 | @retval true if it is not a stopword and its length is in range |
225 | @retval false if it is a stopword or its length is not in range */ |
226 | bool |
227 | fts_check_token( |
228 | const fts_string_t* token, |
229 | const ib_rbt_t* stopwords, |
230 | const CHARSET_INFO* cs); |
231 | |
232 | /******************************************************************//** |
233 | Initialize an FTS document (fts_doc_t). */ |
234 | void |
235 | fts_doc_init( |
236 | /*=========*/ |
237 | fts_doc_t* doc); /*!< in: doc to initialize */ |
238 | |
239 | /******************************************************************//** |
240 | Do a binary search for a doc id in the array. |
241 | @return +ve index if found, -ve index where it should be |
242 | inserted if not found */ |
243 | int |
244 | fts_bsearch( |
245 | /*========*/ |
246 | fts_update_t* array, /*!< in: array to search */ |
247 | int lower, /*!< in: lower bound of array*/ |
248 | int upper, /*!< in: upper bound of array*/ |
249 | doc_id_t doc_id) /*!< in: doc id to lookup */ |
250 | MY_ATTRIBUTE((warn_unused_result)); |
251 | /******************************************************************//** |
252 | Free an FTS document (fts_doc_t). */ |
253 | void |
254 | fts_doc_free( |
255 | /*=========*/ |
256 | fts_doc_t* doc); /*!< in: document to free */ |
257 | |
258 | /******************************************************************//** |
259 | Free an fts_word_t instance. */ |
260 | void |
261 | fts_word_free( |
262 | /*==========*/ |
263 | fts_word_t* word); /*!< in: instance to free.*/ |
264 | |
265 | /******************************************************************//** |
266 | Read the rows from the FTS index. |
267 | @return DB_SUCCESS or error code */ |
268 | dberr_t |
269 | fts_index_fetch_nodes( |
270 | /*==================*/ |
271 | trx_t* trx, /*!< in: transaction */ |
272 | que_t** graph, /*!< in: prepared statement */ |
273 | fts_table_t* fts_table, /*!< in: FTS aux table */ |
274 | const fts_string_t* |
275 | word, /*!< in: the word to fetch */ |
276 | fts_fetch_t* fetch); /*!< in: fetch callback.*/ |
277 | |
278 | /******************************************************************//** |
279 | Compare two fts_trx_table_t instances; we actually compare the |
280 | table ids here. |
281 | @return < 0 if id1 < id2, 0 if id1 == id2, > 0 if id1 > id2 */ |
282 | UNIV_INLINE |
283 | int |
284 | fts_trx_table_cmp( |
285 | /*==============*/ |
286 | const void* v1, /*!< in: id1 */ |
287 | const void* v2); /*!< in: id2 */ |
288 | |
289 | /******************************************************************//** |
290 | Compare a table id with a trx_table_t table id. |
291 | @return < 0 if id1 < id2, 0 if id1 == id2, > 0 if id1 > id2 */ |
292 | UNIV_INLINE |
293 | int |
294 | fts_trx_table_id_cmp( |
295 | /*=================*/ |
296 | const void* p1, /*!< in: id1 */ |
297 | const void* p2); /*!< in: id2 */ |
298 | |
299 | /******************************************************************//** |
300 | Commit the given transaction. |
301 | @return DB_SUCCESS if all OK */ |
302 | dberr_t |
303 | fts_sql_commit( |
304 | /*===========*/ |
305 | trx_t* trx); /*!< in: transaction to commit */ |
306 | |
307 | /******************************************************************//** |
308 | Rollback the given transaction. |
309 | @return DB_SUCCESS if all OK */ |
310 | dberr_t |
311 | fts_sql_rollback( |
312 | /*=============*/ |
313 | trx_t* trx); /*!< in: transaction to roll back */ |
314 | |
315 | /******************************************************************//** |
316 | Parse an SQL string. %s is replaced with the table's id. Does not acquire |
317 | the dict mutex. |
318 | @return query graph (the result must not be ignored) */ |
319 | que_t* |
320 | fts_parse_sql_no_dict_lock( |
321 | /*=======================*/ |
322 | pars_info_t* info, /*!< in: parser info */ |
323 | const char* sql) /*!< in: SQL string to parse */ |
324 | MY_ATTRIBUTE((warn_unused_result)); |
325 | |
326 | /******************************************************************//** |
327 | Get value from config table. The caller must ensure that enough |
328 | space is allocated for value to hold the column contents. |
329 | @return DB_SUCCESS or error code */ |
330 | dberr_t |
331 | fts_config_get_value( |
332 | /*=================*/ |
333 | trx_t* trx, /*!< in: transaction */ |
334 | fts_table_t* fts_table, /*!< in: the indexed FTS table */ |
335 | const char* name, /*!< in: get config value for |
336 | this parameter name */ |
337 | fts_string_t* value); /*!< out: value read from |
338 | config table */ |
339 | /******************************************************************//** |
340 | Get value specific to an FTS index from the config table. The caller |
341 | must ensure that enough space is allocated for value to hold the |
342 | column contents. |
343 | @return DB_SUCCESS or error code */ |
344 | dberr_t |
345 | fts_config_get_index_value( |
346 | /*=======================*/ |
347 | trx_t* trx, /*!< in: transaction */ |
348 | dict_index_t* index, /*!< in: index */ |
349 | const char* param, /*!< in: get config value for |
350 | this parameter name */ |
351 | fts_string_t* value) /*!< out: value read from |
352 | config table */ |
353 | MY_ATTRIBUTE((warn_unused_result)); |
354 | |
355 | /******************************************************************//** |
356 | Set the value in the config table for name. |
357 | @return DB_SUCCESS or error code */ |
358 | dberr_t |
359 | fts_config_set_value( |
360 | /*=================*/ |
361 | trx_t* trx, /*!< in: transaction */ |
362 | fts_table_t* fts_table, /*!< in: the indexed FTS table */ |
363 | const char* name, /*!< in: set config value for |
364 | this parameter name */ |
365 | const fts_string_t* |
366 | value); /*!< in: value to update */ |
367 | |
368 | /****************************************************************//** |
369 | Set an ulint value in the config table. |
370 | @return DB_SUCCESS if all OK, else error code */ |
371 | dberr_t |
372 | fts_config_set_ulint( |
373 | /*=================*/ |
374 | trx_t* trx, /*!< in: transaction */ |
375 | fts_table_t* fts_table, /*!< in: the indexed FTS table */ |
376 | const char* name, /*!< in: param name */ |
377 | ulint int_value) /*!< in: value to set */ |
378 | MY_ATTRIBUTE((warn_unused_result)); |
379 | |
380 | /******************************************************************//** |
381 | Set the value specific to an FTS index in the config table. |
382 | @return DB_SUCCESS or error code */ |
383 | dberr_t |
384 | fts_config_set_index_value( |
385 | /*=======================*/ |
386 | trx_t* trx, /*!< in: transaction */ |
387 | dict_index_t* index, /*!< in: index */ |
388 | const char* param, /*!< in: set config value for |
389 | this parameter name */ |
390 | fts_string_t* value) /*!< in: value to set in the |
391 | config table */ |
392 | MY_ATTRIBUTE((warn_unused_result)); |
393 | |
394 | #ifdef FTS_OPTIMIZE_DEBUG |
395 | /******************************************************************//** |
396 | Get an ulint value from the config table. Only declared when FTS_OPTIMIZE_DEBUG is defined. |
397 | @return DB_SUCCESS or error code */ |
398 | dberr_t |
399 | fts_config_get_index_ulint( |
400 | /*=======================*/ |
401 | trx_t* trx, /*!< in: transaction */ |
402 | dict_index_t* index, /*!< in: FTS index */ |
403 | const char* name, /*!< in: param name */ |
404 | ulint* int_value) /*!< out: value */ |
405 | MY_ATTRIBUTE((warn_unused_result)); |
406 | #endif /* FTS_OPTIMIZE_DEBUG */ |
407 | |
408 | /******************************************************************//** |
409 | Set an ulint value in the config table. |
410 | @return DB_SUCCESS or error code */ |
411 | dberr_t |
412 | fts_config_set_index_ulint( |
413 | /*=======================*/ |
414 | trx_t* trx, /*!< in: transaction */ |
415 | dict_index_t* index, /*!< in: FTS index */ |
416 | const char* name, /*!< in: param name */ |
417 | ulint int_value) /*!< in: value */ |
418 | MY_ATTRIBUTE((warn_unused_result)); |
419 | |
420 | /******************************************************************//** |
421 | Get an ulint value from the config table. |
422 | @return DB_SUCCESS or error code */ |
423 | dberr_t |
424 | fts_config_get_ulint( |
425 | /*=================*/ |
426 | trx_t* trx, /*!< in: transaction */ |
427 | fts_table_t* fts_table, /*!< in: the indexed FTS table */ |
428 | const char* name, /*!< in: param name */ |
429 | ulint* int_value); /*!< out: value read */ |
430 | |
431 | /******************************************************************//** |
432 | Search the cache for a word. |
433 | @return the word node vector if found, else NULL */ |
434 | const ib_vector_t* |
435 | fts_cache_find_word( |
436 | /*================*/ |
437 | const fts_index_cache_t* |
438 | index_cache, /*!< in: cache to search */ |
439 | const fts_string_t* |
440 | text) /*!< in: word to search for */ |
441 | MY_ATTRIBUTE((warn_unused_result)); |
442 | |
443 | /******************************************************************//** |
444 | Append deleted doc ids to vector and sort the vector. */ |
445 | void |
446 | fts_cache_append_deleted_doc_ids( |
447 | /*=============================*/ |
448 | const fts_cache_t* |
449 | cache, /*!< in: cache to use */ |
450 | ib_vector_t* vector); /*!< in/out: append to this vector */ |
451 | /******************************************************************//** |
452 | Wait for the background thread to start. We poll to detect change |
453 | of state, which is acceptable, since the wait should happen only |
454 | once during startup. |
455 | @return TRUE if the thread started, else FALSE (i.e. timed out) */ |
456 | ibool |
457 | fts_wait_for_background_thread_to_start( |
458 | /*====================================*/ |
459 | dict_table_t* table, /*!< in: table to which the thread |
460 | is attached */ |
461 | ulint max_wait); /*!< in: time in microseconds, if set |
462 | to 0 then it disables timeout |
463 | checking */ |
464 | /******************************************************************//** |
465 | Search the index specific cache for a particular FTS index. |
466 | @return the index specific cache if found, else NULL */ |
467 | fts_index_cache_t* |
468 | fts_find_index_cache( |
469 | /*================*/ |
470 | const fts_cache_t* |
471 | cache, /*!< in: cache to search */ |
472 | const dict_index_t* |
473 | index) /*!< in: index to search for */ |
474 | MY_ATTRIBUTE((warn_unused_result)); |
475 | |
476 | /******************************************************************//** |
477 | Write the table id to the given buffer (including final NUL). Buffer must be |
478 | at least FTS_AUX_MIN_TABLE_ID_LENGTH bytes long. |
479 | @return number of bytes written */ |
480 | UNIV_INLINE |
481 | int |
482 | fts_write_object_id( |
483 | /*================*/ |
484 | ib_id_t id, /*!< in: a table/index id */ |
485 | char* str, /*!< out: buffer to write the id to */ |
486 | bool hex_format MY_ATTRIBUTE((unused))); |
487 | /*!< in: true for fixed hex format, |
488 | false for old ambiguous format */ |
489 | |
490 | /******************************************************************//** |
491 | Read the table id from the string generated by fts_write_object_id(). |
492 | @return TRUE if the parse was successful */ |
493 | UNIV_INLINE |
494 | ibool |
495 | fts_read_object_id( |
496 | /*===============*/ |
497 | ib_id_t* id, /*!< out: a table id */ |
498 | const char* str) /*!< in: buffer to read from */ |
499 | MY_ATTRIBUTE((warn_unused_result)); |
500 | |
501 | /******************************************************************//** |
502 | Get the table id of the given FTS auxiliary table, written to 'table_id'. |
503 | @return number of bytes written */ |
504 | int |
505 | fts_get_table_id( |
506 | /*=============*/ |
507 | const fts_table_t* |
508 | fts_table, /*!< in: FTS Auxiliary table */ |
509 | char* table_id) /*!< out: table id, must be at least |
510 | FTS_AUX_MIN_TABLE_ID_LENGTH bytes |
511 | long */ |
512 | MY_ATTRIBUTE((warn_unused_result)); |
513 | |
514 | /******************************************************************//** |
515 | Add the table to the OPTIMIZER's list. */ |
516 | void |
517 | fts_optimize_add_table( |
518 | /*===================*/ |
519 | dict_table_t* table); /*!< in: table to add */ |
520 | |
521 | /******************************************************************//** |
522 | Construct the prefix name of an FTS table. |
523 | @return own: table name prefix, must be freed with ut_free() */ |
524 | char* |
525 | fts_get_table_name_prefix( |
526 | /*======================*/ |
527 | const fts_table_t* |
528 | fts_table) /*!< in: Auxiliary table type */ |
529 | MY_ATTRIBUTE((warn_unused_result)); |
530 | |
531 | /******************************************************************//** |
532 | Add the positions for a doc id to a word node. */ |
533 | void |
534 | fts_cache_node_add_positions( |
535 | /*=========================*/ |
536 | fts_cache_t* cache, /*!< in: cache */ |
537 | fts_node_t* node, /*!< in: word node */ |
538 | doc_id_t doc_id, /*!< in: doc id */ |
539 | ib_vector_t* positions); /*!< in: fts_token_t::positions */ |
540 | |
541 | /******************************************************************//** |
542 | Create the config table parameter name for retrieving an index specific value. |
543 | @return index config parameter name */ |
544 | char* |
545 | fts_config_create_index_param_name( |
546 | /*===============================*/ |
547 | const char* param, /*!< in: base name of param */ |
548 | const dict_index_t* index) /*!< in: index for config */ |
549 | MY_ATTRIBUTE((warn_unused_result)); |
550 | |
551 | #include "fts0priv.ic" |
552 | |
553 | #endif /* INNOBASE_FTS0PRIV_H */ |
554 | |