1/*****************************************************************************
2
3Copyright (c) 2011, 2016, Oracle and/or its affiliates. All Rights Reserved.
4Copyright (c) 2017, MariaDB Corporation.
5
6This program is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free Software
8Foundation; version 2 of the License.
9
10This program is distributed in the hope that it will be useful, but WITHOUT
11ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
14You should have received a copy of the GNU General Public License along with
15this program; if not, write to the Free Software Foundation, Inc.,
1651 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
17
18*****************************************************************************/
19
20/******************************************************************//**
21@file include/fts0priv.h
22Full text search internal header file
23
24Created 2011/09/02 Sunny Bains
25***********************************************************************/
26
27#ifndef INNOBASE_FTS0PRIV_H
28#define INNOBASE_FTS0PRIV_H
29
30#include "univ.i"
31#include "dict0dict.h"
32#include "pars0pars.h"
33#include "que0que.h"
34#include "que0types.h"
35#include "fts0types.h"
36
37/** The various states of the FTS sub system pertaining to a table with
38FTS indexes defined on it. */
39enum fts_table_state_enum {
40 /* FTS_TABLE_STATE_RUNNING must be 0 since we insert
41 a hard coded '0' at create time
42 to the config table */
43
44 FTS_TABLE_STATE_RUNNING = 0, /*!< Auxiliary tables created OK */
45
46 FTS_TABLE_STATE_OPTIMIZING, /*!< This is a substate of RUNNING */
47
48 FTS_TABLE_STATE_DELETED /*!< All aux tables to be dropped when
49 it's safe to do so */
50};
51
52typedef enum fts_table_state_enum fts_table_state_t;
53
54/** The default time to wait for the background thread (in microseconds). */
55#define FTS_MAX_BACKGROUND_THREAD_WAIT 10000
56
57/** Maximum number of iterations to wait before we complain */
58#define FTS_BACKGROUND_THREAD_WAIT_COUNT 1000
59
60/** The maximum length of a config table parameter name in bytes */
61#define FTS_MAX_CONFIG_NAME_LEN 64
62
63/** The maximum length of the config table's value column in bytes */
64#define FTS_MAX_CONFIG_VALUE_LEN 1024
65
66/** Approx. upper limit of ilist length in bytes. */
67#define FTS_ILIST_MAX_SIZE (64 * 1024)
68
69/** Names of the parameters stored in the FTS config table */
70
71/** The number of seconds after which an OPTIMIZE run will stop */
72#define FTS_OPTIMIZE_LIMIT_IN_SECS "optimize_checkpoint_limit"
73
74/** The next doc id */
75#define FTS_SYNCED_DOC_ID "synced_doc_id"
76
77/** The last word that was OPTIMIZED */
78#define FTS_LAST_OPTIMIZED_WORD "last_optimized_word"
79
80/** Total number of documents that have been deleted. The next_doc_id
81minus this count gives us the total number of documents. */
82#define FTS_TOTAL_DELETED_COUNT "deleted_doc_count"
83
84/** Total number of words parsed from all documents */
85#define FTS_TOTAL_WORD_COUNT "total_word_count"
86
87/** Start of optimize of an FTS index */
88#define FTS_OPTIMIZE_START_TIME "optimize_start_time"
89
90/** End of optimize for an FTS index */
91#define FTS_OPTIMIZE_END_TIME "optimize_end_time"
92
93/** User specified stopword table name */
94#define FTS_STOPWORD_TABLE_NAME "stopword_table_name"
95
96/** Whether to use (turn on/off) stopword */
97#define FTS_USE_STOPWORD "use_stopword"
98
99/** State of the FTS system for this table. It can be one of
100 RUNNING, OPTIMIZING, DELETED (see fts_table_state_enum). */
101#define FTS_TABLE_STATE "table_state"
102
103/** The minimum length of an FTS auxiliary table name's id component
104e.g., for an auxiliary table name
105
106 FTS_<TABLE_ID>_SUFFIX
107
108This constant is the minimum length required to store the <TABLE_ID>
109component.
110*/
111#define FTS_AUX_MIN_TABLE_ID_LENGTH 48
112
113/** Maximum length of an integer stored in the config table value column. */
114#define FTS_MAX_INT_LEN 32
115
116/******************************************************************//**
117Parse an SQL string. %s is replaced with the table's id.
118@return query graph */
119que_t*
120fts_parse_sql(
121/*==========*/
122 fts_table_t* fts_table, /*!< in: FTS aux table */
123 pars_info_t* info, /*!< in: info struct, or NULL */
124 const char* sql) /*!< in: SQL string to parse */
125 MY_ATTRIBUTE((warn_unused_result));
126
127/******************************************************************//**
128Evaluate a parsed SQL statement.
129@return DB_SUCCESS or error code */
130dberr_t
131fts_eval_sql(
132/*=========*/
133 trx_t* trx, /*!< in: transaction */
134 que_t* graph) /*!< in: parsed statement (query graph) */
135 MY_ATTRIBUTE((warn_unused_result));
136
137/******************************************************************//**
138Construct the name of an ancillary FTS table for the given table.
139Caller must allocate enough memory (usually size of MAX_FULL_NAME_LEN)
140for param 'table_name'. */
141void
142fts_get_table_name(
143/*===============*/
144 const fts_table_t*
145 fts_table, /*!< in: FTS aux table info */
146 char* table_name); /*!< in/out: aux table name */
147
148/******************************************************************//**
149Construct the column specification part of the SQL string for selecting the
150indexed FTS columns for the given table. Adds the necessary bound
151ids to the given 'info' and returns the SQL string. Examples:
152
153One indexed column named "text":
154
155 "$sel0",
156 info/ids: sel0 -> "text"
157
158Two indexed columns named "subject" and "content":
159
160 "$sel0, $sel1",
161 info/ids: sel0 -> "subject", sel1 -> "content",
162@return heap-allocated column specification string */
163const char*
164fts_get_select_columns_str(
165/*=======================*/
166 dict_index_t* index, /*!< in: FTS index */
167 pars_info_t* info, /*!< in/out: parser info */
168 mem_heap_t* heap) /*!< in: memory heap */
169 MY_ATTRIBUTE((warn_unused_result));
170
171/** Values for the "option" parameter of fts_doc_fetch_by_doc_id(). They
172select whether we want to get the Doc whose ID is equal to, greater than or
173smaller than the supplied ID */
174#define FTS_FETCH_DOC_BY_ID_EQUAL 1
175#define FTS_FETCH_DOC_BY_ID_LARGE 2
176#define FTS_FETCH_DOC_BY_ID_SMALL 3
177
178/*************************************************************//**
179Fetch document (= a single row's indexed text) with the given
180document id.
181@return DB_SUCCESS if fetch is successful, else error */
182dberr_t
183fts_doc_fetch_by_doc_id(
184/*====================*/
185 fts_get_doc_t* get_doc, /*!< in: state */
186 doc_id_t doc_id, /*!< in: id of document to fetch */
187 dict_index_t* index_to_use, /*!< in: caller supplied FTS index,
188 or NULL */
189 ulint option, /*!< in: search option, one of
190 FTS_FETCH_DOC_BY_ID_* */
191 fts_sql_callback
192 callback, /*!< in: callback to read
193 records */
194 void* arg); /*!< in: callback arg */
195
196/*******************************************************************//**
197Callback function for fetch that stores the text of an FTS document,
198converting each column to UTF-16.
199@return always FALSE */
200ibool
201fts_query_expansion_fetch_doc(
202/*==========================*/
203 void* row, /*!< in: row, a sel_node_t* */
204 void* user_arg); /*!< in: user argument, an fts_doc_t* */
205
206/******************************************************************//**
207Write out a single word's data as new entry/entries in the INDEX table.
208@return DB_SUCCESS if all OK. */
209dberr_t
210fts_write_node(
211/*===========*/
212 trx_t* trx, /*!< in: transaction */
213 que_t** graph, /*!< in: query graph */
214 fts_table_t* fts_table, /*!< in: the FTS aux index */
215 fts_string_t* word, /*!< in: word in UTF-8 */
216 fts_node_t* node) /*!< in: node columns */
217 MY_ATTRIBUTE((warn_unused_result));
218
219/** Check if an FTS token is a stopword or less than fts_min_token_size
220or greater than fts_max_token_size.
221@param[in] token token string
222@param[in] stopwords stopwords rb tree
223@param[in] cs token charset
224@retval true if it is not a stopword and its length is in range
225@retval false if it is a stopword or its length is not in range */
226bool
227fts_check_token(
228 const fts_string_t* token,
229 const ib_rbt_t* stopwords,
230 const CHARSET_INFO* cs);
231
232/******************************************************************//**
233Initialize a document (fts_doc_t). */
234void
235fts_doc_init(
236/*=========*/
237 fts_doc_t* doc); /*!< in: doc to initialize */
238
239/******************************************************************//**
240Do a binary search for a doc id in the array
241@return +ve index if found, -ve index where it should be
242 inserted if not found */
243int
244fts_bsearch(
245/*========*/
246 fts_update_t* array, /*!< in: array to search */
247 int lower, /*!< in: lower bound of array*/
248 int upper, /*!< in: upper bound of array*/
249 doc_id_t doc_id) /*!< in: doc id to lookup */
250 MY_ATTRIBUTE((warn_unused_result));
251/******************************************************************//**
252Free a document (fts_doc_t). */
253void
254fts_doc_free(
255/*=========*/
256 fts_doc_t* doc); /*!< in: document */
257
258/******************************************************************//**
259Free an fts_word_t instance. */
260void
261fts_word_free(
262/*==========*/
263 fts_word_t* word); /*!< in: instance to free.*/
264
265/******************************************************************//**
266Read the rows for the given word from the FTS index.
267@return DB_SUCCESS or error code */
268dberr_t
269fts_index_fetch_nodes(
270/*==================*/
271 trx_t* trx, /*!< in: transaction */
272 que_t** graph, /*!< in: prepared statement */
273 fts_table_t* fts_table, /*!< in: FTS aux table */
274 const fts_string_t*
275 word, /*!< in: the word to fetch */
276 fts_fetch_t* fetch); /*!< in: fetch callback.*/
277
278/******************************************************************//**
279Compare two fts_trx_table_t instances, we actually compare the
280table id's here.
281@return < 0 if v1 < v2, 0 if v1 == v2, > 0 if v1 > v2 */
282UNIV_INLINE
283int
284fts_trx_table_cmp(
285/*==============*/
286 const void* v1, /*!< in: id1 */
287 const void* v2); /*!< in: id2 */
288
289/******************************************************************//**
290Compare a table id with a trx_table_t table id.
291@return < 0 if p1 < p2, 0 if p1 == p2, > 0 if p1 > p2 */
292UNIV_INLINE
293int
294fts_trx_table_id_cmp(
295/*=================*/
296 const void* p1, /*!< in: id1 */
297 const void* p2); /*!< in: id2 */
298
299/******************************************************************//**
300Commit a transaction.
301@return DB_SUCCESS if all OK */
302dberr_t
303fts_sql_commit(
304/*===========*/
305 trx_t* trx); /*!< in: transaction to commit */
306
307/******************************************************************//**
308Rollback a transaction.
309@return DB_SUCCESS if all OK */
310dberr_t
311fts_sql_rollback(
312/*=============*/
313 trx_t* trx); /*!< in: transaction to roll back */
314
315/******************************************************************//**
316Parse an SQL string. %s is replaced with the table's id. Don't acquire
317the dict mutex.
318@return query graph */
319que_t*
320fts_parse_sql_no_dict_lock(
321/*=======================*/
322 pars_info_t* info, /*!< in: parser info */
323 const char* sql) /*!< in: SQL string to parse */
324 MY_ATTRIBUTE((warn_unused_result));
325
326/******************************************************************//**
327Get value from config table. The caller must ensure that enough
328space is allocated for value to hold the column contents.
329@return DB_SUCCESS or error code */
330dberr_t
331fts_config_get_value(
332/*=================*/
333 trx_t* trx, /*!< in: transaction */
334 fts_table_t* fts_table, /*!< in: the indexed FTS table */
335 const char* name, /*!< in: get config value for
336 this parameter name */
337 fts_string_t* value); /*!< out: value read from
338 config table */
339/******************************************************************//**
340Get value specific to an FTS index from the config table. The caller
341must ensure that enough space is allocated for value to hold the
342column contents.
343@return DB_SUCCESS or error code */
344dberr_t
345fts_config_get_index_value(
346/*=======================*/
347 trx_t* trx, /*!< in: transaction */
348 dict_index_t* index, /*!< in: index */
349 const char* param, /*!< in: get config value for
350 this parameter name */
351 fts_string_t* value) /*!< out: value read from
352 config table */
353 MY_ATTRIBUTE((warn_unused_result));
354
355/******************************************************************//**
356Set the value in the config table for name.
357@return DB_SUCCESS or error code */
358dberr_t
359fts_config_set_value(
360/*=================*/
361 trx_t* trx, /*!< in: transaction */
362 fts_table_t* fts_table, /*!< in: the indexed FTS table */
363 const char* name, /*!< in: set config value for
364 this parameter name */
365 const fts_string_t*
366 value); /*!< in: value to update */
367
368/****************************************************************//**
369Set an ulint value in the config table.
370@return DB_SUCCESS if all OK else error code */
371dberr_t
372fts_config_set_ulint(
373/*=================*/
374 trx_t* trx, /*!< in: transaction */
375 fts_table_t* fts_table, /*!< in: the indexed FTS table */
376 const char* name, /*!< in: param name */
377 ulint int_value) /*!< in: value to set */
378 MY_ATTRIBUTE((warn_unused_result));
379
380/******************************************************************//**
381Set the value specific to an FTS index in the config table.
382@return DB_SUCCESS or error code */
383dberr_t
384fts_config_set_index_value(
385/*=======================*/
386 trx_t* trx, /*!< in: transaction */
387 dict_index_t* index, /*!< in: index */
388 const char* param, /*!< in: set config value for
389 this parameter name */
390 fts_string_t* value) /*!< in: value to store in the
391 config table */
392 MY_ATTRIBUTE((warn_unused_result));
393
394#ifdef FTS_OPTIMIZE_DEBUG
395/******************************************************************//**
396Get an ulint value specific to an FTS index from the config table.
397@return DB_SUCCESS or error code */
398dberr_t
399fts_config_get_index_ulint(
400/*=======================*/
401 trx_t* trx, /*!< in: transaction */
402 dict_index_t* index, /*!< in: FTS index */
403 const char* name, /*!< in: param name */
404 ulint* int_value) /*!< out: value */
405 MY_ATTRIBUTE((warn_unused_result));
406#endif /* FTS_OPTIMIZE_DEBUG */
407
408/******************************************************************//**
409Set an ulint value specific to an FTS index in the config table.
410@return DB_SUCCESS or error code */
411dberr_t
412fts_config_set_index_ulint(
413/*=======================*/
414 trx_t* trx, /*!< in: transaction */
415 dict_index_t* index, /*!< in: FTS index */
416 const char* name, /*!< in: param name */
417 ulint int_value) /*!< in: value */
418 MY_ATTRIBUTE((warn_unused_result));
419
420/******************************************************************//**
421Get an ulint value from the config table.
422@return DB_SUCCESS or error code */
423dberr_t
424fts_config_get_ulint(
425/*=================*/
426 trx_t* trx, /*!< in: transaction */
427 fts_table_t* fts_table, /*!< in: the indexed FTS table */
428 const char* name, /*!< in: param name */
429 ulint* int_value); /*!< out: value read */
430
431/******************************************************************//**
432Search cache for word.
433@return the word node vector if found, else NULL */
434const ib_vector_t*
435fts_cache_find_word(
436/*================*/
437 const fts_index_cache_t*
438 index_cache, /*!< in: cache to search */
439 const fts_string_t*
440 text) /*!< in: word to search for */
441 MY_ATTRIBUTE((warn_unused_result));
442
443/******************************************************************//**
444Append deleted doc ids to vector and sort the vector. */
445void
446fts_cache_append_deleted_doc_ids(
447/*=============================*/
448 const fts_cache_t*
449 cache, /*!< in: cache to use */
450 ib_vector_t* vector); /*!< in/out: append to this vector */
451/******************************************************************//**
452Wait for the background thread to start. We poll to detect change
453of state, which is acceptable, since the wait should happen only
454once during startup.
455@return TRUE if the thread started else FALSE (i.e. timed out) */
456ibool
457fts_wait_for_background_thread_to_start(
458/*====================================*/
459 dict_table_t* table, /*!< in: table to which the thread
460 is attached */
461 ulint max_wait); /*!< in: time in microseconds, if set
462 to 0 then it disables timeout
463 checking */
464/******************************************************************//**
465Search the index specific cache for a particular FTS index.
466@return the index specific cache, else NULL */
467fts_index_cache_t*
468fts_find_index_cache(
469/*================*/
470 const fts_cache_t*
471 cache, /*!< in: cache to search */
472 const dict_index_t*
473 index) /*!< in: index to search for */
474 MY_ATTRIBUTE((warn_unused_result));
475
476/******************************************************************//**
477Write the table id to the given buffer (including final NUL). Buffer must be
478at least FTS_AUX_MIN_TABLE_ID_LENGTH bytes long.
479@return number of bytes written */
480UNIV_INLINE
481int
482fts_write_object_id(
483/*================*/
484 ib_id_t id, /*!< in: a table/index id */
485 char* str, /*!< out: buffer to write the id to */
486 bool hex_format MY_ATTRIBUTE((unused)));
487 /*!< in: true for fixed hex format,
488 false for old ambiguous format */
489
490/******************************************************************//**
491Read the table id from the string generated by fts_write_object_id().
492@return TRUE if parse successful */
493UNIV_INLINE
494ibool
495fts_read_object_id(
496/*===============*/
497 ib_id_t* id, /*!< out: a table id */
498 const char* str) /*!< in: string to read the id from */
499 MY_ATTRIBUTE((warn_unused_result));
500
501/******************************************************************//**
502Get the table id of an FTS auxiliary table as a string.
503@return number of bytes written */
504int
505fts_get_table_id(
506/*=============*/
507 const fts_table_t*
508 fts_table, /*!< in: FTS Auxiliary table */
509 char* table_id) /*!< out: table id, must be at least
510 FTS_AUX_MIN_TABLE_ID_LENGTH bytes
511 long */
512 MY_ATTRIBUTE((warn_unused_result));
513
514/******************************************************************//**
515Add the table to the OPTIMIZER's list. */
516void
517fts_optimize_add_table(
518/*===================*/
519 dict_table_t* table); /*!< in: table to add */
520
521/******************************************************************//**
522Construct the prefix name of an FTS table.
523@return own: table name prefix, must be freed with ut_free() */
524char*
525fts_get_table_name_prefix(
526/*======================*/
527 const fts_table_t*
528 fts_table) /*!< in: Auxiliary table type */
529 MY_ATTRIBUTE((warn_unused_result));
530
531/******************************************************************//**
532Add the positions for a doc id to a word node. */
533void
534fts_cache_node_add_positions(
535/*=========================*/
536 fts_cache_t* cache, /*!< in: cache */
537 fts_node_t* node, /*!< in: word node */
538 doc_id_t doc_id, /*!< in: doc id */
539 ib_vector_t* positions); /*!< in: fts_token_t::positions */
540
541/******************************************************************//**
542Create the config table parameter name for an index specific value.
543@return index config parameter name */
544char*
545fts_config_create_index_param_name(
546/*===============================*/
547 const char* param, /*!< in: base name of param */
548 const dict_index_t* index) /*!< in: index for config */
549 MY_ATTRIBUTE((warn_unused_result));
550
551#include "fts0priv.ic"
552
553#endif /* INNOBASE_FTS0PRIV_H */
554