1/*****************************************************************************
2
3Copyright (c) 2011, 2017, Oracle and/or its affiliates. All Rights Reserved.
4Copyright (c) 2016, 2018, MariaDB Corporation.
5
6This program is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free Software
8Foundation; version 2 of the License.
9
10This program is distributed in the hope that it will be useful, but WITHOUT
11ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
14You should have received a copy of the GNU General Public License along with
15this program; if not, write to the Free Software Foundation, Inc.,
1651 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
17
18*****************************************************************************/
19
20/**************************************************//**
21@file fts/fts0fts.cc
22Full Text Search interface
23***********************************************************************/
24
25#include "ha_prototypes.h"
26
27#include "trx0roll.h"
28#include "row0mysql.h"
29#include "row0upd.h"
30#include "dict0types.h"
31#include "dict0stats_bg.h"
32#include "row0sel.h"
33#include "fts0fts.h"
34#include "fts0priv.h"
35#include "fts0types.h"
36#include "fts0types.ic"
37#include "fts0vlc.ic"
38#include "fts0plugin.h"
39#include "dict0priv.h"
40#include "dict0stats.h"
41#include "btr0pcur.h"
42#include "sync0sync.h"
43#include "ut0new.h"
44
45static const ulint FTS_MAX_ID_LEN = 32;
46
47/** Column name from the FTS config table */
48#define FTS_MAX_CACHE_SIZE_IN_MB "cache_size_in_mb"
49
50/** Verify if a aux table name is a obsolete table
51by looking up the key word in the obsolete table names */
52#define FTS_IS_OBSOLETE_AUX_TABLE(table_name) \
53 (strstr((table_name), "DOC_ID") != NULL \
54 || strstr((table_name), "ADDED") != NULL \
55 || strstr((table_name), "STOPWORDS") != NULL)
56
57/** This is maximum FTS cache for each table and would be
58a configurable variable */
59ulong fts_max_cache_size;
60
61/** Whether the total memory used for FTS cache is exhausted, and we will
62need a sync to free some memory */
63bool fts_need_sync = false;
64
65/** Variable specifying the total memory allocated for FTS cache */
66ulong fts_max_total_cache_size;
67
68/** This is FTS result cache limit for each query and would be
69a configurable variable */
70ulong fts_result_cache_limit;
71
72/** Variable specifying the maximum FTS max token size */
73ulong fts_max_token_size;
74
75/** Variable specifying the minimum FTS max token size */
76ulong fts_min_token_size;
77
78
79// FIXME: testing
80static ib_time_t elapsed_time = 0;
81static ulint n_nodes = 0;
82
83#ifdef FTS_CACHE_SIZE_DEBUG
84/** The cache size permissible lower limit (1K) */
85static const ulint FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB = 1;
86
87/** The cache size permissible upper limit (1G) */
88static const ulint FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB = 1024;
89#endif
90
91/** Time to sleep after DEADLOCK error before retrying operation. */
92static const ulint FTS_DEADLOCK_RETRY_WAIT = 100000;
93
94/** variable to record innodb_fts_internal_tbl_name for information
95schema table INNODB_FTS_INSERTED etc. */
96char* fts_internal_tbl_name = NULL;
97char* fts_internal_tbl_name2 = NULL;
98
/** InnoDB default stopword list.
Several stopword lists exist; the words below come from the
"Google Stopword" list. Reference:
http://meta.wikimedia.org/wiki/Stop_word_list/google_stop_word_list.
The final content of the InnoDB default stopword list is still pending
a decision.
The array is terminated by a NULL entry. Note that "the" is listed
twice; both entries are kept so that the list content is unchanged. */
const char *fts_default_stopword[] =
{
	"a", "about", "an", "are", "as", "at",
	"be", "by", "com", "de", "en", "for",
	"from", "how", "i", "in", "is", "it",
	"la", "of", "on", "or", "that", "the",
	"this", "to", "was", "what", "when", "where",
	"who", "will", "with", "und", "the", "www",
	NULL
};
145
146/** For storing table info when checking for orphaned tables. */
147struct fts_aux_table_t {
148 table_id_t id; /*!< Table id */
149 table_id_t parent_id; /*!< Parent table id */
150 table_id_t index_id; /*!< Table FT index id */
151 char* name; /*!< Name of the table */
152};
153
/** Suffixes of the FTS auxiliary tables that are shared by all FT
indexes. The list is terminated by a NULL entry. */
const char* fts_common_tables[] = {
	"BEING_DELETED", "BEING_DELETED_CACHE",
	"CONFIG",
	"DELETED", "DELETED_CACHE",
	NULL
};
163
164/** FTS auxiliary INDEX split intervals. */
165const fts_index_selector_t fts_index_selector[] = {
166 { 9, "INDEX_1" },
167 { 65, "INDEX_2" },
168 { 70, "INDEX_3" },
169 { 75, "INDEX_4" },
170 { 80, "INDEX_5" },
171 { 85, "INDEX_6" },
172 { 0 , NULL }
173};
174
175/** Default config values for FTS indexes on a table. */
176static const char* fts_config_table_insert_values_sql =
177 "BEGIN\n"
178 "\n"
179 "INSERT INTO $config_table VALUES('"
180 FTS_MAX_CACHE_SIZE_IN_MB "', '256');\n"
181 ""
182 "INSERT INTO $config_table VALUES('"
183 FTS_OPTIMIZE_LIMIT_IN_SECS "', '180');\n"
184 ""
185 "INSERT INTO $config_table VALUES ('"
186 FTS_SYNCED_DOC_ID "', '0');\n"
187 ""
188 "INSERT INTO $config_table VALUES ('"
189 FTS_TOTAL_DELETED_COUNT "', '0');\n"
190 "" /* Note: 0 == FTS_TABLE_STATE_RUNNING */
191 "INSERT INTO $config_table VALUES ('"
192 FTS_TABLE_STATE "', '0');\n";
193
194/** FTS tokenize parmameter for plugin parser */
195struct fts_tokenize_param_t {
196 fts_doc_t* result_doc; /*!< Result doc for tokens */
197 ulint add_pos; /*!< Added position for tokens */
198};
199
200/** Run SYNC on the table, i.e., write out data from the cache to the
201FTS auxiliary INDEX table and clear the cache at the end.
202@param[in,out] sync sync state
203@param[in] unlock_cache whether unlock cache lock when write node
204@param[in] wait whether wait when a sync is in progress
205@param[in] has_dict whether has dict operation lock
206@return DB_SUCCESS if all OK */
207static
208dberr_t
209fts_sync(
210 fts_sync_t* sync,
211 bool unlock_cache,
212 bool wait,
213 bool has_dict);
214
215/****************************************************************//**
216Release all resources help by the words rb tree e.g., the node ilist. */
217static
218void
219fts_words_free(
220/*===========*/
221 ib_rbt_t* words) /*!< in: rb tree of words */
222 MY_ATTRIBUTE((nonnull));
223#ifdef FTS_CACHE_SIZE_DEBUG
224/****************************************************************//**
225Read the max cache size parameter from the config table. */
226static
227void
228fts_update_max_cache_size(
229/*======================*/
230 fts_sync_t* sync); /*!< in: sync state */
231#endif
232
233/*********************************************************************//**
234This function fetches the document just inserted right before
235we commit the transaction, and tokenize the inserted text data
236and insert into FTS auxiliary table and its cache.
237@return TRUE if successful */
238static
239ulint
240fts_add_doc_by_id(
241/*==============*/
242 fts_trx_table_t*ftt, /*!< in: FTS trx table */
243 doc_id_t doc_id, /*!< in: doc id */
244 ib_vector_t* fts_indexes MY_ATTRIBUTE((unused)));
245 /*!< in: affected fts indexes */
246/******************************************************************//**
247Update the last document id. This function could create a new
248transaction to update the last document id.
249@return DB_SUCCESS if OK */
250static
251dberr_t
252fts_update_sync_doc_id(
253/*===================*/
254 const dict_table_t* table, /*!< in: table */
255 const char* table_name, /*!< in: table name, or NULL */
256 doc_id_t doc_id, /*!< in: last document id */
257 trx_t* trx) /*!< in: update trx, or NULL */
258 MY_ATTRIBUTE((nonnull(1)));
259
260/** Tokenize a document.
261@param[in,out] doc document to tokenize
262@param[out] result tokenization result
263@param[in] parser pluggable parser */
264static
265void
266fts_tokenize_document(
267 fts_doc_t* doc,
268 fts_doc_t* result,
269 st_mysql_ftparser* parser);
270
271/** Continue to tokenize a document.
272@param[in,out] doc document to tokenize
273@param[in] add_pos add this position to all tokens from this tokenization
274@param[out] result tokenization result
275@param[in] parser pluggable parser */
276static
277void
278fts_tokenize_document_next(
279 fts_doc_t* doc,
280 ulint add_pos,
281 fts_doc_t* result,
282 st_mysql_ftparser* parser);
283
284/** Create the vector of fts_get_doc_t instances.
285@param[in,out] cache fts cache
286@return vector of fts_get_doc_t instances */
287static
288ib_vector_t*
289fts_get_docs_create(
290 fts_cache_t* cache);
291
292/** Free the FTS cache.
293@param[in,out] cache to be freed */
294static
295void
296fts_cache_destroy(fts_cache_t* cache)
297{
298 rw_lock_free(&cache->lock);
299 rw_lock_free(&cache->init_lock);
300 mutex_free(&cache->optimize_lock);
301 mutex_free(&cache->deleted_lock);
302 mutex_free(&cache->doc_id_lock);
303 os_event_destroy(cache->sync->event);
304
305 if (cache->stopword_info.cached_stopword) {
306 rbt_free(cache->stopword_info.cached_stopword);
307 }
308
309 if (cache->sync_heap->arg) {
310 mem_heap_free(static_cast<mem_heap_t*>(cache->sync_heap->arg));
311 }
312
313 mem_heap_free(cache->cache_heap);
314}
315
316/** Get a character set based on precise type.
317@param prtype precise type
318@return the corresponding character set */
319UNIV_INLINE
320CHARSET_INFO*
321fts_get_charset(ulint prtype)
322{
323#ifdef UNIV_DEBUG
324 switch (prtype & DATA_MYSQL_TYPE_MASK) {
325 case MYSQL_TYPE_BIT:
326 case MYSQL_TYPE_STRING:
327 case MYSQL_TYPE_VAR_STRING:
328 case MYSQL_TYPE_TINY_BLOB:
329 case MYSQL_TYPE_MEDIUM_BLOB:
330 case MYSQL_TYPE_BLOB:
331 case MYSQL_TYPE_LONG_BLOB:
332 case MYSQL_TYPE_VARCHAR:
333 break;
334 default:
335 ut_error;
336 }
337#endif /* UNIV_DEBUG */
338
339 uint cs_num = (uint) dtype_get_charset_coll(prtype);
340
341 if (CHARSET_INFO* cs = get_charset(cs_num, MYF(MY_WME))) {
342 return(cs);
343 }
344
345 ib::fatal() << "Unable to find charset-collation " << cs_num;
346 return(NULL);
347}
348
349/****************************************************************//**
350This function loads the default InnoDB stopword list */
351static
352void
353fts_load_default_stopword(
354/*======================*/
355 fts_stopword_t* stopword_info) /*!< in: stopword info */
356{
357 fts_string_t str;
358 mem_heap_t* heap;
359 ib_alloc_t* allocator;
360 ib_rbt_t* stop_words;
361
362 allocator = stopword_info->heap;
363 heap = static_cast<mem_heap_t*>(allocator->arg);
364
365 if (!stopword_info->cached_stopword) {
366 stopword_info->cached_stopword = rbt_create_arg_cmp(
367 sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
368 &my_charset_latin1);
369 }
370
371 stop_words = stopword_info->cached_stopword;
372
373 str.f_n_char = 0;
374
375 for (ulint i = 0; fts_default_stopword[i]; ++i) {
376 char* word;
377 fts_tokenizer_word_t new_word;
378
379 /* We are going to duplicate the value below. */
380 word = const_cast<char*>(fts_default_stopword[i]);
381
382 new_word.nodes = ib_vector_create(
383 allocator, sizeof(fts_node_t), 4);
384
385 str.f_len = ut_strlen(word);
386 str.f_str = reinterpret_cast<byte*>(word);
387
388 fts_string_dup(&new_word.text, &str, heap);
389
390 rbt_insert(stop_words, &new_word, &new_word);
391 }
392
393 stopword_info->status = STOPWORD_FROM_DEFAULT;
394}
395
396/****************************************************************//**
397Callback function to read a single stopword value.
398@return Always return TRUE */
399static
400ibool
401fts_read_stopword(
402/*==============*/
403 void* row, /*!< in: sel_node_t* */
404 void* user_arg) /*!< in: pointer to ib_vector_t */
405{
406 ib_alloc_t* allocator;
407 fts_stopword_t* stopword_info;
408 sel_node_t* sel_node;
409 que_node_t* exp;
410 ib_rbt_t* stop_words;
411 dfield_t* dfield;
412 fts_string_t str;
413 mem_heap_t* heap;
414 ib_rbt_bound_t parent;
415
416 sel_node = static_cast<sel_node_t*>(row);
417 stopword_info = static_cast<fts_stopword_t*>(user_arg);
418
419 stop_words = stopword_info->cached_stopword;
420 allocator = static_cast<ib_alloc_t*>(stopword_info->heap);
421 heap = static_cast<mem_heap_t*>(allocator->arg);
422
423 exp = sel_node->select_list;
424
425 /* We only need to read the first column */
426 dfield = que_node_get_val(exp);
427
428 str.f_n_char = 0;
429 str.f_str = static_cast<byte*>(dfield_get_data(dfield));
430 str.f_len = dfield_get_len(dfield);
431
432 /* Only create new node if it is a value not already existed */
433 if (str.f_len != UNIV_SQL_NULL
434 && rbt_search(stop_words, &parent, &str) != 0) {
435
436 fts_tokenizer_word_t new_word;
437
438 new_word.nodes = ib_vector_create(
439 allocator, sizeof(fts_node_t), 4);
440
441 new_word.text.f_str = static_cast<byte*>(
442 mem_heap_alloc(heap, str.f_len + 1));
443
444 memcpy(new_word.text.f_str, str.f_str, str.f_len);
445
446 new_word.text.f_n_char = 0;
447 new_word.text.f_len = str.f_len;
448 new_word.text.f_str[str.f_len] = 0;
449
450 rbt_insert(stop_words, &new_word, &new_word);
451 }
452
453 return(TRUE);
454}
455
456/******************************************************************//**
457Load user defined stopword from designated user table
458@return TRUE if load operation is successful */
459static
460ibool
461fts_load_user_stopword(
462/*===================*/
463 fts_t* fts, /*!< in: FTS struct */
464 const char* stopword_table_name, /*!< in: Stopword table
465 name */
466 fts_stopword_t* stopword_info) /*!< in: Stopword info */
467{
468 pars_info_t* info;
469 que_t* graph;
470 dberr_t error = DB_SUCCESS;
471 ibool ret = TRUE;
472 trx_t* trx;
473 ibool has_lock = fts->fts_status & TABLE_DICT_LOCKED;
474
475 trx = trx_create();
476 trx->op_info = "Load user stopword table into FTS cache";
477
478 if (!has_lock) {
479 mutex_enter(&dict_sys->mutex);
480 }
481
482 /* Validate the user table existence and in the right
483 format */
484 stopword_info->charset = fts_valid_stopword_table(stopword_table_name);
485 if (!stopword_info->charset) {
486 ret = FALSE;
487 goto cleanup;
488 } else if (!stopword_info->cached_stopword) {
489 /* Create the stopword RB tree with the stopword column
490 charset. All comparison will use this charset */
491 stopword_info->cached_stopword = rbt_create_arg_cmp(
492 sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
493 (void*)stopword_info->charset);
494
495 }
496
497 info = pars_info_create();
498
499 pars_info_bind_id(info, TRUE, "table_stopword", stopword_table_name);
500
501 pars_info_bind_function(info, "my_func", fts_read_stopword,
502 stopword_info);
503
504 graph = fts_parse_sql_no_dict_lock(
505 info,
506 "DECLARE FUNCTION my_func;\n"
507 "DECLARE CURSOR c IS"
508 " SELECT value"
509 " FROM $table_stopword;\n"
510 "BEGIN\n"
511 "\n"
512 "OPEN c;\n"
513 "WHILE 1 = 1 LOOP\n"
514 " FETCH c INTO my_func();\n"
515 " IF c % NOTFOUND THEN\n"
516 " EXIT;\n"
517 " END IF;\n"
518 "END LOOP;\n"
519 "CLOSE c;");
520
521 for (;;) {
522 error = fts_eval_sql(trx, graph);
523
524 if (error == DB_SUCCESS) {
525 fts_sql_commit(trx);
526 stopword_info->status = STOPWORD_USER_TABLE;
527 break;
528 } else {
529
530 fts_sql_rollback(trx);
531
532 if (error == DB_LOCK_WAIT_TIMEOUT) {
533 ib::warn() << "Lock wait timeout reading user"
534 " stopword table. Retrying!";
535
536 trx->error_state = DB_SUCCESS;
537 } else {
538 ib::error() << "Error '" << ut_strerr(error)
539 << "' while reading user stopword"
540 " table.";
541 ret = FALSE;
542 break;
543 }
544 }
545 }
546
547 que_graph_free(graph);
548
549cleanup:
550 if (!has_lock) {
551 mutex_exit(&dict_sys->mutex);
552 }
553
554 trx_free(trx);
555 return(ret);
556}
557
558/******************************************************************//**
559Initialize the index cache. */
560static
561void
562fts_index_cache_init(
563/*=================*/
564 ib_alloc_t* allocator, /*!< in: the allocator to use */
565 fts_index_cache_t* index_cache) /*!< in: index cache */
566{
567 ulint i;
568
569 ut_a(index_cache->words == NULL);
570
571 index_cache->words = rbt_create_arg_cmp(
572 sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
573 (void*) index_cache->charset);
574
575 ut_a(index_cache->doc_stats == NULL);
576
577 index_cache->doc_stats = ib_vector_create(
578 allocator, sizeof(fts_doc_stats_t), 4);
579
580 for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
581 ut_a(index_cache->ins_graph[i] == NULL);
582 ut_a(index_cache->sel_graph[i] == NULL);
583 }
584}
585
586/*********************************************************************//**
587Initialize FTS cache. */
588void
589fts_cache_init(
590/*===========*/
591 fts_cache_t* cache) /*!< in: cache to initialize */
592{
593 ulint i;
594
595 /* Just to make sure */
596 ut_a(cache->sync_heap->arg == NULL);
597
598 cache->sync_heap->arg = mem_heap_create(1024);
599
600 cache->total_size = 0;
601
602 mutex_enter((ib_mutex_t*) &cache->deleted_lock);
603 cache->deleted_doc_ids = ib_vector_create(
604 cache->sync_heap, sizeof(fts_update_t), 4);
605 mutex_exit((ib_mutex_t*) &cache->deleted_lock);
606
607 /* Reset the cache data for all the FTS indexes. */
608 for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
609 fts_index_cache_t* index_cache;
610
611 index_cache = static_cast<fts_index_cache_t*>(
612 ib_vector_get(cache->indexes, i));
613
614 fts_index_cache_init(cache->sync_heap, index_cache);
615 }
616}
617
618/****************************************************************//**
619Create a FTS cache. */
620fts_cache_t*
621fts_cache_create(
622/*=============*/
623 dict_table_t* table) /*!< in: table owns the FTS cache */
624{
625 mem_heap_t* heap;
626 fts_cache_t* cache;
627
628 heap = static_cast<mem_heap_t*>(mem_heap_create(512));
629
630 cache = static_cast<fts_cache_t*>(
631 mem_heap_zalloc(heap, sizeof(*cache)));
632
633 cache->cache_heap = heap;
634
635 rw_lock_create(fts_cache_rw_lock_key, &cache->lock, SYNC_FTS_CACHE);
636
637 rw_lock_create(
638 fts_cache_init_rw_lock_key, &cache->init_lock,
639 SYNC_FTS_CACHE_INIT);
640
641 mutex_create(LATCH_ID_FTS_DELETE, &cache->deleted_lock);
642
643 mutex_create(LATCH_ID_FTS_OPTIMIZE, &cache->optimize_lock);
644
645 mutex_create(LATCH_ID_FTS_DOC_ID, &cache->doc_id_lock);
646
647 /* This is the heap used to create the cache itself. */
648 cache->self_heap = ib_heap_allocator_create(heap);
649
650 /* This is a transient heap, used for storing sync data. */
651 cache->sync_heap = ib_heap_allocator_create(heap);
652 cache->sync_heap->arg = NULL;
653
654 cache->sync = static_cast<fts_sync_t*>(
655 mem_heap_zalloc(heap, sizeof(fts_sync_t)));
656
657 cache->sync->table = table;
658 cache->sync->event = os_event_create(0);
659
660 /* Create the index cache vector that will hold the inverted indexes. */
661 cache->indexes = ib_vector_create(
662 cache->self_heap, sizeof(fts_index_cache_t), 2);
663
664 fts_cache_init(cache);
665
666 cache->stopword_info.cached_stopword = NULL;
667 cache->stopword_info.charset = NULL;
668
669 cache->stopword_info.heap = cache->self_heap;
670
671 cache->stopword_info.status = STOPWORD_NOT_INIT;
672
673 return(cache);
674}
675
676/*******************************************************************//**
677Add a newly create index into FTS cache */
678void
679fts_add_index(
680/*==========*/
681 dict_index_t* index, /*!< FTS index to be added */
682 dict_table_t* table) /*!< table */
683{
684 fts_t* fts = table->fts;
685 fts_cache_t* cache;
686 fts_index_cache_t* index_cache;
687
688 ut_ad(fts);
689 cache = table->fts->cache;
690
691 rw_lock_x_lock(&cache->init_lock);
692
693 ib_vector_push(fts->indexes, &index);
694
695 index_cache = fts_find_index_cache(cache, index);
696
697 if (!index_cache) {
698 /* Add new index cache structure */
699 index_cache = fts_cache_index_cache_create(table, index);
700 }
701
702 rw_lock_x_unlock(&cache->init_lock);
703}
704
705/*******************************************************************//**
706recalibrate get_doc structure after index_cache in cache->indexes changed */
707static
708void
709fts_reset_get_doc(
710/*==============*/
711 fts_cache_t* cache) /*!< in: FTS index cache */
712{
713 fts_get_doc_t* get_doc;
714 ulint i;
715
716 ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
717
718 ib_vector_reset(cache->get_docs);
719
720 for (i = 0; i < ib_vector_size(cache->indexes); i++) {
721 fts_index_cache_t* ind_cache;
722
723 ind_cache = static_cast<fts_index_cache_t*>(
724 ib_vector_get(cache->indexes, i));
725
726 get_doc = static_cast<fts_get_doc_t*>(
727 ib_vector_push(cache->get_docs, NULL));
728
729 memset(get_doc, 0x0, sizeof(*get_doc));
730
731 get_doc->index_cache = ind_cache;
732 }
733
734 ut_ad(ib_vector_size(cache->get_docs)
735 == ib_vector_size(cache->indexes));
736}
737
738/*******************************************************************//**
739Check an index is in the table->indexes list
740@return TRUE if it exists */
741static
742ibool
743fts_in_dict_index(
744/*==============*/
745 dict_table_t* table, /*!< in: Table */
746 dict_index_t* index_check) /*!< in: index to be checked */
747{
748 dict_index_t* index;
749
750 for (index = dict_table_get_first_index(table);
751 index != NULL;
752 index = dict_table_get_next_index(index)) {
753
754 if (index == index_check) {
755 return(TRUE);
756 }
757 }
758
759 return(FALSE);
760}
761
762/*******************************************************************//**
763Check an index is in the fts->cache->indexes list
764@return TRUE if it exists */
765static
766ibool
767fts_in_index_cache(
768/*===============*/
769 dict_table_t* table, /*!< in: Table */
770 dict_index_t* index) /*!< in: index to be checked */
771{
772 ulint i;
773
774 for (i = 0; i < ib_vector_size(table->fts->cache->indexes); i++) {
775 fts_index_cache_t* index_cache;
776
777 index_cache = static_cast<fts_index_cache_t*>(
778 ib_vector_get(table->fts->cache->indexes, i));
779
780 if (index_cache->index == index) {
781 return(TRUE);
782 }
783 }
784
785 return(FALSE);
786}
787
788/*******************************************************************//**
789Check indexes in the fts->indexes is also present in index cache and
790table->indexes list
791@return TRUE if all indexes match */
792ibool
793fts_check_cached_index(
794/*===================*/
795 dict_table_t* table) /*!< in: Table where indexes are dropped */
796{
797 ulint i;
798
799 if (!table->fts || !table->fts->cache) {
800 return(TRUE);
801 }
802
803 ut_a(ib_vector_size(table->fts->indexes)
804 == ib_vector_size(table->fts->cache->indexes));
805
806 for (i = 0; i < ib_vector_size(table->fts->indexes); i++) {
807 dict_index_t* index;
808
809 index = static_cast<dict_index_t*>(
810 ib_vector_getp(table->fts->indexes, i));
811
812 if (!fts_in_index_cache(table, index)) {
813 return(FALSE);
814 }
815
816 if (!fts_in_dict_index(table, index)) {
817 return(FALSE);
818 }
819 }
820
821 return(TRUE);
822}
823
824/*******************************************************************//**
825Drop auxiliary tables related to an FTS index
826@return DB_SUCCESS or error number */
827dberr_t
828fts_drop_index(
829/*===========*/
830 dict_table_t* table, /*!< in: Table where indexes are dropped */
831 dict_index_t* index, /*!< in: Index to be dropped */
832 trx_t* trx) /*!< in: Transaction for the drop */
833{
834 ib_vector_t* indexes = table->fts->indexes;
835 dberr_t err = DB_SUCCESS;
836
837 ut_a(indexes);
838
839 if ((ib_vector_size(indexes) == 1
840 && (index == static_cast<dict_index_t*>(
841 ib_vector_getp(table->fts->indexes, 0))))
842 || ib_vector_is_empty(indexes)) {
843 doc_id_t current_doc_id;
844 doc_id_t first_doc_id;
845
846 /* If we are dropping the only FTS index of the table,
847 remove it from optimize thread */
848 fts_optimize_remove_table(table);
849
850 DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS);
851
852 /* If Doc ID column is not added internally by FTS index,
853 we can drop all FTS auxiliary tables. Otherwise, we will
854 need to keep some common table such as CONFIG table, so
855 as to keep track of incrementing Doc IDs */
856 if (!DICT_TF2_FLAG_IS_SET(
857 table, DICT_TF2_FTS_HAS_DOC_ID)) {
858
859 err = fts_drop_tables(trx, table);
860
861 err = fts_drop_index_tables(trx, index);
862
863 for(;;) {
864 bool retry = false;
865 if (index->index_fts_syncing) {
866 retry = true;
867 }
868 if (!retry){
869 fts_free(table);
870 break;
871 }
872 DICT_BG_YIELD(trx);
873 }
874 return(err);
875 }
876
877 for(;;) {
878 bool retry = false;
879 if (index->index_fts_syncing) {
880 retry = true;
881 }
882 if (!retry){
883 current_doc_id = table->fts->cache->next_doc_id;
884 first_doc_id = table->fts->cache->first_doc_id;
885 fts_cache_clear(table->fts->cache);
886 fts_cache_destroy(table->fts->cache);
887 table->fts->cache = fts_cache_create(table);
888 table->fts->cache->next_doc_id = current_doc_id;
889 table->fts->cache->first_doc_id = first_doc_id;
890 break;
891 }
892 DICT_BG_YIELD(trx);
893 }
894 } else {
895 fts_cache_t* cache = table->fts->cache;
896 fts_index_cache_t* index_cache;
897
898 rw_lock_x_lock(&cache->init_lock);
899
900 index_cache = fts_find_index_cache(cache, index);
901
902 if (index_cache != NULL) {
903 for(;;) {
904 bool retry = false;
905 if (index->index_fts_syncing) {
906 retry = true;
907 }
908 if (!retry && index_cache->words) {
909 fts_words_free(index_cache->words);
910 rbt_free(index_cache->words);
911 break;
912 }
913 DICT_BG_YIELD(trx);
914 }
915
916 ib_vector_remove(cache->indexes, *(void**) index_cache);
917 }
918
919 if (cache->get_docs) {
920 fts_reset_get_doc(cache);
921 }
922
923 rw_lock_x_unlock(&cache->init_lock);
924 }
925
926 err = fts_drop_index_tables(trx, index);
927
928 ib_vector_remove(indexes, (const void*) index);
929
930 return(err);
931}
932
933/****************************************************************//**
934Free the query graph but check whether dict_sys->mutex is already
935held */
936void
937fts_que_graph_free_check_lock(
938/*==========================*/
939 fts_table_t* fts_table, /*!< in: FTS table */
940 const fts_index_cache_t*index_cache, /*!< in: FTS index cache */
941 que_t* graph) /*!< in: query graph */
942{
943 ibool has_dict = FALSE;
944
945 if (fts_table && fts_table->table) {
946 ut_ad(fts_table->table->fts);
947
948 has_dict = fts_table->table->fts->fts_status
949 & TABLE_DICT_LOCKED;
950 } else if (index_cache) {
951 ut_ad(index_cache->index->table->fts);
952
953 has_dict = index_cache->index->table->fts->fts_status
954 & TABLE_DICT_LOCKED;
955 }
956
957 if (!has_dict) {
958 mutex_enter(&dict_sys->mutex);
959 }
960
961 ut_ad(mutex_own(&dict_sys->mutex));
962
963 que_graph_free(graph);
964
965 if (!has_dict) {
966 mutex_exit(&dict_sys->mutex);
967 }
968}
969
970/****************************************************************//**
971Create an FTS index cache. */
972CHARSET_INFO*
973fts_index_get_charset(
974/*==================*/
975 dict_index_t* index) /*!< in: FTS index */
976{
977 CHARSET_INFO* charset = NULL;
978 dict_field_t* field;
979 ulint prtype;
980
981 field = dict_index_get_nth_field(index, 0);
982 prtype = field->col->prtype;
983
984 charset = fts_get_charset(prtype);
985
986#ifdef FTS_DEBUG
987 /* Set up charset info for this index. Please note all
988 field of the FTS index should have the same charset */
989 for (i = 1; i < index->n_fields; i++) {
990 CHARSET_INFO* fld_charset;
991
992 field = dict_index_get_nth_field(index, i);
993 prtype = field->col->prtype;
994
995 fld_charset = fts_get_charset(prtype);
996
997 /* All FTS columns should have the same charset */
998 if (charset) {
999 ut_a(charset == fld_charset);
1000 } else {
1001 charset = fld_charset;
1002 }
1003 }
1004#endif
1005
1006 return(charset);
1007
1008}
1009/****************************************************************//**
1010Create an FTS index cache.
1011@return Index Cache */
1012fts_index_cache_t*
1013fts_cache_index_cache_create(
1014/*=========================*/
1015 dict_table_t* table, /*!< in: table with FTS index */
1016 dict_index_t* index) /*!< in: FTS index */
1017{
1018 ulint n_bytes;
1019 fts_index_cache_t* index_cache;
1020 fts_cache_t* cache = table->fts->cache;
1021
1022 ut_a(cache != NULL);
1023
1024 ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
1025
1026 /* Must not already exist in the cache vector. */
1027 ut_a(fts_find_index_cache(cache, index) == NULL);
1028
1029 index_cache = static_cast<fts_index_cache_t*>(
1030 ib_vector_push(cache->indexes, NULL));
1031
1032 memset(index_cache, 0x0, sizeof(*index_cache));
1033
1034 index_cache->index = index;
1035
1036 index_cache->charset = fts_index_get_charset(index);
1037
1038 n_bytes = sizeof(que_t*) * FTS_NUM_AUX_INDEX;
1039
1040 index_cache->ins_graph = static_cast<que_t**>(
1041 mem_heap_zalloc(static_cast<mem_heap_t*>(
1042 cache->self_heap->arg), n_bytes));
1043
1044 index_cache->sel_graph = static_cast<que_t**>(
1045 mem_heap_zalloc(static_cast<mem_heap_t*>(
1046 cache->self_heap->arg), n_bytes));
1047
1048 fts_index_cache_init(cache->sync_heap, index_cache);
1049
1050 if (cache->get_docs) {
1051 fts_reset_get_doc(cache);
1052 }
1053
1054 return(index_cache);
1055}
1056
1057/****************************************************************//**
1058Release all resources help by the words rb tree e.g., the node ilist. */
1059static
1060void
1061fts_words_free(
1062/*===========*/
1063 ib_rbt_t* words) /*!< in: rb tree of words */
1064{
1065 const ib_rbt_node_t* rbt_node;
1066
1067 /* Free the resources held by a word. */
1068 for (rbt_node = rbt_first(words);
1069 rbt_node != NULL;
1070 rbt_node = rbt_first(words)) {
1071
1072 ulint i;
1073 fts_tokenizer_word_t* word;
1074
1075 word = rbt_value(fts_tokenizer_word_t, rbt_node);
1076
1077 /* Free the ilists of this word. */
1078 for (i = 0; i < ib_vector_size(word->nodes); ++i) {
1079
1080 fts_node_t* fts_node = static_cast<fts_node_t*>(
1081 ib_vector_get(word->nodes, i));
1082
1083 ut_free(fts_node->ilist);
1084 fts_node->ilist = NULL;
1085 }
1086
1087 /* NOTE: We are responsible for free'ing the node */
1088 ut_free(rbt_remove_node(words, rbt_node));
1089 }
1090}
1091
1092/** Clear cache.
1093@param[in,out] cache fts cache */
1094void
1095fts_cache_clear(
1096 fts_cache_t* cache)
1097{
1098 ulint i;
1099
1100 for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
1101 ulint j;
1102 fts_index_cache_t* index_cache;
1103
1104 index_cache = static_cast<fts_index_cache_t*>(
1105 ib_vector_get(cache->indexes, i));
1106
1107 fts_words_free(index_cache->words);
1108
1109 rbt_free(index_cache->words);
1110
1111 index_cache->words = NULL;
1112
1113 for (j = 0; j < FTS_NUM_AUX_INDEX; ++j) {
1114
1115 if (index_cache->ins_graph[j] != NULL) {
1116
1117 fts_que_graph_free_check_lock(
1118 NULL, index_cache,
1119 index_cache->ins_graph[j]);
1120
1121 index_cache->ins_graph[j] = NULL;
1122 }
1123
1124 if (index_cache->sel_graph[j] != NULL) {
1125
1126 fts_que_graph_free_check_lock(
1127 NULL, index_cache,
1128 index_cache->sel_graph[j]);
1129
1130 index_cache->sel_graph[j] = NULL;
1131 }
1132 }
1133
1134 index_cache->doc_stats = NULL;
1135 }
1136
1137 mem_heap_free(static_cast<mem_heap_t*>(cache->sync_heap->arg));
1138 cache->sync_heap->arg = NULL;
1139
1140 fts_need_sync = false;
1141
1142 cache->total_size = 0;
1143
1144 mutex_enter((ib_mutex_t*) &cache->deleted_lock);
1145 cache->deleted_doc_ids = NULL;
1146 mutex_exit((ib_mutex_t*) &cache->deleted_lock);
1147}
1148
1149/*********************************************************************//**
1150Search the index specific cache for a particular FTS index.
1151@return the index cache else NULL */
1152UNIV_INLINE
1153fts_index_cache_t*
1154fts_get_index_cache(
1155/*================*/
1156 fts_cache_t* cache, /*!< in: cache to search */
1157 const dict_index_t* index) /*!< in: index to search for */
1158{
1159 ulint i;
1160
1161 ut_ad(rw_lock_own((rw_lock_t*) &cache->lock, RW_LOCK_X)
1162 || rw_lock_own((rw_lock_t*) &cache->init_lock, RW_LOCK_X));
1163
1164 for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
1165 fts_index_cache_t* index_cache;
1166
1167 index_cache = static_cast<fts_index_cache_t*>(
1168 ib_vector_get(cache->indexes, i));
1169
1170 if (index_cache->index == index) {
1171
1172 return(index_cache);
1173 }
1174 }
1175
1176 return(NULL);
1177}
1178
#ifdef FTS_DEBUG
/*********************************************************************//**
Find the get_doc structure whose index cache refers to the given index,
scanning cache->get_docs linearly. The caller must hold cache->init_lock
in X mode (checked in debug builds).
@return the fts_get_doc_t item, or NULL if none matches */
static
fts_get_doc_t*
fts_get_index_get_doc(
/*==================*/
	fts_cache_t*		cache,	/*!< in: cache to search */
	const dict_index_t*	index)	/*!< in: index to search for */
{
	ut_ad(rw_lock_own((rw_lock_t*) &cache->init_lock, RW_LOCK_X));

	ulint	pos = 0;

	while (pos < ib_vector_size(cache->get_docs)) {
		fts_get_doc_t*	candidate = static_cast<fts_get_doc_t*>(
			ib_vector_get(cache->get_docs, pos));

		if (candidate->index_cache->index == index) {
			return(candidate);
		}

		++pos;
	}

	return(NULL);
}
#endif
1209
1210/**********************************************************************//**
1211Find an existing word, or if not found, create one and return it.
1212@return specified word token */
1213static
1214fts_tokenizer_word_t*
1215fts_tokenizer_word_get(
1216/*===================*/
1217 fts_cache_t* cache, /*!< in: cache */
1218 fts_index_cache_t*
1219 index_cache, /*!< in: index cache */
1220 fts_string_t* text) /*!< in: node text */
1221{
1222 fts_tokenizer_word_t* word;
1223 ib_rbt_bound_t parent;
1224
1225 ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
1226
1227 /* If it is a stopword, do not index it */
1228 if (!fts_check_token(text,
1229 cache->stopword_info.cached_stopword,
1230 index_cache->charset)) {
1231
1232 return(NULL);
1233 }
1234
1235 /* Check if we found a match, if not then add word to tree. */
1236 if (rbt_search(index_cache->words, &parent, text) != 0) {
1237 mem_heap_t* heap;
1238 fts_tokenizer_word_t new_word;
1239
1240 heap = static_cast<mem_heap_t*>(cache->sync_heap->arg);
1241
1242 new_word.nodes = ib_vector_create(
1243 cache->sync_heap, sizeof(fts_node_t), 4);
1244
1245 fts_string_dup(&new_word.text, text, heap);
1246
1247 parent.last = rbt_add_node(
1248 index_cache->words, &parent, &new_word);
1249
1250 /* Take into account the RB tree memory use and the vector. */
1251 cache->total_size += sizeof(new_word)
1252 + sizeof(ib_rbt_node_t)
1253 + text->f_len
1254 + (sizeof(fts_node_t) * 4)
1255 + sizeof(*new_word.nodes);
1256
1257 ut_ad(rbt_validate(index_cache->words));
1258 }
1259
1260 word = rbt_value(fts_tokenizer_word_t, parent.last);
1261
1262 return(word);
1263}
1264
1265/**********************************************************************//**
1266Add the given doc_id/word positions to the given node's ilist. */
1267void
1268fts_cache_node_add_positions(
1269/*=========================*/
1270 fts_cache_t* cache, /*!< in: cache */
1271 fts_node_t* node, /*!< in: word node */
1272 doc_id_t doc_id, /*!< in: doc id */
1273 ib_vector_t* positions) /*!< in: fts_token_t::positions */
1274{
1275 ulint i;
1276 byte* ptr;
1277 byte* ilist;
1278 ulint enc_len;
1279 ulint last_pos;
1280 byte* ptr_start;
1281 ulint doc_id_delta;
1282
1283#ifdef UNIV_DEBUG
1284 if (cache) {
1285 ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
1286 }
1287#endif /* UNIV_DEBUG */
1288
1289 ut_ad(doc_id >= node->last_doc_id);
1290
1291 /* Calculate the space required to store the ilist. */
1292 doc_id_delta = (ulint)(doc_id - node->last_doc_id);
1293 enc_len = fts_get_encoded_len(doc_id_delta);
1294
1295 last_pos = 0;
1296 for (i = 0; i < ib_vector_size(positions); i++) {
1297 ulint pos = *(static_cast<ulint*>(
1298 ib_vector_get(positions, i)));
1299
1300 ut_ad(last_pos == 0 || pos > last_pos);
1301
1302 enc_len += fts_get_encoded_len(pos - last_pos);
1303 last_pos = pos;
1304 }
1305
1306 /* The 0x00 byte at the end of the token positions list. */
1307 enc_len++;
1308
1309 if ((node->ilist_size_alloc - node->ilist_size) >= enc_len) {
1310 /* No need to allocate more space, we can fit in the new
1311 data at the end of the old one. */
1312 ilist = NULL;
1313 ptr = node->ilist + node->ilist_size;
1314 } else {
1315 ulint new_size = node->ilist_size + enc_len;
1316
1317 /* Over-reserve space by a fixed size for small lengths and
1318 by 20% for lengths >= 48 bytes. */
1319 if (new_size < 16) {
1320 new_size = 16;
1321 } else if (new_size < 32) {
1322 new_size = 32;
1323 } else if (new_size < 48) {
1324 new_size = 48;
1325 } else {
1326 new_size = (ulint)(1.2 * new_size);
1327 }
1328
1329 ilist = static_cast<byte*>(ut_malloc_nokey(new_size));
1330 ptr = ilist + node->ilist_size;
1331
1332 node->ilist_size_alloc = new_size;
1333 }
1334
1335 ptr_start = ptr;
1336
1337 /* Encode the new fragment. */
1338 ptr += fts_encode_int(doc_id_delta, ptr);
1339
1340 last_pos = 0;
1341 for (i = 0; i < ib_vector_size(positions); i++) {
1342 ulint pos = *(static_cast<ulint*>(
1343 ib_vector_get(positions, i)));
1344
1345 ptr += fts_encode_int(pos - last_pos, ptr);
1346 last_pos = pos;
1347 }
1348
1349 *ptr++ = 0;
1350
1351 ut_a(enc_len == (ulint)(ptr - ptr_start));
1352
1353 if (ilist) {
1354 /* Copy old ilist to the start of the new one and switch the
1355 new one into place in the node. */
1356 if (node->ilist_size > 0) {
1357 memcpy(ilist, node->ilist, node->ilist_size);
1358 ut_free(node->ilist);
1359 }
1360
1361 node->ilist = ilist;
1362 }
1363
1364 node->ilist_size += enc_len;
1365
1366 if (cache) {
1367 cache->total_size += enc_len;
1368 }
1369
1370 if (node->first_doc_id == FTS_NULL_DOC_ID) {
1371 node->first_doc_id = doc_id;
1372 }
1373
1374 node->last_doc_id = doc_id;
1375 ++node->doc_count;
1376}
1377
1378/**********************************************************************//**
1379Add document to the cache. */
1380static
1381void
1382fts_cache_add_doc(
1383/*==============*/
1384 fts_cache_t* cache, /*!< in: cache */
1385 fts_index_cache_t*
1386 index_cache, /*!< in: index cache */
1387 doc_id_t doc_id, /*!< in: doc id to add */
1388 ib_rbt_t* tokens) /*!< in: document tokens */
1389{
1390 const ib_rbt_node_t* node;
1391 ulint n_words;
1392 fts_doc_stats_t* doc_stats;
1393
1394 if (!tokens) {
1395 return;
1396 }
1397
1398 ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
1399
1400 n_words = rbt_size(tokens);
1401
1402 for (node = rbt_first(tokens); node; node = rbt_first(tokens)) {
1403
1404 fts_tokenizer_word_t* word;
1405 fts_node_t* fts_node = NULL;
1406 fts_token_t* token = rbt_value(fts_token_t, node);
1407
1408 /* Find and/or add token to the cache. */
1409 word = fts_tokenizer_word_get(
1410 cache, index_cache, &token->text);
1411
1412 if (!word) {
1413 ut_free(rbt_remove_node(tokens, node));
1414 continue;
1415 }
1416
1417 if (ib_vector_size(word->nodes) > 0) {
1418 fts_node = static_cast<fts_node_t*>(
1419 ib_vector_last(word->nodes));
1420 }
1421
1422 if (fts_node == NULL || fts_node->synced
1423 || fts_node->ilist_size > FTS_ILIST_MAX_SIZE
1424 || doc_id < fts_node->last_doc_id) {
1425
1426 fts_node = static_cast<fts_node_t*>(
1427 ib_vector_push(word->nodes, NULL));
1428
1429 memset(fts_node, 0x0, sizeof(*fts_node));
1430
1431 cache->total_size += sizeof(*fts_node);
1432 }
1433
1434 fts_cache_node_add_positions(
1435 cache, fts_node, doc_id, token->positions);
1436
1437 ut_free(rbt_remove_node(tokens, node));
1438 }
1439
1440 ut_a(rbt_empty(tokens));
1441
1442 /* Add to doc ids processed so far. */
1443 doc_stats = static_cast<fts_doc_stats_t*>(
1444 ib_vector_push(index_cache->doc_stats, NULL));
1445
1446 doc_stats->doc_id = doc_id;
1447 doc_stats->word_count = n_words;
1448
1449 /* Add the doc stats memory usage too. */
1450 cache->total_size += sizeof(*doc_stats);
1451
1452 if (doc_id > cache->sync->max_doc_id) {
1453 cache->sync->max_doc_id = doc_id;
1454 }
1455}
1456
1457/****************************************************************//**
1458Drops a table. If the table can't be found we return a SUCCESS code.
1459@return DB_SUCCESS or error code */
1460static MY_ATTRIBUTE((nonnull, warn_unused_result))
1461dberr_t
1462fts_drop_table(
1463/*===========*/
1464 trx_t* trx, /*!< in: transaction */
1465 const char* table_name) /*!< in: table to drop */
1466{
1467 dict_table_t* table;
1468 dberr_t error = DB_SUCCESS;
1469
1470 /* Check that the table exists in our data dictionary.
1471 Similar to regular drop table case, we will open table with
1472 DICT_ERR_IGNORE_INDEX_ROOT and DICT_ERR_IGNORE_CORRUPT option */
1473 table = dict_table_open_on_name(
1474 table_name, TRUE, FALSE,
1475 static_cast<dict_err_ignore_t>(
1476 DICT_ERR_IGNORE_INDEX_ROOT | DICT_ERR_IGNORE_CORRUPT));
1477
1478 if (table != 0) {
1479
1480 dict_table_close(table, TRUE, FALSE);
1481
1482 /* Pass nonatomic=false (dont allow data dict unlock),
1483 because the transaction may hold locks on SYS_* tables from
1484 previous calls to fts_drop_table(). */
1485 error = row_drop_table_for_mysql(table_name, trx, true, false, false);
1486
1487 if (error != DB_SUCCESS) {
1488 ib::error() << "Unable to drop FTS index aux table "
1489 << table_name << ": " << ut_strerr(error);
1490 }
1491 } else {
1492 error = DB_FAIL;
1493 }
1494
1495 return(error);
1496}
1497
1498/****************************************************************//**
1499Rename a single auxiliary table due to database name change.
1500@return DB_SUCCESS or error code */
1501static MY_ATTRIBUTE((nonnull, warn_unused_result))
1502dberr_t
1503fts_rename_one_aux_table(
1504/*=====================*/
1505 const char* new_name, /*!< in: new parent tbl name */
1506 const char* fts_table_old_name, /*!< in: old aux tbl name */
1507 trx_t* trx) /*!< in: transaction */
1508{
1509 char fts_table_new_name[MAX_TABLE_NAME_LEN];
1510 ulint new_db_name_len = dict_get_db_name_len(new_name);
1511 ulint old_db_name_len = dict_get_db_name_len(fts_table_old_name);
1512 ulint table_new_name_len = strlen(fts_table_old_name)
1513 + new_db_name_len - old_db_name_len;
1514
1515 /* Check if the new and old database names are the same, if so,
1516 nothing to do */
1517 ut_ad((new_db_name_len != old_db_name_len)
1518 || strncmp(new_name, fts_table_old_name, old_db_name_len) != 0);
1519
1520 /* Get the database name from "new_name", and table name
1521 from the fts_table_old_name */
1522 strncpy(fts_table_new_name, new_name, new_db_name_len);
1523 strncpy(fts_table_new_name + new_db_name_len,
1524 strchr(fts_table_old_name, '/'),
1525 table_new_name_len - new_db_name_len);
1526 fts_table_new_name[table_new_name_len] = 0;
1527
1528 return(row_rename_table_for_mysql(
1529 fts_table_old_name, fts_table_new_name, trx, false));
1530}
1531
1532/****************************************************************//**
1533Rename auxiliary tables for all fts index for a table. This(rename)
1534is due to database name change
1535@return DB_SUCCESS or error code */
1536dberr_t
1537fts_rename_aux_tables(
1538/*==================*/
1539 dict_table_t* table, /*!< in: user Table */
1540 const char* new_name, /*!< in: new table name */
1541 trx_t* trx) /*!< in: transaction */
1542{
1543 ulint i;
1544 fts_table_t fts_table;
1545
1546 FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
1547
1548 /* Rename common auxiliary tables */
1549 for (i = 0; fts_common_tables[i] != NULL; ++i) {
1550 char old_table_name[MAX_FULL_NAME_LEN];
1551 dberr_t err = DB_SUCCESS;
1552
1553 fts_table.suffix = fts_common_tables[i];
1554
1555 fts_get_table_name(&fts_table, old_table_name);
1556
1557 err = fts_rename_one_aux_table(new_name, old_table_name, trx);
1558
1559 if (err != DB_SUCCESS) {
1560 return(err);
1561 }
1562 }
1563
1564 fts_t* fts = table->fts;
1565
1566 /* Rename index specific auxiliary tables */
1567 for (i = 0; fts->indexes != 0 && i < ib_vector_size(fts->indexes);
1568 ++i) {
1569 dict_index_t* index;
1570
1571 index = static_cast<dict_index_t*>(
1572 ib_vector_getp(fts->indexes, i));
1573
1574 FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
1575
1576 for (ulint j = 0; j < FTS_NUM_AUX_INDEX; ++j) {
1577 dberr_t err;
1578 char old_table_name[MAX_FULL_NAME_LEN];
1579
1580 fts_table.suffix = fts_get_suffix(j);
1581
1582 fts_get_table_name(&fts_table, old_table_name);
1583
1584 err = fts_rename_one_aux_table(
1585 new_name, old_table_name, trx);
1586
1587 DBUG_EXECUTE_IF("fts_rename_failure",
1588 err = DB_DEADLOCK;
1589 fts_sql_rollback(trx););
1590
1591 if (err != DB_SUCCESS) {
1592 return(err);
1593 }
1594 }
1595 }
1596
1597 return(DB_SUCCESS);
1598}
1599
1600/****************************************************************//**
1601Drops the common ancillary tables needed for supporting an FTS index
1602on the given table. row_mysql_lock_data_dictionary must have been called
1603before this.
1604@return DB_SUCCESS or error code */
1605static MY_ATTRIBUTE((nonnull, warn_unused_result))
1606dberr_t
1607fts_drop_common_tables(
1608/*===================*/
1609 trx_t* trx, /*!< in: transaction */
1610 fts_table_t* fts_table) /*!< in: table with an FTS
1611 index */
1612{
1613 ulint i;
1614 dberr_t error = DB_SUCCESS;
1615
1616 for (i = 0; fts_common_tables[i] != NULL; ++i) {
1617 dberr_t err;
1618 char table_name[MAX_FULL_NAME_LEN];
1619
1620 fts_table->suffix = fts_common_tables[i];
1621
1622 fts_get_table_name(fts_table, table_name);
1623
1624 err = fts_drop_table(trx, table_name);
1625
1626 /* We only return the status of the last error. */
1627 if (err != DB_SUCCESS && err != DB_FAIL) {
1628 error = err;
1629 }
1630 }
1631
1632 return(error);
1633}
1634
1635/****************************************************************//**
1636Since we do a horizontal split on the index table, we need to drop
1637all the split tables.
1638@return DB_SUCCESS or error code */
1639static
1640dberr_t
1641fts_drop_index_split_tables(
1642/*========================*/
1643 trx_t* trx, /*!< in: transaction */
1644 dict_index_t* index) /*!< in: fts instance */
1645
1646{
1647 ulint i;
1648 fts_table_t fts_table;
1649 dberr_t error = DB_SUCCESS;
1650
1651 FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
1652
1653 for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
1654 dberr_t err;
1655 char table_name[MAX_FULL_NAME_LEN];
1656
1657 fts_table.suffix = fts_get_suffix(i);
1658
1659 fts_get_table_name(&fts_table, table_name);
1660
1661 err = fts_drop_table(trx, table_name);
1662
1663 /* We only return the status of the last error. */
1664 if (err != DB_SUCCESS && err != DB_FAIL) {
1665 error = err;
1666 }
1667 }
1668
1669 return(error);
1670}
1671
1672/****************************************************************//**
1673Drops FTS auxiliary tables for an FTS index
1674@return DB_SUCCESS or error code */
1675dberr_t
1676fts_drop_index_tables(
1677/*==================*/
1678 trx_t* trx, /*!< in: transaction */
1679 dict_index_t* index) /*!< in: Index to drop */
1680{
1681 return(fts_drop_index_split_tables(trx, index));
1682}
1683
1684/****************************************************************//**
1685Drops FTS ancillary tables needed for supporting an FTS index
1686on the given table. row_mysql_lock_data_dictionary must have been called
1687before this.
1688@return DB_SUCCESS or error code */
1689static MY_ATTRIBUTE((nonnull, warn_unused_result))
1690dberr_t
1691fts_drop_all_index_tables(
1692/*======================*/
1693 trx_t* trx, /*!< in: transaction */
1694 fts_t* fts) /*!< in: fts instance */
1695{
1696 dberr_t error = DB_SUCCESS;
1697
1698 for (ulint i = 0;
1699 fts->indexes != 0 && i < ib_vector_size(fts->indexes);
1700 ++i) {
1701
1702 dberr_t err;
1703 dict_index_t* index;
1704
1705 index = static_cast<dict_index_t*>(
1706 ib_vector_getp(fts->indexes, i));
1707
1708 err = fts_drop_index_tables(trx, index);
1709
1710 if (err != DB_SUCCESS) {
1711 error = err;
1712 }
1713 }
1714
1715 return(error);
1716}
1717
1718/*********************************************************************//**
1719Drops the ancillary tables needed for supporting an FTS index on a
1720given table. row_mysql_lock_data_dictionary must have been called before
1721this.
1722@return DB_SUCCESS or error code */
1723dberr_t
1724fts_drop_tables(
1725/*============*/
1726 trx_t* trx, /*!< in: transaction */
1727 dict_table_t* table) /*!< in: table has the FTS index */
1728{
1729 dberr_t error;
1730 fts_table_t fts_table;
1731
1732 FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
1733
1734 /* TODO: This is not atomic and can cause problems during recovery. */
1735
1736 error = fts_drop_common_tables(trx, &fts_table);
1737
1738 if (error == DB_SUCCESS) {
1739 error = fts_drop_all_index_tables(trx, table->fts);
1740 }
1741
1742 return(error);
1743}
1744
1745/** Create dict_table_t object for FTS Aux tables.
1746@param[in] aux_table_name FTS Aux table name
1747@param[in] table table object of FTS Index
1748@param[in] n_cols number of columns for FTS Aux table
1749@return table object for FTS Aux table */
1750static
1751dict_table_t*
1752fts_create_in_mem_aux_table(
1753 const char* aux_table_name,
1754 const dict_table_t* table,
1755 ulint n_cols)
1756{
1757 dict_table_t* new_table = dict_mem_table_create(
1758 aux_table_name, NULL, n_cols, 0, table->flags,
1759 table->space->id == TRX_SYS_SPACE
1760 ? 0 : table->space->purpose == FIL_TYPE_TEMPORARY
1761 ? DICT_TF2_TEMPORARY : DICT_TF2_USE_FILE_PER_TABLE);
1762
1763 if (DICT_TF_HAS_DATA_DIR(table->flags)) {
1764 ut_ad(table->data_dir_path != NULL);
1765 new_table->data_dir_path = mem_heap_strdup(
1766 new_table->heap, table->data_dir_path);
1767 }
1768
1769 return(new_table);
1770}
1771
1772/** Function to create on FTS common table.
1773@param[in,out] trx InnoDB transaction
1774@param[in] table Table that has FTS Index
1775@param[in] fts_table_name FTS AUX table name
1776@param[in] fts_suffix FTS AUX table suffix
1777@param[in,out] heap temporary memory heap
1778@return table object if created, else NULL */
1779static
1780dict_table_t*
1781fts_create_one_common_table(
1782 trx_t* trx,
1783 const dict_table_t* table,
1784 const char* fts_table_name,
1785 const char* fts_suffix,
1786 mem_heap_t* heap)
1787{
1788 dict_table_t* new_table = NULL;
1789 dberr_t error;
1790 bool is_config = strcmp(fts_suffix, "CONFIG") == 0;
1791
1792 if (!is_config) {
1793
1794 new_table = fts_create_in_mem_aux_table(
1795 fts_table_name, table, FTS_DELETED_TABLE_NUM_COLS);
1796
1797 dict_mem_table_add_col(
1798 new_table, heap, "doc_id", DATA_INT, DATA_UNSIGNED,
1799 FTS_DELETED_TABLE_COL_LEN);
1800 } else {
1801 /* Config table has different schema. */
1802 new_table = fts_create_in_mem_aux_table(
1803 fts_table_name, table, FTS_CONFIG_TABLE_NUM_COLS);
1804
1805 dict_mem_table_add_col(
1806 new_table, heap, "key", DATA_VARCHAR, 0,
1807 FTS_CONFIG_TABLE_KEY_COL_LEN);
1808
1809 dict_mem_table_add_col(
1810 new_table, heap, "value", DATA_VARCHAR, DATA_NOT_NULL,
1811 FTS_CONFIG_TABLE_VALUE_COL_LEN);
1812 }
1813
1814 dict_table_add_system_columns(new_table, heap);
1815 error = row_create_table_for_mysql(new_table, trx,
1816 FIL_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY);
1817
1818 if (error == DB_SUCCESS) {
1819
1820 dict_index_t* index = dict_mem_index_create(
1821 new_table, "FTS_COMMON_TABLE_IND",
1822 DICT_UNIQUE|DICT_CLUSTERED, 1);
1823
1824 if (!is_config) {
1825 dict_mem_index_add_field(index, "doc_id", 0);
1826 } else {
1827 dict_mem_index_add_field(index, "key", 0);
1828 }
1829
1830 /* We save and restore trx->dict_operation because
1831 row_create_index_for_mysql() changes the operation to
1832 TRX_DICT_OP_TABLE. */
1833 trx_dict_op_t op = trx_get_dict_operation(trx);
1834
1835 error = row_create_index_for_mysql(index, trx, NULL);
1836
1837 trx->dict_operation = op;
1838 }
1839
1840 if (error != DB_SUCCESS) {
1841 trx->error_state = error;
1842 dict_mem_table_free(new_table);
1843 new_table = NULL;
1844 ib::warn() << "Failed to create FTS common table "
1845 << fts_table_name;
1846 }
1847 return(new_table);
1848}
1849
1850/** Creates the common auxiliary tables needed for supporting an FTS index
1851on the given table. row_mysql_lock_data_dictionary must have been called
1852before this.
1853The following tables are created.
1854CREATE TABLE $FTS_PREFIX_DELETED
1855 (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
1856CREATE TABLE $FTS_PREFIX_DELETED_CACHE
1857 (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
1858CREATE TABLE $FTS_PREFIX_BEING_DELETED
1859 (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
1860CREATE TABLE $FTS_PREFIX_BEING_DELETED_CACHE
1861 (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
1862CREATE TABLE $FTS_PREFIX_CONFIG
1863 (key CHAR(50), value CHAR(200), UNIQUE CLUSTERED INDEX on key)
1864@param[in,out] trx transaction
1865@param[in,out] table table with FTS index
1866@param[in] skip_doc_id_index Skip index on doc id
1867@return DB_SUCCESS if succeed */
1868dberr_t
1869fts_create_common_tables(
1870 trx_t* trx,
1871 dict_table_t* table,
1872 bool skip_doc_id_index)
1873{
1874 dberr_t error;
1875 que_t* graph;
1876 fts_table_t fts_table;
1877 mem_heap_t* heap = mem_heap_create(1024);
1878 pars_info_t* info;
1879 char fts_name[MAX_FULL_NAME_LEN];
1880 char full_name[sizeof(fts_common_tables) / sizeof(char*)]
1881 [MAX_FULL_NAME_LEN];
1882
1883 dict_index_t* index = NULL;
1884 trx_dict_op_t op;
1885 /* common_tables vector is used for dropping FTS common tables
1886 on error condition. */
1887 std::vector<dict_table_t*> common_tables;
1888 std::vector<dict_table_t*>::const_iterator it;
1889
1890 FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
1891
1892 error = fts_drop_common_tables(trx, &fts_table);
1893
1894 if (error != DB_SUCCESS) {
1895
1896 goto func_exit;
1897 }
1898
1899 /* Create the FTS tables that are common to an FTS index. */
1900 for (ulint i = 0; fts_common_tables[i] != NULL; ++i) {
1901
1902 fts_table.suffix = fts_common_tables[i];
1903 fts_get_table_name(&fts_table, full_name[i]);
1904 dict_table_t* common_table = fts_create_one_common_table(
1905 trx, table, full_name[i], fts_table.suffix, heap);
1906
1907 if (common_table == NULL) {
1908 error = DB_ERROR;
1909 goto func_exit;
1910 } else {
1911 common_tables.push_back(common_table);
1912 }
1913
1914 mem_heap_empty(heap);
1915
1916 DBUG_EXECUTE_IF("ib_fts_aux_table_error",
1917 /* Return error after creating FTS_AUX_CONFIG table. */
1918 if (i == 4) {
1919 error = DB_ERROR;
1920 goto func_exit;
1921 }
1922 );
1923
1924 }
1925
1926 /* Write the default settings to the config table. */
1927 info = pars_info_create();
1928
1929 fts_table.suffix = "CONFIG";
1930 fts_get_table_name(&fts_table, fts_name);
1931 pars_info_bind_id(info, true, "config_table", fts_name);
1932
1933 graph = fts_parse_sql_no_dict_lock(
1934 info, fts_config_table_insert_values_sql);
1935
1936 error = fts_eval_sql(trx, graph);
1937
1938 que_graph_free(graph);
1939
1940 if (error != DB_SUCCESS || skip_doc_id_index) {
1941
1942 goto func_exit;
1943 }
1944
1945 index = dict_mem_index_create(table, FTS_DOC_ID_INDEX_NAME,
1946 DICT_UNIQUE, 1);
1947 dict_mem_index_add_field(index, FTS_DOC_ID_COL_NAME, 0);
1948
1949 op = trx_get_dict_operation(trx);
1950
1951 error = row_create_index_for_mysql(index, trx, NULL);
1952
1953 trx->dict_operation = op;
1954
1955func_exit:
1956 if (error != DB_SUCCESS) {
1957 for (it = common_tables.begin(); it != common_tables.end();
1958 ++it) {
1959 row_drop_table_for_mysql(
1960 (*it)->name.m_name, trx, true, FALSE);
1961 }
1962 }
1963
1964 common_tables.clear();
1965 mem_heap_free(heap);
1966
1967 return(error);
1968}
1969
1970/** Create one FTS auxiliary index table for an FTS index.
1971@param[in,out] trx transaction
1972@param[in] index the index instance
1973@param[in] fts_table fts_table structure
1974@param[in,out] heap temporary memory heap
1975@see row_merge_create_fts_sort_index()
1976@return DB_SUCCESS or error code */
1977static
1978dict_table_t*
1979fts_create_one_index_table(
1980 trx_t* trx,
1981 const dict_index_t* index,
1982 const fts_table_t* fts_table,
1983 mem_heap_t* heap)
1984{
1985 dict_field_t* field;
1986 dict_table_t* new_table = NULL;
1987 char table_name[MAX_FULL_NAME_LEN];
1988 dberr_t error;
1989 CHARSET_INFO* charset;
1990
1991 ut_ad(index->type & DICT_FTS);
1992
1993 fts_get_table_name(fts_table, table_name);
1994
1995 new_table = fts_create_in_mem_aux_table(
1996 table_name, fts_table->table,
1997 FTS_AUX_INDEX_TABLE_NUM_COLS);
1998
1999 field = dict_index_get_nth_field(index, 0);
2000 charset = fts_get_charset(field->col->prtype);
2001
2002 dict_mem_table_add_col(new_table, heap, "word",
2003 charset == &my_charset_latin1
2004 ? DATA_VARCHAR : DATA_VARMYSQL,
2005 field->col->prtype,
2006 FTS_MAX_WORD_LEN_IN_CHAR
2007 * unsigned(field->col->mbmaxlen));
2008
2009 dict_mem_table_add_col(new_table, heap, "first_doc_id", DATA_INT,
2010 DATA_NOT_NULL | DATA_UNSIGNED,
2011 FTS_INDEX_FIRST_DOC_ID_LEN);
2012
2013 dict_mem_table_add_col(new_table, heap, "last_doc_id", DATA_INT,
2014 DATA_NOT_NULL | DATA_UNSIGNED,
2015 FTS_INDEX_LAST_DOC_ID_LEN);
2016
2017 dict_mem_table_add_col(new_table, heap, "doc_count", DATA_INT,
2018 DATA_NOT_NULL | DATA_UNSIGNED,
2019 FTS_INDEX_DOC_COUNT_LEN);
2020
2021 /* The precise type calculation is as follows:
2022 least signficiant byte: MySQL type code (not applicable for sys cols)
2023 second least : DATA_NOT_NULL | DATA_BINARY_TYPE
2024 third least : the MySQL charset-collation code (DATA_MTYPE_MAX) */
2025
2026 dict_mem_table_add_col(
2027 new_table, heap, "ilist", DATA_BLOB,
2028 (DATA_MTYPE_MAX << 16) | DATA_UNSIGNED | DATA_NOT_NULL,
2029 FTS_INDEX_ILIST_LEN);
2030
2031 dict_table_add_system_columns(new_table, heap);
2032 error = row_create_table_for_mysql(new_table, trx,
2033 FIL_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY);
2034
2035 if (error == DB_SUCCESS) {
2036 dict_index_t* index = dict_mem_index_create(
2037 new_table, "FTS_INDEX_TABLE_IND",
2038 DICT_UNIQUE|DICT_CLUSTERED, 2);
2039 dict_mem_index_add_field(index, "word", 0);
2040 dict_mem_index_add_field(index, "first_doc_id", 0);
2041
2042 trx_dict_op_t op = trx_get_dict_operation(trx);
2043
2044 error = row_create_index_for_mysql(index, trx, NULL);
2045
2046 trx->dict_operation = op;
2047 }
2048
2049 if (error != DB_SUCCESS) {
2050 trx->error_state = error;
2051 dict_mem_table_free(new_table);
2052 new_table = NULL;
2053 ib::warn() << "Failed to create FTS index table "
2054 << table_name;
2055 }
2056
2057 return(new_table);
2058}
2059
2060/** Creates the column specific ancillary tables needed for supporting an
2061FTS index on the given table. row_mysql_lock_data_dictionary must have
2062been called before this.
2063
2064All FTS AUX Index tables have the following schema.
2065CREAT TABLE $FTS_PREFIX_INDEX_[1-6](
2066 word VARCHAR(FTS_MAX_WORD_LEN),
2067 first_doc_id INT NOT NULL,
2068 last_doc_id UNSIGNED NOT NULL,
2069 doc_count UNSIGNED INT NOT NULL,
2070 ilist VARBINARY NOT NULL,
2071 UNIQUE CLUSTERED INDEX ON (word, first_doc_id))
2072@param[in,out] trx dictionary transaction
2073@param[in] index fulltext index
2074@param[in] id table id
2075@return DB_SUCCESS or error code */
2076dberr_t
2077fts_create_index_tables(trx_t* trx, const dict_index_t* index, table_id_t id)
2078{
2079 ulint i;
2080 fts_table_t fts_table;
2081 dberr_t error = DB_SUCCESS;
2082 mem_heap_t* heap = mem_heap_create(1024);
2083
2084 fts_table.type = FTS_INDEX_TABLE;
2085 fts_table.index_id = index->id;
2086 fts_table.table_id = id;
2087 fts_table.parent = index->table->name.m_name;
2088 fts_table.table = index->table;
2089
2090 /* aux_idx_tables vector is used for dropping FTS AUX INDEX
2091 tables on error condition. */
2092 std::vector<dict_table_t*> aux_idx_tables;
2093 std::vector<dict_table_t*>::const_iterator it;
2094
2095 for (i = 0; i < FTS_NUM_AUX_INDEX && error == DB_SUCCESS; ++i) {
2096 dict_table_t* new_table;
2097
2098 /* Create the FTS auxiliary tables that are specific
2099 to an FTS index. We need to preserve the table_id %s
2100 which fts_parse_sql_no_dict_lock() will fill in for us. */
2101 fts_table.suffix = fts_get_suffix(i);
2102
2103 new_table = fts_create_one_index_table(
2104 trx, index, &fts_table, heap);
2105
2106 if (new_table == NULL) {
2107 error = DB_FAIL;
2108 break;
2109 } else {
2110 aux_idx_tables.push_back(new_table);
2111 }
2112
2113 mem_heap_empty(heap);
2114
2115 DBUG_EXECUTE_IF("ib_fts_index_table_error",
2116 /* Return error after creating FTS_INDEX_5
2117 aux table. */
2118 if (i == 4) {
2119 error = DB_FAIL;
2120 break;
2121 }
2122 );
2123 }
2124
2125 if (error != DB_SUCCESS) {
2126
2127 for (it = aux_idx_tables.begin(); it != aux_idx_tables.end();
2128 ++it) {
2129 row_drop_table_for_mysql(
2130 (*it)->name.m_name, trx, true, FALSE);
2131 }
2132 }
2133
2134 aux_idx_tables.clear();
2135 mem_heap_free(heap);
2136
2137 return(error);
2138}
2139
#if 0
/******************************************************************//**
Map a row state to its printable name. This function is compiled out by
the surrounding #if 0.
@return string representation of state, "UNKNOWN" for any other value */
static
const char*
fts_get_state_str(
/*==============*/
	fts_row_state	state)	/*!< in: state */
{
	const char*	str = "UNKNOWN";

	switch (state) {
	case FTS_INSERT:
		str = "INSERT";
		break;

	case FTS_MODIFY:
		str = "MODIFY";
		break;

	case FTS_DELETE:
		str = "DELETE";
		break;

	case FTS_NOTHING:
		str = "NOTHING";
		break;

	case FTS_INVALID:
		str = "INVALID";
		break;

	default:
		break;
	}

	return(str);
}
#endif
2171
2172/******************************************************************//**
2173Calculate the new state of a row given the existing state and a new event.
2174@return new state of row */
2175static
2176fts_row_state
2177fts_trx_row_get_new_state(
2178/*======================*/
2179 fts_row_state old_state, /*!< in: existing state of row */
2180 fts_row_state event) /*!< in: new event */
2181{
2182 /* The rules for transforming states:
2183
2184 I = inserted
2185 M = modified
2186 D = deleted
2187 N = nothing
2188
2189 M+D -> D:
2190
2191 If the row existed before the transaction started and it is modified
2192 during the transaction, followed by a deletion of the row, only the
2193 deletion will be signaled.
2194
2195 M+ -> M:
2196
2197 If the row existed before the transaction started and it is modified
2198 more than once during the transaction, only the last modification
2199 will be signaled.
2200
2201 IM*D -> N:
2202
2203 If a new row is added during the transaction (and possibly modified
2204 after its initial insertion) but it is deleted before the end of the
2205 transaction, nothing will be signaled.
2206
2207 IM* -> I:
2208
2209 If a new row is added during the transaction and modified after its
2210 initial insertion, only the addition will be signaled.
2211
2212 M*DI -> M:
2213
2214 If the row existed before the transaction started and it is deleted,
2215 then re-inserted, only a modification will be signaled. Note that
2216 this case is only possible if the table is using the row's primary
2217 key for FTS row ids, since those can be re-inserted by the user,
2218 which is not true for InnoDB generated row ids.
2219
2220 It is easily seen that the above rules decompose such that we do not
2221 need to store the row's entire history of events. Instead, we can
2222 store just one state for the row and update that when new events
2223 arrive. Then we can implement the above rules as a two-dimensional
2224 look-up table, and get checking of invalid combinations "for free"
2225 in the process. */
2226
2227 /* The lookup table for transforming states. old_state is the
2228 Y-axis, event is the X-axis. */
2229 static const fts_row_state table[4][4] = {
2230 /* I M D N */
2231 /* I */ { FTS_INVALID, FTS_INSERT, FTS_NOTHING, FTS_INVALID },
2232 /* M */ { FTS_INVALID, FTS_MODIFY, FTS_DELETE, FTS_INVALID },
2233 /* D */ { FTS_MODIFY, FTS_INVALID, FTS_INVALID, FTS_INVALID },
2234 /* N */ { FTS_INVALID, FTS_INVALID, FTS_INVALID, FTS_INVALID }
2235 };
2236
2237 fts_row_state result;
2238
2239 ut_a(old_state < FTS_INVALID);
2240 ut_a(event < FTS_INVALID);
2241
2242 result = table[(int) old_state][(int) event];
2243 ut_a(result != FTS_INVALID);
2244
2245 return(result);
2246}
2247
2248/******************************************************************//**
2249Create a savepoint instance.
2250@return savepoint instance */
2251static
2252fts_savepoint_t*
2253fts_savepoint_create(
2254/*=================*/
2255 ib_vector_t* savepoints, /*!< out: InnoDB transaction */
2256 const char* name, /*!< in: savepoint name */
2257 mem_heap_t* heap) /*!< in: heap */
2258{
2259 fts_savepoint_t* savepoint;
2260
2261 savepoint = static_cast<fts_savepoint_t*>(
2262 ib_vector_push(savepoints, NULL));
2263
2264 memset(savepoint, 0x0, sizeof(*savepoint));
2265
2266 if (name) {
2267 savepoint->name = mem_heap_strdup(heap, name);
2268 }
2269
2270 savepoint->tables = rbt_create(
2271 sizeof(fts_trx_table_t*), fts_trx_table_cmp);
2272
2273 return(savepoint);
2274}
2275
2276/******************************************************************//**
2277Create an FTS trx.
2278@return FTS trx */
2279fts_trx_t*
2280fts_trx_create(
2281/*===========*/
2282 trx_t* trx) /*!< in/out: InnoDB
2283 transaction */
2284{
2285 fts_trx_t* ftt;
2286 ib_alloc_t* heap_alloc;
2287 mem_heap_t* heap = mem_heap_create(1024);
2288 trx_named_savept_t* savep;
2289
2290 ut_a(trx->fts_trx == NULL);
2291
2292 ftt = static_cast<fts_trx_t*>(mem_heap_alloc(heap, sizeof(fts_trx_t)));
2293 ftt->trx = trx;
2294 ftt->heap = heap;
2295
2296 heap_alloc = ib_heap_allocator_create(heap);
2297
2298 ftt->savepoints = static_cast<ib_vector_t*>(ib_vector_create(
2299 heap_alloc, sizeof(fts_savepoint_t), 4));
2300
2301 ftt->last_stmt = static_cast<ib_vector_t*>(ib_vector_create(
2302 heap_alloc, sizeof(fts_savepoint_t), 4));
2303
2304 /* Default instance has no name and no heap. */
2305 fts_savepoint_create(ftt->savepoints, NULL, NULL);
2306 fts_savepoint_create(ftt->last_stmt, NULL, NULL);
2307
2308 /* Copy savepoints that already set before. */
2309 for (savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
2310 savep != NULL;
2311 savep = UT_LIST_GET_NEXT(trx_savepoints, savep)) {
2312
2313 fts_savepoint_take(ftt, savep->name);
2314 }
2315
2316 return(ftt);
2317}
2318
2319/******************************************************************//**
2320Create an FTS trx table.
2321@return FTS trx table */
2322static
2323fts_trx_table_t*
2324fts_trx_table_create(
2325/*=================*/
2326 fts_trx_t* fts_trx, /*!< in: FTS trx */
2327 dict_table_t* table) /*!< in: table */
2328{
2329 fts_trx_table_t* ftt;
2330
2331 ftt = static_cast<fts_trx_table_t*>(
2332 mem_heap_alloc(fts_trx->heap, sizeof(*ftt)));
2333
2334 memset(ftt, 0x0, sizeof(*ftt));
2335
2336 ftt->table = table;
2337 ftt->fts_trx = fts_trx;
2338
2339 ftt->rows = rbt_create(sizeof(fts_trx_row_t), fts_trx_row_doc_id_cmp);
2340
2341 return(ftt);
2342}
2343
2344/******************************************************************//**
2345Clone an FTS trx table.
2346@return FTS trx table */
2347static
2348fts_trx_table_t*
2349fts_trx_table_clone(
2350/*=================*/
2351 const fts_trx_table_t* ftt_src) /*!< in: FTS trx */
2352{
2353 fts_trx_table_t* ftt;
2354
2355 ftt = static_cast<fts_trx_table_t*>(
2356 mem_heap_alloc(ftt_src->fts_trx->heap, sizeof(*ftt)));
2357
2358 memset(ftt, 0x0, sizeof(*ftt));
2359
2360 ftt->table = ftt_src->table;
2361 ftt->fts_trx = ftt_src->fts_trx;
2362
2363 ftt->rows = rbt_create(sizeof(fts_trx_row_t), fts_trx_row_doc_id_cmp);
2364
2365 /* Copy the rb tree values to the new savepoint. */
2366 rbt_merge_uniq(ftt->rows, ftt_src->rows);
2367
2368 /* These are only added on commit. At this stage we only have
2369 the updated row state. */
2370 ut_a(ftt_src->added_doc_ids == NULL);
2371
2372 return(ftt);
2373}
2374
2375/******************************************************************//**
2376Initialize the FTS trx instance.
2377@return FTS trx instance */
2378static
2379fts_trx_table_t*
2380fts_trx_init(
2381/*=========*/
2382 trx_t* trx, /*!< in: transaction */
2383 dict_table_t* table, /*!< in: FTS table instance */
2384 ib_vector_t* savepoints) /*!< in: Savepoints */
2385{
2386 fts_trx_table_t* ftt;
2387 ib_rbt_bound_t parent;
2388 ib_rbt_t* tables;
2389 fts_savepoint_t* savepoint;
2390
2391 savepoint = static_cast<fts_savepoint_t*>(ib_vector_last(savepoints));
2392
2393 tables = savepoint->tables;
2394 rbt_search_cmp(tables, &parent, &table->id, fts_trx_table_id_cmp, NULL);
2395
2396 if (parent.result == 0) {
2397 fts_trx_table_t** fttp;
2398
2399 fttp = rbt_value(fts_trx_table_t*, parent.last);
2400 ftt = *fttp;
2401 } else {
2402 ftt = fts_trx_table_create(trx->fts_trx, table);
2403 rbt_add_node(tables, &parent, &ftt);
2404 }
2405
2406 ut_a(ftt->table == table);
2407
2408 return(ftt);
2409}
2410
2411/******************************************************************//**
2412Notify the FTS system about an operation on an FTS-indexed table. */
2413static
2414void
2415fts_trx_table_add_op(
2416/*=================*/
2417 fts_trx_table_t*ftt, /*!< in: FTS trx table */
2418 doc_id_t doc_id, /*!< in: doc id */
2419 fts_row_state state, /*!< in: state of the row */
2420 ib_vector_t* fts_indexes) /*!< in: FTS indexes affected */
2421{
2422 ib_rbt_t* rows;
2423 ib_rbt_bound_t parent;
2424
2425 rows = ftt->rows;
2426 rbt_search(rows, &parent, &doc_id);
2427
2428 /* Row id found, update state, and if new state is FTS_NOTHING,
2429 we delete the row from our tree. */
2430 if (parent.result == 0) {
2431 fts_trx_row_t* row = rbt_value(fts_trx_row_t, parent.last);
2432
2433 row->state = fts_trx_row_get_new_state(row->state, state);
2434
2435 if (row->state == FTS_NOTHING) {
2436 if (row->fts_indexes) {
2437 ib_vector_free(row->fts_indexes);
2438 }
2439
2440 ut_free(rbt_remove_node(rows, parent.last));
2441 row = NULL;
2442 } else if (row->fts_indexes != NULL) {
2443 ib_vector_free(row->fts_indexes);
2444 row->fts_indexes = fts_indexes;
2445 }
2446
2447 } else { /* Row-id not found, create a new one. */
2448 fts_trx_row_t row;
2449
2450 row.doc_id = doc_id;
2451 row.state = state;
2452 row.fts_indexes = fts_indexes;
2453
2454 rbt_add_node(rows, &parent, &row);
2455 }
2456}
2457
2458/******************************************************************//**
2459Notify the FTS system about an operation on an FTS-indexed table. */
2460void
2461fts_trx_add_op(
2462/*===========*/
2463 trx_t* trx, /*!< in: InnoDB transaction */
2464 dict_table_t* table, /*!< in: table */
2465 doc_id_t doc_id, /*!< in: new doc id */
2466 fts_row_state state, /*!< in: state of the row */
2467 ib_vector_t* fts_indexes) /*!< in: FTS indexes affected
2468 (NULL=all) */
2469{
2470 fts_trx_table_t* tran_ftt;
2471 fts_trx_table_t* stmt_ftt;
2472
2473 if (!trx->fts_trx) {
2474 trx->fts_trx = fts_trx_create(trx);
2475 }
2476
2477 tran_ftt = fts_trx_init(trx, table, trx->fts_trx->savepoints);
2478 stmt_ftt = fts_trx_init(trx, table, trx->fts_trx->last_stmt);
2479
2480 fts_trx_table_add_op(tran_ftt, doc_id, state, fts_indexes);
2481 fts_trx_table_add_op(stmt_ftt, doc_id, state, fts_indexes);
2482}
2483
2484/******************************************************************//**
2485Fetch callback that converts a textual document id to a binary value and
2486stores it in the given place.
2487@return always returns NULL */
2488static
2489ibool
2490fts_fetch_store_doc_id(
2491/*===================*/
2492 void* row, /*!< in: sel_node_t* */
2493 void* user_arg) /*!< in: doc_id_t* to store
2494 doc_id in */
2495{
2496 int n_parsed;
2497 sel_node_t* node = static_cast<sel_node_t*>(row);
2498 doc_id_t* doc_id = static_cast<doc_id_t*>(user_arg);
2499 dfield_t* dfield = que_node_get_val(node->select_list);
2500 dtype_t* type = dfield_get_type(dfield);
2501 ulint len = dfield_get_len(dfield);
2502
2503 char buf[32];
2504
2505 ut_a(dtype_get_mtype(type) == DATA_VARCHAR);
2506 ut_a(len > 0 && len < sizeof(buf));
2507
2508 memcpy(buf, dfield_get_data(dfield), len);
2509 buf[len] = '\0';
2510
2511 n_parsed = sscanf(buf, FTS_DOC_ID_FORMAT, doc_id);
2512 ut_a(n_parsed == 1);
2513
2514 return(FALSE);
2515}
2516
#ifdef FTS_CACHE_SIZE_DEBUG
/******************************************************************//**
Get the max cache size in bytes. The value is read from the FTS config
table and clamped to the range [FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB,
FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB], with a warning when clamping happens.
If there is an error reading the value we simply print an error message
here and return the default value (the lower limit) to the caller.
@return max cache size in bytes */
static
ulint
fts_get_max_cache_size(
/*===================*/
	trx_t*		trx,		/*!< in: transaction */
	fts_table_t*	fts_table)	/*!< in: table instance */
{
	dberr_t		error;
	fts_string_t	value;
	ulong		cache_size_in_mb;

	/* Set to the default value. */
	cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB;

	/* We set the length of value to the max bytes it can hold. This
	information is used by the callback that reads the value. */
	value.f_n_char = 0;
	value.f_len = FTS_MAX_CONFIG_VALUE_LEN;

	/* ut_malloc_nokey() yields a void*; C++ requires an explicit
	conversion to the byte* member. One extra byte is reserved for
	the terminating NUL written below. */
	value.f_str = static_cast<byte*>(ut_malloc_nokey(value.f_len + 1));

	error = fts_config_get_value(
		trx, fts_table, FTS_MAX_CACHE_SIZE_IN_MB, &value);

	if (error == DB_SUCCESS) {

		/* Terminate the fetched text so that strtoul() can
		parse it. */
		value.f_str[value.f_len] = 0;
		cache_size_in_mb = strtoul((char*) value.f_str, NULL, 10);

		if (cache_size_in_mb > FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB) {

			ib::warn() << "FTS max cache size ("
				<< cache_size_in_mb << ") out of range."
				" Minimum value is "
				<< FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB
				<< "MB and the maximum value is "
				<< FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB
				<< "MB, setting cache size to upper limit";

			cache_size_in_mb = FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB;

		} else if (cache_size_in_mb
			   < FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB) {

			/* Same wording as the upper-limit warning,
			including the space before the maximum value. */
			ib::warn() << "FTS max cache size ("
				<< cache_size_in_mb << ") out of range."
				" Minimum value is "
				<< FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB
				<< "MB and the maximum value is "
				<< FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB
				<< "MB, setting cache size to lower limit";

			cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB;
		}
	} else {
		ib::error() << "(" << ut_strerr(error) << ") reading max"
			" cache config value from config table";
	}

	ut_free(value.f_str);

	/* Convert megabytes to bytes. */
	return(cache_size_in_mb * 1024 * 1024);
}
#endif
2586
2587/*********************************************************************//**
2588Update the next and last Doc ID in the CONFIG table to be the input
2589"doc_id" value (+ 1). We would do so after each FTS index build or
2590table truncate */
2591void
2592fts_update_next_doc_id(
2593/*===================*/
2594 trx_t* trx, /*!< in/out: transaction */
2595 const dict_table_t* table, /*!< in: table */
2596 const char* table_name, /*!< in: table name, or NULL */
2597 doc_id_t doc_id) /*!< in: DOC ID to set */
2598{
2599 table->fts->cache->synced_doc_id = doc_id;
2600 table->fts->cache->next_doc_id = doc_id + 1;
2601
2602 table->fts->cache->first_doc_id = table->fts->cache->next_doc_id;
2603
2604 fts_update_sync_doc_id(
2605 table, table_name, table->fts->cache->synced_doc_id, trx);
2606
2607}
2608
2609/*********************************************************************//**
2610Get the next available document id.
2611@return DB_SUCCESS if OK */
2612dberr_t
2613fts_get_next_doc_id(
2614/*================*/
2615 const dict_table_t* table, /*!< in: table */
2616 doc_id_t* doc_id) /*!< out: new document id */
2617{
2618 fts_cache_t* cache = table->fts->cache;
2619
2620 /* If the Doc ID system has not yet been initialized, we
2621 will consult the CONFIG table and user table to re-establish
2622 the initial value of the Doc ID */
2623 if (cache->first_doc_id == FTS_NULL_DOC_ID) {
2624 fts_init_doc_id(table);
2625 }
2626
2627 if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
2628 *doc_id = FTS_NULL_DOC_ID;
2629 return(DB_SUCCESS);
2630 }
2631
2632 DEBUG_SYNC_C("get_next_FTS_DOC_ID");
2633 mutex_enter(&cache->doc_id_lock);
2634 *doc_id = cache->next_doc_id++;
2635 mutex_exit(&cache->doc_id_lock);
2636
2637 return(DB_SUCCESS);
2638}
2639
2640/*********************************************************************//**
2641This function fetch the Doc ID from CONFIG table, and compare with
2642the Doc ID supplied. And store the larger one to the CONFIG table.
2643@return DB_SUCCESS if OK */
2644static MY_ATTRIBUTE((nonnull))
2645dberr_t
2646fts_cmp_set_sync_doc_id(
2647/*====================*/
2648 const dict_table_t* table, /*!< in: table */
2649 doc_id_t doc_id_cmp, /*!< in: Doc ID to compare */
2650 ibool read_only, /*!< in: TRUE if read the
2651 synced_doc_id only */
2652 doc_id_t* doc_id) /*!< out: larger document id
2653 after comparing "doc_id_cmp"
2654 to the one stored in CONFIG
2655 table */
2656{
2657 trx_t* trx;
2658 pars_info_t* info;
2659 dberr_t error;
2660 fts_table_t fts_table;
2661 que_t* graph = NULL;
2662 fts_cache_t* cache = table->fts->cache;
2663 char table_name[MAX_FULL_NAME_LEN];
2664retry:
2665 ut_a(table->fts->doc_col != ULINT_UNDEFINED);
2666
2667 fts_table.suffix = "CONFIG";
2668 fts_table.table_id = table->id;
2669 fts_table.type = FTS_COMMON_TABLE;
2670 fts_table.table = table;
2671
2672 fts_table.parent = table->name.m_name;
2673
2674 trx = trx_create();
2675 if (srv_read_only_mode) {
2676 trx_start_internal_read_only(trx);
2677 } else {
2678 trx_start_internal(trx);
2679 }
2680
2681 trx->op_info = "update the next FTS document id";
2682
2683 info = pars_info_create();
2684
2685 pars_info_bind_function(
2686 info, "my_func", fts_fetch_store_doc_id, doc_id);
2687
2688 fts_get_table_name(&fts_table, table_name);
2689 pars_info_bind_id(info, true, "config_table", table_name);
2690
2691 graph = fts_parse_sql(
2692 &fts_table, info,
2693 "DECLARE FUNCTION my_func;\n"
2694 "DECLARE CURSOR c IS SELECT value FROM $config_table"
2695 " WHERE key = 'synced_doc_id' FOR UPDATE;\n"
2696 "BEGIN\n"
2697 ""
2698 "OPEN c;\n"
2699 "WHILE 1 = 1 LOOP\n"
2700 " FETCH c INTO my_func();\n"
2701 " IF c % NOTFOUND THEN\n"
2702 " EXIT;\n"
2703 " END IF;\n"
2704 "END LOOP;\n"
2705 "CLOSE c;");
2706
2707 *doc_id = 0;
2708
2709 error = fts_eval_sql(trx, graph);
2710
2711 fts_que_graph_free_check_lock(&fts_table, NULL, graph);
2712
2713 // FIXME: We need to retry deadlock errors
2714 if (error != DB_SUCCESS) {
2715 goto func_exit;
2716 }
2717
2718 if (read_only) {
2719 goto func_exit;
2720 }
2721
2722 if (doc_id_cmp == 0 && *doc_id) {
2723 cache->synced_doc_id = *doc_id - 1;
2724 } else {
2725 cache->synced_doc_id = ut_max(doc_id_cmp, *doc_id);
2726 }
2727
2728 mutex_enter(&cache->doc_id_lock);
2729 /* For each sync operation, we will add next_doc_id by 1,
2730 so to mark a sync operation */
2731 if (cache->next_doc_id < cache->synced_doc_id + 1) {
2732 cache->next_doc_id = cache->synced_doc_id + 1;
2733 }
2734 mutex_exit(&cache->doc_id_lock);
2735
2736 if (doc_id_cmp > *doc_id) {
2737 error = fts_update_sync_doc_id(
2738 table, table->name.m_name, cache->synced_doc_id, trx);
2739 }
2740
2741 *doc_id = cache->next_doc_id;
2742
2743func_exit:
2744
2745 if (error == DB_SUCCESS) {
2746 fts_sql_commit(trx);
2747 } else {
2748 *doc_id = 0;
2749
2750 ib::error() << "(" << ut_strerr(error) << ") while getting"
2751 " next doc id.";
2752 fts_sql_rollback(trx);
2753
2754 if (error == DB_DEADLOCK) {
2755 os_thread_sleep(FTS_DEADLOCK_RETRY_WAIT);
2756 goto retry;
2757 }
2758 }
2759
2760 trx_free(trx);
2761
2762 return(error);
2763}
2764
2765/*********************************************************************//**
2766Update the last document id. This function could create a new
2767transaction to update the last document id.
2768@return DB_SUCCESS if OK */
2769static
2770dberr_t
2771fts_update_sync_doc_id(
2772/*===================*/
2773 const dict_table_t* table, /*!< in: table */
2774 const char* table_name, /*!< in: table name, or NULL */
2775 doc_id_t doc_id, /*!< in: last document id */
2776 trx_t* trx) /*!< in: update trx, or NULL */
2777{
2778 byte id[FTS_MAX_ID_LEN];
2779 pars_info_t* info;
2780 fts_table_t fts_table;
2781 ulint id_len;
2782 que_t* graph = NULL;
2783 dberr_t error;
2784 ibool local_trx = FALSE;
2785 fts_cache_t* cache = table->fts->cache;
2786 char fts_name[MAX_FULL_NAME_LEN];
2787
2788 if (srv_read_only_mode) {
2789 return DB_READ_ONLY;
2790 }
2791
2792 fts_table.suffix = "CONFIG";
2793 fts_table.table_id = table->id;
2794 fts_table.type = FTS_COMMON_TABLE;
2795 fts_table.table = table;
2796 if (table_name) {
2797 fts_table.parent = table_name;
2798 } else {
2799 fts_table.parent = table->name.m_name;
2800 }
2801
2802 if (!trx) {
2803 trx = trx_create();
2804 trx_start_internal(trx);
2805
2806 trx->op_info = "setting last FTS document id";
2807 local_trx = TRUE;
2808 }
2809
2810 info = pars_info_create();
2811
2812 id_len = (ulint) snprintf(
2813 (char*) id, sizeof(id), FTS_DOC_ID_FORMAT, doc_id + 1);
2814
2815 pars_info_bind_varchar_literal(info, "doc_id", id, id_len);
2816
2817 fts_get_table_name(&fts_table, fts_name);
2818 pars_info_bind_id(info, true, "table_name", fts_name);
2819
2820 graph = fts_parse_sql(
2821 &fts_table, info,
2822 "BEGIN"
2823 " UPDATE $table_name SET value = :doc_id"
2824 " WHERE key = 'synced_doc_id';");
2825
2826 error = fts_eval_sql(trx, graph);
2827
2828 fts_que_graph_free_check_lock(&fts_table, NULL, graph);
2829
2830 if (local_trx) {
2831 if (error == DB_SUCCESS) {
2832 fts_sql_commit(trx);
2833 cache->synced_doc_id = doc_id;
2834 } else {
2835
2836 ib::error() << "(" << ut_strerr(error) << ") while"
2837 " updating last doc id.";
2838
2839 fts_sql_rollback(trx);
2840 }
2841 trx_free(trx);
2842 }
2843
2844 return(error);
2845}
2846
2847/*********************************************************************//**
2848Create a new fts_doc_ids_t.
2849@return new fts_doc_ids_t */
2850fts_doc_ids_t*
2851fts_doc_ids_create(void)
2852/*====================*/
2853{
2854 fts_doc_ids_t* fts_doc_ids;
2855 mem_heap_t* heap = mem_heap_create(512);
2856
2857 fts_doc_ids = static_cast<fts_doc_ids_t*>(
2858 mem_heap_alloc(heap, sizeof(*fts_doc_ids)));
2859
2860 fts_doc_ids->self_heap = ib_heap_allocator_create(heap);
2861
2862 fts_doc_ids->doc_ids = static_cast<ib_vector_t*>(ib_vector_create(
2863 fts_doc_ids->self_heap, sizeof(fts_update_t), 32));
2864
2865 return(fts_doc_ids);
2866}
2867
2868/*********************************************************************//**
2869Free a fts_doc_ids_t. */
2870void
2871fts_doc_ids_free(
2872/*=============*/
2873 fts_doc_ids_t* fts_doc_ids)
2874{
2875 mem_heap_t* heap = static_cast<mem_heap_t*>(
2876 fts_doc_ids->self_heap->arg);
2877
2878 memset(fts_doc_ids, 0, sizeof(*fts_doc_ids));
2879
2880 mem_heap_free(heap);
2881}
2882
2883/*********************************************************************//**
2884Do commit-phase steps necessary for the insertion of a new row. */
2885void
2886fts_add(
2887/*====*/
2888 fts_trx_table_t*ftt, /*!< in: FTS trx table */
2889 fts_trx_row_t* row) /*!< in: row */
2890{
2891 dict_table_t* table = ftt->table;
2892 doc_id_t doc_id = row->doc_id;
2893
2894 ut_a(row->state == FTS_INSERT || row->state == FTS_MODIFY);
2895
2896 fts_add_doc_by_id(ftt, doc_id, row->fts_indexes);
2897
2898 mutex_enter(&table->fts->cache->deleted_lock);
2899 ++table->fts->cache->added;
2900 mutex_exit(&table->fts->cache->deleted_lock);
2901
2902 if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)
2903 && doc_id >= table->fts->cache->next_doc_id) {
2904 table->fts->cache->next_doc_id = doc_id + 1;
2905 }
2906}
2907
2908/*********************************************************************//**
2909Do commit-phase steps necessary for the deletion of a row.
2910@return DB_SUCCESS or error code */
2911static MY_ATTRIBUTE((nonnull, warn_unused_result))
2912dberr_t
2913fts_delete(
2914/*=======*/
2915 fts_trx_table_t*ftt, /*!< in: FTS trx table */
2916 fts_trx_row_t* row) /*!< in: row */
2917{
2918 que_t* graph;
2919 fts_table_t fts_table;
2920 dberr_t error = DB_SUCCESS;
2921 doc_id_t write_doc_id;
2922 dict_table_t* table = ftt->table;
2923 doc_id_t doc_id = row->doc_id;
2924 trx_t* trx = ftt->fts_trx->trx;
2925 pars_info_t* info = pars_info_create();
2926 fts_cache_t* cache = table->fts->cache;
2927
2928 /* we do not index Documents whose Doc ID value is 0 */
2929 if (doc_id == FTS_NULL_DOC_ID) {
2930 ut_ad(!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID));
2931 return(error);
2932 }
2933
2934 ut_a(row->state == FTS_DELETE || row->state == FTS_MODIFY);
2935
2936 FTS_INIT_FTS_TABLE(&fts_table, "DELETED", FTS_COMMON_TABLE, table);
2937
2938 /* Convert to "storage" byte order. */
2939 fts_write_doc_id((byte*) &write_doc_id, doc_id);
2940 fts_bind_doc_id(info, "doc_id", &write_doc_id);
2941
2942 /* It is possible we update a record that has not yet been sync-ed
2943 into cache from last crash (delete Doc will not initialize the
2944 sync). Avoid any added counter accounting until the FTS cache
2945 is re-established and sync-ed */
2946 if (table->fts->fts_status & ADDED_TABLE_SYNCED
2947 && doc_id > cache->synced_doc_id) {
2948 mutex_enter(&table->fts->cache->deleted_lock);
2949
2950 /* The Doc ID could belong to those left in
2951 ADDED table from last crash. So need to check
2952 if it is less than first_doc_id when we initialize
2953 the Doc ID system after reboot */
2954 if (doc_id >= table->fts->cache->first_doc_id
2955 && table->fts->cache->added > 0) {
2956 --table->fts->cache->added;
2957 }
2958
2959 mutex_exit(&table->fts->cache->deleted_lock);
2960
2961 /* Only if the row was really deleted. */
2962 ut_a(row->state == FTS_DELETE || row->state == FTS_MODIFY);
2963 }
2964
2965 /* Note the deleted document for OPTIMIZE to purge. */
2966 if (error == DB_SUCCESS) {
2967 char table_name[MAX_FULL_NAME_LEN];
2968
2969 trx->op_info = "adding doc id to FTS DELETED";
2970
2971 info->graph_owns_us = TRUE;
2972
2973 fts_table.suffix = "DELETED";
2974
2975 fts_get_table_name(&fts_table, table_name);
2976 pars_info_bind_id(info, true, "deleted", table_name);
2977
2978 graph = fts_parse_sql(
2979 &fts_table,
2980 info,
2981 "BEGIN INSERT INTO $deleted VALUES (:doc_id);");
2982
2983 error = fts_eval_sql(trx, graph);
2984
2985 fts_que_graph_free(graph);
2986 } else {
2987 pars_info_free(info);
2988 }
2989
2990 /* Increment the total deleted count, this is used to calculate the
2991 number of documents indexed. */
2992 if (error == DB_SUCCESS) {
2993 mutex_enter(&table->fts->cache->deleted_lock);
2994
2995 ++table->fts->cache->deleted;
2996
2997 mutex_exit(&table->fts->cache->deleted_lock);
2998 }
2999
3000 return(error);
3001}
3002
3003/*********************************************************************//**
3004Do commit-phase steps necessary for the modification of a row.
3005@return DB_SUCCESS or error code */
3006static MY_ATTRIBUTE((nonnull, warn_unused_result))
3007dberr_t
3008fts_modify(
3009/*=======*/
3010 fts_trx_table_t* ftt, /*!< in: FTS trx table */
3011 fts_trx_row_t* row) /*!< in: row */
3012{
3013 dberr_t error;
3014
3015 ut_a(row->state == FTS_MODIFY);
3016
3017 error = fts_delete(ftt, row);
3018
3019 if (error == DB_SUCCESS) {
3020 fts_add(ftt, row);
3021 }
3022
3023 return(error);
3024}
3025
3026/*********************************************************************//**
3027The given transaction is about to be committed; do whatever is necessary
3028from the FTS system's POV.
3029@return DB_SUCCESS or error code */
3030static MY_ATTRIBUTE((nonnull, warn_unused_result))
3031dberr_t
3032fts_commit_table(
3033/*=============*/
3034 fts_trx_table_t* ftt) /*!< in: FTS table to commit*/
3035{
3036 if (srv_read_only_mode) {
3037 return DB_READ_ONLY;
3038 }
3039
3040 const ib_rbt_node_t* node;
3041 ib_rbt_t* rows;
3042 dberr_t error = DB_SUCCESS;
3043 fts_cache_t* cache = ftt->table->fts->cache;
3044 trx_t* trx = trx_create();
3045
3046 trx_start_internal(trx);
3047
3048 rows = ftt->rows;
3049
3050 ftt->fts_trx->trx = trx;
3051
3052 if (cache->get_docs == NULL) {
3053 rw_lock_x_lock(&cache->init_lock);
3054 if (cache->get_docs == NULL) {
3055 cache->get_docs = fts_get_docs_create(cache);
3056 }
3057 rw_lock_x_unlock(&cache->init_lock);
3058 }
3059
3060 for (node = rbt_first(rows);
3061 node != NULL && error == DB_SUCCESS;
3062 node = rbt_next(rows, node)) {
3063
3064 fts_trx_row_t* row = rbt_value(fts_trx_row_t, node);
3065
3066 switch (row->state) {
3067 case FTS_INSERT:
3068 fts_add(ftt, row);
3069 break;
3070
3071 case FTS_MODIFY:
3072 error = fts_modify(ftt, row);
3073 break;
3074
3075 case FTS_DELETE:
3076 error = fts_delete(ftt, row);
3077 break;
3078
3079 default:
3080 ut_error;
3081 }
3082 }
3083
3084 fts_sql_commit(trx);
3085
3086 trx_free(trx);
3087
3088 return(error);
3089}
3090
3091/*********************************************************************//**
3092The given transaction is about to be committed; do whatever is necessary
3093from the FTS system's POV.
3094@return DB_SUCCESS or error code */
3095dberr_t
3096fts_commit(
3097/*=======*/
3098 trx_t* trx) /*!< in: transaction */
3099{
3100 const ib_rbt_node_t* node;
3101 dberr_t error;
3102 ib_rbt_t* tables;
3103 fts_savepoint_t* savepoint;
3104
3105 savepoint = static_cast<fts_savepoint_t*>(
3106 ib_vector_last(trx->fts_trx->savepoints));
3107 tables = savepoint->tables;
3108
3109 for (node = rbt_first(tables), error = DB_SUCCESS;
3110 node != NULL && error == DB_SUCCESS;
3111 node = rbt_next(tables, node)) {
3112
3113 fts_trx_table_t** ftt;
3114
3115 ftt = rbt_value(fts_trx_table_t*, node);
3116
3117 error = fts_commit_table(*ftt);
3118 }
3119
3120 return(error);
3121}
3122
3123/*********************************************************************//**
3124Initialize a document. */
3125void
3126fts_doc_init(
3127/*=========*/
3128 fts_doc_t* doc) /*!< in: doc to initialize */
3129{
3130 mem_heap_t* heap = mem_heap_create(32);
3131
3132 memset(doc, 0, sizeof(*doc));
3133
3134 doc->self_heap = ib_heap_allocator_create(heap);
3135}
3136
3137/*********************************************************************//**
3138Free document. */
3139void
3140fts_doc_free(
3141/*=========*/
3142 fts_doc_t* doc) /*!< in: document */
3143{
3144 mem_heap_t* heap = static_cast<mem_heap_t*>(doc->self_heap->arg);
3145
3146 if (doc->tokens) {
3147 rbt_free(doc->tokens);
3148 }
3149
3150 ut_d(memset(doc, 0, sizeof(*doc)));
3151
3152 mem_heap_free(heap);
3153}
3154
3155/*********************************************************************//**
3156Callback function for fetch that stores the text of an FTS document,
3157converting each column to UTF-16.
3158@return always FALSE */
3159ibool
3160fts_query_expansion_fetch_doc(
3161/*==========================*/
3162 void* row, /*!< in: sel_node_t* */
3163 void* user_arg) /*!< in: fts_doc_t* */
3164{
3165 que_node_t* exp;
3166 sel_node_t* node = static_cast<sel_node_t*>(row);
3167 fts_doc_t* result_doc = static_cast<fts_doc_t*>(user_arg);
3168 dfield_t* dfield;
3169 ulint len;
3170 ulint doc_len;
3171 fts_doc_t doc;
3172 CHARSET_INFO* doc_charset = NULL;
3173 ulint field_no = 0;
3174
3175 len = 0;
3176
3177 fts_doc_init(&doc);
3178 doc.found = TRUE;
3179
3180 exp = node->select_list;
3181 doc_len = 0;
3182
3183 doc_charset = result_doc->charset;
3184
3185 /* Copy each indexed column content into doc->text.f_str */
3186 while (exp) {
3187 dfield = que_node_get_val(exp);
3188 len = dfield_get_len(dfield);
3189
3190 /* NULL column */
3191 if (len == UNIV_SQL_NULL) {
3192 exp = que_node_get_next(exp);
3193 continue;
3194 }
3195
3196 if (!doc_charset) {
3197 doc_charset = fts_get_charset(dfield->type.prtype);
3198 }
3199
3200 doc.charset = doc_charset;
3201
3202 if (dfield_is_ext(dfield)) {
3203 /* We ignore columns that are stored externally, this
3204 could result in too many words to search */
3205 exp = que_node_get_next(exp);
3206 continue;
3207 } else {
3208 doc.text.f_n_char = 0;
3209
3210 doc.text.f_str = static_cast<byte*>(
3211 dfield_get_data(dfield));
3212
3213 doc.text.f_len = len;
3214 }
3215
3216 if (field_no == 0) {
3217 fts_tokenize_document(&doc, result_doc,
3218 result_doc->parser);
3219 } else {
3220 fts_tokenize_document_next(&doc, doc_len, result_doc,
3221 result_doc->parser);
3222 }
3223
3224 exp = que_node_get_next(exp);
3225
3226 doc_len += (exp) ? len + 1 : len;
3227
3228 field_no++;
3229 }
3230
3231 ut_ad(doc_charset);
3232
3233 if (!result_doc->charset) {
3234 result_doc->charset = doc_charset;
3235 }
3236
3237 fts_doc_free(&doc);
3238
3239 return(FALSE);
3240}
3241
3242/*********************************************************************//**
3243fetch and tokenize the document. */
3244static
3245void
3246fts_fetch_doc_from_rec(
3247/*===================*/
3248 fts_get_doc_t* get_doc, /*!< in: FTS index's get_doc struct */
3249 dict_index_t* clust_index, /*!< in: cluster index */
3250 btr_pcur_t* pcur, /*!< in: cursor whose position
3251 has been stored */
3252 ulint* offsets, /*!< in: offsets */
3253 fts_doc_t* doc) /*!< out: fts doc to hold parsed
3254 documents */
3255{
3256 dict_index_t* index;
3257 dict_table_t* table;
3258 const rec_t* clust_rec;
3259 ulint num_field;
3260 const dict_field_t* ifield;
3261 const dict_col_t* col;
3262 ulint clust_pos;
3263 ulint i;
3264 ulint doc_len = 0;
3265 ulint processed_doc = 0;
3266 st_mysql_ftparser* parser;
3267
3268 if (!get_doc) {
3269 return;
3270 }
3271
3272 index = get_doc->index_cache->index;
3273 table = get_doc->index_cache->index->table;
3274 parser = get_doc->index_cache->index->parser;
3275
3276 clust_rec = btr_pcur_get_rec(pcur);
3277 ut_ad(!page_rec_is_comp(clust_rec)
3278 || rec_get_status(clust_rec) == REC_STATUS_ORDINARY);
3279
3280 num_field = dict_index_get_n_fields(index);
3281
3282 for (i = 0; i < num_field; i++) {
3283 ifield = dict_index_get_nth_field(index, i);
3284 col = dict_field_get_col(ifield);
3285 clust_pos = dict_col_get_clust_pos(col, clust_index);
3286
3287 if (!get_doc->index_cache->charset) {
3288 get_doc->index_cache->charset = fts_get_charset(
3289 ifield->col->prtype);
3290 }
3291
3292 if (rec_offs_nth_extern(offsets, clust_pos)) {
3293 doc->text.f_str =
3294 btr_rec_copy_externally_stored_field(
3295 clust_rec, offsets,
3296 dict_table_page_size(table),
3297 clust_pos, &doc->text.f_len,
3298 static_cast<mem_heap_t*>(
3299 doc->self_heap->arg));
3300 } else {
3301 doc->text.f_str = (byte*) rec_get_nth_field(
3302 clust_rec, offsets, clust_pos,
3303 &doc->text.f_len);
3304 }
3305
3306 doc->found = TRUE;
3307 doc->charset = get_doc->index_cache->charset;
3308
3309 /* Null Field */
3310 if (doc->text.f_len == UNIV_SQL_NULL || doc->text.f_len == 0) {
3311 continue;
3312 }
3313
3314 if (processed_doc == 0) {
3315 fts_tokenize_document(doc, NULL, parser);
3316 } else {
3317 fts_tokenize_document_next(doc, doc_len, NULL, parser);
3318 }
3319
3320 processed_doc++;
3321 doc_len += doc->text.f_len + 1;
3322 }
3323}
3324
3325/** Fetch the data from tuple and tokenize the document.
3326@param[in] get_doc FTS index's get_doc struct
3327@param[in] tuple tuple should be arranged in table schema order
3328@param[out] doc fts doc to hold parsed documents. */
3329static
3330void
3331fts_fetch_doc_from_tuple(
3332 fts_get_doc_t* get_doc,
3333 const dtuple_t* tuple,
3334 fts_doc_t* doc)
3335{
3336 dict_index_t* index;
3337 st_mysql_ftparser* parser;
3338 ulint doc_len = 0;
3339 ulint processed_doc = 0;
3340 ulint num_field;
3341
3342 if (get_doc == NULL) {
3343 return;
3344 }
3345
3346 index = get_doc->index_cache->index;
3347 parser = get_doc->index_cache->index->parser;
3348 num_field = dict_index_get_n_fields(index);
3349
3350 for (ulint i = 0; i < num_field; i++) {
3351 const dict_field_t* ifield;
3352 const dict_col_t* col;
3353 ulint pos;
3354 dfield_t* field;
3355
3356 ifield = dict_index_get_nth_field(index, i);
3357 col = dict_field_get_col(ifield);
3358 pos = dict_col_get_no(col);
3359 field = dtuple_get_nth_field(tuple, pos);
3360
3361 if (!get_doc->index_cache->charset) {
3362 get_doc->index_cache->charset = fts_get_charset(
3363 ifield->col->prtype);
3364 }
3365
3366 ut_ad(!dfield_is_ext(field));
3367
3368 doc->text.f_str = (byte*) dfield_get_data(field);
3369 doc->text.f_len = dfield_get_len(field);
3370 doc->found = TRUE;
3371 doc->charset = get_doc->index_cache->charset;
3372
3373 /* field data is NULL. */
3374 if (doc->text.f_len == UNIV_SQL_NULL || doc->text.f_len == 0) {
3375 continue;
3376 }
3377
3378 if (processed_doc == 0) {
3379 fts_tokenize_document(doc, NULL, parser);
3380 } else {
3381 fts_tokenize_document_next(doc, doc_len, NULL, parser);
3382 }
3383
3384 processed_doc++;
3385 doc_len += doc->text.f_len + 1;
3386 }
3387}
3388
3389/** Fetch the document from tuple, tokenize the text data and
3390insert the text data into fts auxiliary table and
3391its cache. Moreover this tuple fields doesn't contain any information
3392about externally stored field. This tuple contains data directly
3393converted from mysql.
3394@param[in] ftt FTS transaction table
3395@param[in] doc_id doc id
3396@param[in] tuple tuple from where data can be retrieved
3397 and tuple should be arranged in table
3398 schema order. */
3399void
3400fts_add_doc_from_tuple(
3401 fts_trx_table_t*ftt,
3402 doc_id_t doc_id,
3403 const dtuple_t* tuple)
3404{
3405 mtr_t mtr;
3406 fts_cache_t* cache = ftt->table->fts->cache;
3407
3408 ut_ad(cache->get_docs);
3409
3410 if (!(ftt->table->fts->fts_status & ADDED_TABLE_SYNCED)) {
3411 fts_init_index(ftt->table, FALSE);
3412 }
3413
3414 mtr_start(&mtr);
3415
3416 ulint num_idx = ib_vector_size(cache->get_docs);
3417
3418 for (ulint i = 0; i < num_idx; ++i) {
3419 fts_doc_t doc;
3420 dict_table_t* table;
3421 fts_get_doc_t* get_doc;
3422
3423 get_doc = static_cast<fts_get_doc_t*>(
3424 ib_vector_get(cache->get_docs, i));
3425 table = get_doc->index_cache->index->table;
3426
3427 fts_doc_init(&doc);
3428 fts_fetch_doc_from_tuple(
3429 get_doc, tuple, &doc);
3430
3431 if (doc.found) {
3432 mtr_commit(&mtr);
3433 rw_lock_x_lock(&table->fts->cache->lock);
3434
3435 if (table->fts->cache->stopword_info.status
3436 & STOPWORD_NOT_INIT) {
3437 fts_load_stopword(table, NULL, NULL,
3438 NULL, TRUE, TRUE);
3439 }
3440
3441 fts_cache_add_doc(
3442 table->fts->cache,
3443 get_doc->index_cache,
3444 doc_id, doc.tokens);
3445
3446 rw_lock_x_unlock(&table->fts->cache->lock);
3447
3448 if (cache->total_size > fts_max_cache_size / 5
3449 || fts_need_sync) {
3450 fts_sync(cache->sync, true, false, false);
3451 }
3452
3453 mtr_start(&mtr);
3454
3455 }
3456
3457 fts_doc_free(&doc);
3458 }
3459
3460 mtr_commit(&mtr);
3461}
3462
/*********************************************************************//**
This function fetches the document inserted during the committing
transaction, and tokenize the inserted text data and insert into
FTS auxiliary table and its cache.
The row is located through the table's FTS_DOC_ID index; when that index
is not the clustered index, the clustered record is looked up through a
row reference built from the secondary record. The document is then
tokenized once per FTS index of the table and added to the FTS cache.
@return always TRUE (also when no matching row was found) */
static
ulint
fts_add_doc_by_id(
/*==============*/
	fts_trx_table_t*ftt,		/*!< in: FTS trx table */
	doc_id_t	doc_id,		/*!< in: doc id */
	ib_vector_t*	fts_indexes MY_ATTRIBUTE((unused)))
					/*!< in: affected fts indexes */
{
	mtr_t		mtr;
	mem_heap_t*	heap;
	btr_pcur_t	pcur;
	dict_table_t*	table;
	dtuple_t*	tuple;
	dfield_t*	dfield;
	fts_get_doc_t*	get_doc;
	doc_id_t	temp_doc_id;
	dict_index_t*	clust_index;
	dict_index_t*	fts_id_index;
	ibool		is_id_cluster;
	fts_cache_t*	cache = ftt->table->fts->cache;

	ut_ad(cache->get_docs);

	/* If Doc ID has been supplied by the user, then the table
	might not yet be sync-ed */

	if (!(ftt->table->fts->fts_status & ADDED_TABLE_SYNCED)) {
		fts_init_index(ftt->table, FALSE);
	}

	/* Get the first FTS index's get_doc */
	get_doc = static_cast<fts_get_doc_t*>(
		ib_vector_get(cache->get_docs, 0));
	ut_ad(get_doc);

	table = get_doc->index_cache->index->table;

	/* All tuples and record offsets below are allocated from this
	heap, which is freed at func_exit. */
	heap = mem_heap_create(512);

	clust_index = dict_table_get_first_index(table);
	fts_id_index = table->fts_doc_id_index;

	/* Check whether the index on FTS_DOC_ID is cluster index */
	is_id_cluster = (clust_index == fts_id_index);

	mtr_start(&mtr);
	btr_pcur_init(&pcur);

	/* Search based on Doc ID. Here, we'll need to consider the case
	when there is no primary index on Doc ID */
	tuple = dtuple_create(heap, 1);
	dfield = dtuple_get_nth_field(tuple, 0);
	dfield->type.mtype = DATA_INT;
	dfield->type.prtype = DATA_NOT_NULL | DATA_UNSIGNED | DATA_BINARY_TYPE;

	/* The search key is the Doc ID written as 8 bytes by
	mach_write_to_8(). */
	mach_write_to_8((byte*) &temp_doc_id, doc_id);
	dfield_set_data(dfield, &temp_doc_id, sizeof(temp_doc_id));

	btr_pcur_open_with_no_init(
		fts_id_index, tuple, PAGE_CUR_LE, BTR_SEARCH_LEAF,
		&pcur, 0, &mtr);

	/* If we have a match, add the data to doc structure */
	if (btr_pcur_get_low_match(&pcur) == 1) {
		const rec_t*	rec;
		btr_pcur_t*	doc_pcur;
		const rec_t*	clust_rec;
		btr_pcur_t	clust_pcur;
		ulint*		offsets = NULL;
		ulint		num_idx = ib_vector_size(cache->get_docs);

		rec = btr_pcur_get_rec(&pcur);

		/* Doc could be deleted */
		if (page_rec_is_infimum(rec)
		    || rec_get_deleted_flag(rec, dict_table_is_comp(table))) {

			goto func_exit;
		}

		if (is_id_cluster) {
			/* The record found is already the clustered
			index record. */
			clust_rec = rec;
			doc_pcur = &pcur;
		} else {
			dtuple_t*	clust_ref;
			ulint		n_fields;

			/* Build a row reference from the secondary index
			record and position a second cursor on the
			clustered index with it. */
			btr_pcur_init(&clust_pcur);
			n_fields = dict_index_get_n_unique(clust_index);

			clust_ref = dtuple_create(heap, n_fields);
			dict_index_copy_types(clust_ref, clust_index, n_fields);

			row_build_row_ref_in_tuple(
				clust_ref, rec, fts_id_index, NULL);

			btr_pcur_open_with_no_init(
				clust_index, clust_ref, PAGE_CUR_LE,
				BTR_SEARCH_LEAF, &clust_pcur, 0, &mtr);

			doc_pcur = &clust_pcur;
			clust_rec = btr_pcur_get_rec(&clust_pcur);

		}

		offsets = rec_get_offsets(clust_rec, clust_index, NULL, true,
					  ULINT_UNDEFINED, &heap);

		/* Tokenize the row once for every FTS index of the table.
		Note that "table" and "get_doc" declared in the loop body
		shadow the function-level variables of the same name. */
		for (ulint i = 0; i < num_idx; ++i) {
			fts_doc_t       doc;
			dict_table_t*   table;
			fts_get_doc_t*  get_doc;

			get_doc = static_cast<fts_get_doc_t*>(
				ib_vector_get(cache->get_docs, i));

			table = get_doc->index_cache->index->table;

			fts_doc_init(&doc);

			fts_fetch_doc_from_rec(
				get_doc, clust_index, doc_pcur, offsets, &doc);

			if (doc.found) {
				ibool	success MY_ATTRIBUTE((unused));

				/* Remember the cursor position and commit
				the mini-transaction before the cache lock
				is taken; the position is restored further
				down if more indexes remain. */
				btr_pcur_store_position(doc_pcur, &mtr);
				mtr_commit(&mtr);

				rw_lock_x_lock(&table->fts->cache->lock);

				if (table->fts->cache->stopword_info.status
				    & STOPWORD_NOT_INIT) {
					fts_load_stopword(table, NULL, NULL,
							  NULL, TRUE, TRUE);
				}

				fts_cache_add_doc(
					table->fts->cache,
					get_doc->index_cache,
					doc_id, doc.tokens);

				/* Decide while still holding the cache lock
				whether a sync should be requested: the cache
				exceeds a tenth of fts_max_cache_size (or a
				sync has been flagged) and no sync is already
				in progress. */
				bool	need_sync = false;
				if ((cache->total_size > fts_max_cache_size / 10
				     || fts_need_sync)
				    && !cache->sync->in_progress) {
					need_sync = true;
				}

				rw_lock_x_unlock(&table->fts->cache->lock);

				/* Debug-only injection points. */
				DBUG_EXECUTE_IF(
					"fts_instrument_sync",
					fts_optimize_request_sync_table(table);
					os_event_wait(cache->sync->event);
				);

				DBUG_EXECUTE_IF(
					"fts_instrument_sync_debug",
					fts_sync(cache->sync, true, true, false);
				);

				DEBUG_SYNC_C("fts_instrument_sync_request");
				DBUG_EXECUTE_IF(
					"fts_instrument_sync_request",
					fts_optimize_request_sync_table(table);
				);

				/* The sync is only requested here, via
				fts_optimize_request_sync_table(); it is not
				executed by this function. */
				if (need_sync) {
					fts_optimize_request_sync_table(table);
				}

				mtr_start(&mtr);

				/* The cursor is needed again only if another
				FTS index remains to be processed. */
				if (i < num_idx - 1) {

					success = btr_pcur_restore_position(
						BTR_SEARCH_LEAF, doc_pcur,
						&mtr);

					ut_ad(success);
				}
			}

			fts_doc_free(&doc);
		}

		/* The clustered index cursor exists only when the
		FTS_DOC_ID index is a secondary index; pcur itself is
		closed at func_exit. */
		if (!is_id_cluster) {
			btr_pcur_close(doc_pcur);
		}
	}
func_exit:
	mtr_commit(&mtr);

	btr_pcur_close(&pcur);

	mem_heap_free(heap);
	return(TRUE);
}
3668
3669
3670/*********************************************************************//**
3671Callback function to read a single ulint column.
3672return always returns TRUE */
3673static
3674ibool
3675fts_read_ulint(
3676/*===========*/
3677 void* row, /*!< in: sel_node_t* */
3678 void* user_arg) /*!< in: pointer to ulint */
3679{
3680 sel_node_t* sel_node = static_cast<sel_node_t*>(row);
3681 ulint* value = static_cast<ulint*>(user_arg);
3682 que_node_t* exp = sel_node->select_list;
3683 dfield_t* dfield = que_node_get_val(exp);
3684 void* data = dfield_get_data(dfield);
3685
3686 *value = static_cast<ulint>(mach_read_from_4(
3687 static_cast<const byte*>(data)));
3688
3689 return(TRUE);
3690}
3691
3692/*********************************************************************//**
3693Get maximum Doc ID in a table if index "FTS_DOC_ID_INDEX" exists
3694@return max Doc ID or 0 if index "FTS_DOC_ID_INDEX" does not exist */
3695doc_id_t
3696fts_get_max_doc_id(
3697/*===============*/
3698 dict_table_t* table) /*!< in: user table */
3699{
3700 dict_index_t* index;
3701 dict_field_t* dfield MY_ATTRIBUTE((unused)) = NULL;
3702 doc_id_t doc_id = 0;
3703 mtr_t mtr;
3704 btr_pcur_t pcur;
3705
3706 index = table->fts_doc_id_index;
3707
3708 if (!index) {
3709 return(0);
3710 }
3711
3712 ut_ad(!index->is_instant());
3713
3714 dfield = dict_index_get_nth_field(index, 0);
3715
3716#if 0 /* This can fail when renaming a column to FTS_DOC_ID_COL_NAME. */
3717 ut_ad(innobase_strcasecmp(FTS_DOC_ID_COL_NAME, dfield->name) == 0);
3718#endif
3719
3720 mtr_start(&mtr);
3721
3722 /* fetch the largest indexes value */
3723 btr_pcur_open_at_index_side(
3724 false, index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
3725
3726 if (!page_is_empty(btr_pcur_get_page(&pcur))) {
3727 const rec_t* rec = NULL;
3728 ulint offsets_[REC_OFFS_NORMAL_SIZE];
3729 ulint* offsets = offsets_;
3730 mem_heap_t* heap = NULL;
3731 ulint len;
3732 const void* data;
3733
3734 rec_offs_init(offsets_);
3735
3736 do {
3737 rec = btr_pcur_get_rec(&pcur);
3738
3739 if (page_rec_is_user_rec(rec)) {
3740 break;
3741 }
3742 } while (btr_pcur_move_to_prev(&pcur, &mtr));
3743
3744 if (!rec) {
3745 goto func_exit;
3746 }
3747
3748 ut_ad(!rec_is_default_row(rec, index));
3749 offsets = rec_get_offsets(
3750 rec, index, offsets, true, ULINT_UNDEFINED, &heap);
3751
3752 data = rec_get_nth_field(rec, offsets, 0, &len);
3753
3754 doc_id = static_cast<doc_id_t>(fts_read_doc_id(
3755 static_cast<const byte*>(data)));
3756 }
3757
3758func_exit:
3759 btr_pcur_close(&pcur);
3760 mtr_commit(&mtr);
3761 return(doc_id);
3762}
3763
/*********************************************************************//**
Fetch document with the given document id.
A cursor query is run over the user table in a transaction created and
freed inside this function, and "callback" is invoked for every fetched
row. With FTS_FETCH_DOC_BY_ID_EQUAL only rows whose Doc ID equals
"doc_id" are selected; with FTS_FETCH_DOC_BY_ID_LARGE all rows with a
larger Doc ID are selected and the Doc ID column is prepended to the
select list.
The parsed query graph is stored in get_doc->get_document_graph when
get_doc is given, and reused by later calls; without get_doc the graph is
freed before returning.
@return DB_SUCCESS if OK else error */
dberr_t
fts_doc_fetch_by_doc_id(
/*====================*/
	fts_get_doc_t*	get_doc,	/*!< in: state */
	doc_id_t	doc_id,		/*!< in: id of document to
					fetch */
	dict_index_t*	index_to_use,	/*!< in: caller supplied FTS index,
					or NULL */
	ulint		option,		/*!< in: search option, if it is
					greater than doc_id or equal */
	fts_sql_callback
			callback,	/*!< in: callback to read */
	void*		arg)		/*!< in: callback arg */
{
	pars_info_t*	info;
	dberr_t		error;
	const char*	select_str;
	doc_id_t	write_doc_id;
	dict_index_t*	index;
	trx_t*		trx = trx_create();
	que_t*          graph;

	trx->op_info = "fetching indexed FTS document";

	/* The FTS index can be supplied by caller directly with
	"index_to_use", otherwise, get it from "get_doc" */
	index = (index_to_use) ? index_to_use : get_doc->index_cache->index;

	/* Reuse the bind info of an already parsed graph, if any. */
	if (get_doc && get_doc->get_document_graph) {
		info = get_doc->get_document_graph->info;
	} else {
		info = pars_info_create();
	}

	/* Convert to "storage" byte order. */
	fts_write_doc_id((byte*) &write_doc_id, doc_id);
	fts_bind_doc_id(info, "doc_id", &write_doc_id);
	pars_info_bind_function(info, "my_func", callback, arg);

	select_str = fts_get_select_columns_str(index, info, info->heap);
	pars_info_bind_id(info, TRUE, "table_name", index->table->name.m_name);

	/* Parse the query only when no cached graph is available. */
	if (!get_doc || !get_doc->get_document_graph) {
		if (option == FTS_FETCH_DOC_BY_ID_EQUAL) {
			graph = fts_parse_sql(
				NULL,
				info,
				mem_heap_printf(info->heap,
					"DECLARE FUNCTION my_func;\n"
					"DECLARE CURSOR c IS"
					" SELECT %s FROM $table_name"
					" WHERE %s = :doc_id;\n"
					"BEGIN\n"
					""
					"OPEN c;\n"
					"WHILE 1 = 1 LOOP\n"
					" FETCH c INTO my_func();\n"
					" IF c %% NOTFOUND THEN\n"
					" EXIT;\n"
					" END IF;\n"
					"END LOOP;\n"
					"CLOSE c;",
					select_str, FTS_DOC_ID_COL_NAME));
		} else {
			ut_ad(option == FTS_FETCH_DOC_BY_ID_LARGE);

			/* This is used for crash recovery of table with
			hidden DOC ID or FTS indexes. We will scan the table
			to re-process user table rows whose DOC ID or
			FTS indexed documents have not been sync-ed to disk
			during a recent crash.
			In the case that all fulltext indexes are dropped
			for a table, we will keep the "hidden" FTS_DOC_ID
			column, and this scan is to retrieve the largest
			DOC ID being used in the table to determine the
			appropriate next DOC ID.
			In the case that fulltext index(es) exist, this
			operation will re-tokenize any docs that have not
			been sync-ed to the disk, and re-prime the FTS
			cache */
			graph = fts_parse_sql(
				NULL,
				info,
				mem_heap_printf(info->heap,
					"DECLARE FUNCTION my_func;\n"
					"DECLARE CURSOR c IS"
					" SELECT %s, %s FROM $table_name"
					" WHERE %s > :doc_id;\n"
					"BEGIN\n"
					""
					"OPEN c;\n"
					"WHILE 1 = 1 LOOP\n"
					" FETCH c INTO my_func();\n"
					" IF c %% NOTFOUND THEN\n"
					" EXIT;\n"
					" END IF;\n"
					"END LOOP;\n"
					"CLOSE c;",
					FTS_DOC_ID_COL_NAME,
					select_str, FTS_DOC_ID_COL_NAME));
		}
		/* Keep the parsed graph for later calls with the same
		get_doc. */
		if (get_doc) {
			get_doc->get_document_graph = graph;
		}
	} else {
		graph = get_doc->get_document_graph;
	}

	error = fts_eval_sql(trx, graph);
	/* The transaction is committed regardless of the value of
	"error". */
	fts_sql_commit(trx);
	trx_free(trx);

	/* Without a get_doc there is nowhere to cache the graph. */
	if (!get_doc) {
		fts_que_graph_free(graph);
	}

	return(error);
}
3885
3886/*********************************************************************//**
3887Write out a single word's data as new entry/entries in the INDEX table.
3888@return DB_SUCCESS if all OK. */
3889dberr_t
3890fts_write_node(
3891/*===========*/
3892 trx_t* trx, /*!< in: transaction */
3893 que_t** graph, /*!< in: query graph */
3894 fts_table_t* fts_table, /*!< in: aux table */
3895 fts_string_t* word, /*!< in: word in UTF-8 */
3896 fts_node_t* node) /*!< in: node columns */
3897{
3898 pars_info_t* info;
3899 dberr_t error;
3900 ib_uint32_t doc_count;
3901 ib_time_t start_time;
3902 doc_id_t last_doc_id;
3903 doc_id_t first_doc_id;
3904 char table_name[MAX_FULL_NAME_LEN];
3905
3906 ut_a(node->ilist != NULL);
3907
3908 if (*graph) {
3909 info = (*graph)->info;
3910 } else {
3911 info = pars_info_create();
3912
3913 fts_get_table_name(fts_table, table_name);
3914 pars_info_bind_id(info, true, "index_table_name", table_name);
3915 }
3916
3917 pars_info_bind_varchar_literal(info, "token", word->f_str, word->f_len);
3918
3919 /* Convert to "storage" byte order. */
3920 fts_write_doc_id((byte*) &first_doc_id, node->first_doc_id);
3921 fts_bind_doc_id(info, "first_doc_id", &first_doc_id);
3922
3923 /* Convert to "storage" byte order. */
3924 fts_write_doc_id((byte*) &last_doc_id, node->last_doc_id);
3925 fts_bind_doc_id(info, "last_doc_id", &last_doc_id);
3926
3927 ut_a(node->last_doc_id >= node->first_doc_id);
3928
3929 /* Convert to "storage" byte order. */
3930 mach_write_to_4((byte*) &doc_count, node->doc_count);
3931 pars_info_bind_int4_literal(
3932 info, "doc_count", (const ib_uint32_t*) &doc_count);
3933
3934 /* Set copy_name to FALSE since it's a static. */
3935 pars_info_bind_literal(
3936 info, "ilist", node->ilist, node->ilist_size,
3937 DATA_BLOB, DATA_BINARY_TYPE);
3938
3939 if (!*graph) {
3940
3941 *graph = fts_parse_sql(
3942 fts_table,
3943 info,
3944 "BEGIN\n"
3945 "INSERT INTO $index_table_name VALUES"
3946 " (:token, :first_doc_id,"
3947 " :last_doc_id, :doc_count, :ilist);");
3948 }
3949
3950 start_time = ut_time();
3951 error = fts_eval_sql(trx, *graph);
3952 elapsed_time += ut_time() - start_time;
3953 ++n_nodes;
3954
3955 return(error);
3956}
3957
3958/*********************************************************************//**
3959Add rows to the DELETED_CACHE table.
3960@return DB_SUCCESS if all went well else error code*/
3961static MY_ATTRIBUTE((nonnull, warn_unused_result))
3962dberr_t
3963fts_sync_add_deleted_cache(
3964/*=======================*/
3965 fts_sync_t* sync, /*!< in: sync state */
3966 ib_vector_t* doc_ids) /*!< in: doc ids to add */
3967{
3968 ulint i;
3969 pars_info_t* info;
3970 que_t* graph;
3971 fts_table_t fts_table;
3972 char table_name[MAX_FULL_NAME_LEN];
3973 doc_id_t dummy = 0;
3974 dberr_t error = DB_SUCCESS;
3975 ulint n_elems = ib_vector_size(doc_ids);
3976
3977 ut_a(ib_vector_size(doc_ids) > 0);
3978
3979 ib_vector_sort(doc_ids, fts_update_doc_id_cmp);
3980
3981 info = pars_info_create();
3982
3983 fts_bind_doc_id(info, "doc_id", &dummy);
3984
3985 FTS_INIT_FTS_TABLE(
3986 &fts_table, "DELETED_CACHE", FTS_COMMON_TABLE, sync->table);
3987
3988 fts_get_table_name(&fts_table, table_name);
3989 pars_info_bind_id(info, true, "table_name", table_name);
3990
3991 graph = fts_parse_sql(
3992 &fts_table,
3993 info,
3994 "BEGIN INSERT INTO $table_name VALUES (:doc_id);");
3995
3996 for (i = 0; i < n_elems && error == DB_SUCCESS; ++i) {
3997 fts_update_t* update;
3998 doc_id_t write_doc_id;
3999
4000 update = static_cast<fts_update_t*>(ib_vector_get(doc_ids, i));
4001
4002 /* Convert to "storage" byte order. */
4003 fts_write_doc_id((byte*) &write_doc_id, update->doc_id);
4004 fts_bind_doc_id(info, "doc_id", &write_doc_id);
4005
4006 error = fts_eval_sql(sync->trx, graph);
4007 }
4008
4009 fts_que_graph_free(graph);
4010
4011 return(error);
4012}
4013
/** Write the words and ilist to disk.
Every node of every cached word that is not yet flagged as synced is
flagged and, as long as no error has occurred, written with
fts_write_node() to the auxiliary index table selected for the word.
After the first error the remaining nodes are still flagged as synced
but no longer written.
@param[in,out]	trx		transaction
@param[in]	index_cache	index cache
@param[in]	unlock_cache	whether unlock cache when write node
@return DB_SUCCESS if all went well else error code */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
fts_sync_write_words(
	trx_t*			trx,
	fts_index_cache_t*	index_cache,
	bool			unlock_cache)
{
	fts_table_t	fts_table;
	/* Local statistics for the diagnostic printout below; n_nodes is
	a local variable of this function. */
	ulint		n_nodes = 0;
	ulint		n_words = 0;
	const ib_rbt_node_t* rbt_node;
	dberr_t		error = DB_SUCCESS;
	ibool		print_error = FALSE;
	dict_table_t*	table = index_cache->index->table;

	FTS_INIT_INDEX_TABLE(
		&fts_table, NULL, FTS_INDEX_TABLE, index_cache->index);

	n_words = rbt_size(index_cache->words);

	/* We iterate over the entire tree, even if there is an error,
	since we want to free the memory used during caching. */
	for (rbt_node = rbt_first(index_cache->words);
	     rbt_node;
	     rbt_node = rbt_next(index_cache->words, rbt_node)) {

		ulint			i;
		ulint			selected;
		fts_tokenizer_word_t*	word;

		word = rbt_value(fts_tokenizer_word_t, rbt_node);

		/* Pick the auxiliary index table for this word; the same
		selector also indexes the cached insert graphs. */
		selected = fts_select_index(
			index_cache->charset, word->text.f_str,
			word->text.f_len);

		fts_table.suffix = fts_get_suffix(selected);

		/* We iterate over all the nodes even if there was an error */
		for (i = 0; i < ib_vector_size(word->nodes); ++i) {

			fts_node_t* fts_node = static_cast<fts_node_t*>(
				ib_vector_get(word->nodes, i));

			/* Nodes flagged as synced are skipped; every other
			node is flagged here, before it is written. */
			if (fts_node->synced) {
				continue;
			} else {
				fts_node->synced = true;
			}

			/*FIXME: we need to handle the error properly. */
			if (error == DB_SUCCESS) {
				/* Optionally release the cache lock for the
				duration of the write; it is taken again
				right after. */
				if (unlock_cache) {
					rw_lock_x_unlock(
						&table->fts->cache->lock);
				}

				error = fts_write_node(
					trx,
					&index_cache->ins_graph[selected],
					&fts_table, &word->text, fts_node);

				DEBUG_SYNC_C("fts_write_node");
				DBUG_EXECUTE_IF("fts_write_node_crash",
					DBUG_SUICIDE(););

				DBUG_EXECUTE_IF("fts_instrument_sync_sleep",
					os_thread_sleep(1000000);
				);

				if (unlock_cache) {
					rw_lock_x_lock(
						&table->fts->cache->lock);
				}
			}
		}

		n_nodes += ib_vector_size(word->nodes);

		/* Report a write error only once. */
		if (error != DB_SUCCESS && !print_error) {
			ib::error() << "(" << ut_strerr(error) << ") writing"
				" word node to FTS auxiliary index table.";
			print_error = TRUE;
		}
	}

	if (fts_enable_diag_print) {
		/* The divisor is at least 1, so an empty tree cannot cause
		a division by zero. */
		printf("Avg number of nodes: %lf\n",
		       (double) n_nodes / (double) (n_words > 1 ? n_words : 1));
	}

	return(error);
}
4112
4113/*********************************************************************//**
4114Begin Sync, create transaction, acquire locks, etc. */
4115static
4116void
4117fts_sync_begin(
4118/*===========*/
4119 fts_sync_t* sync) /*!< in: sync state */
4120{
4121 fts_cache_t* cache = sync->table->fts->cache;
4122
4123 n_nodes = 0;
4124 elapsed_time = 0;
4125
4126 sync->start_time = ut_time();
4127
4128 sync->trx = trx_create();
4129 trx_start_internal(sync->trx);
4130
4131 if (fts_enable_diag_print) {
4132 ib::info() << "FTS SYNC for table " << sync->table->name
4133 << ", deleted count: "
4134 << ib_vector_size(cache->deleted_doc_ids)
4135 << " size: " << cache->total_size << " bytes";
4136 }
4137}
4138
4139/*********************************************************************//**
4140Run SYNC on the table, i.e., write out data from the index specific
4141cache to the FTS aux INDEX table and FTS aux doc id stats table.
4142@return DB_SUCCESS if all OK */
4143static MY_ATTRIBUTE((nonnull, warn_unused_result))
4144dberr_t
4145fts_sync_index(
4146/*===========*/
4147 fts_sync_t* sync, /*!< in: sync state */
4148 fts_index_cache_t* index_cache) /*!< in: index cache */
4149{
4150 trx_t* trx = sync->trx;
4151
4152 trx->op_info = "doing SYNC index";
4153
4154 if (fts_enable_diag_print) {
4155 ib::info() << "SYNC words: " << rbt_size(index_cache->words);
4156 }
4157
4158 ut_ad(rbt_validate(index_cache->words));
4159
4160 return(fts_sync_write_words(trx, index_cache, sync->unlock_cache));
4161}
4162
4163/** Check if index cache has been synced completely
4164@param[in,out] index_cache index cache
4165@return true if index is synced, otherwise false. */
4166static
4167bool
4168fts_sync_index_check(
4169 fts_index_cache_t* index_cache)
4170{
4171 const ib_rbt_node_t* rbt_node;
4172
4173 for (rbt_node = rbt_first(index_cache->words);
4174 rbt_node != NULL;
4175 rbt_node = rbt_next(index_cache->words, rbt_node)) {
4176
4177 fts_tokenizer_word_t* word;
4178 word = rbt_value(fts_tokenizer_word_t, rbt_node);
4179
4180 fts_node_t* fts_node;
4181 fts_node = static_cast<fts_node_t*>(ib_vector_last(word->nodes));
4182
4183 if (!fts_node->synced) {
4184 return(false);
4185 }
4186 }
4187
4188 return(true);
4189}
4190
4191/** Reset synced flag in index cache when rollback
4192@param[in,out] index_cache index cache */
4193static
4194void
4195fts_sync_index_reset(
4196 fts_index_cache_t* index_cache)
4197{
4198 const ib_rbt_node_t* rbt_node;
4199
4200 for (rbt_node = rbt_first(index_cache->words);
4201 rbt_node != NULL;
4202 rbt_node = rbt_next(index_cache->words, rbt_node)) {
4203
4204 fts_tokenizer_word_t* word;
4205 word = rbt_value(fts_tokenizer_word_t, rbt_node);
4206
4207 fts_node_t* fts_node;
4208 fts_node = static_cast<fts_node_t*>(ib_vector_last(word->nodes));
4209
4210 fts_node->synced = false;
4211 }
4212}
4213
4214/** Commit the SYNC, change state of processed doc ids etc.
4215@param[in,out] sync sync state
4216@return DB_SUCCESS if all OK */
4217static MY_ATTRIBUTE((nonnull, warn_unused_result))
4218dberr_t
4219fts_sync_commit(
4220 fts_sync_t* sync)
4221{
4222 dberr_t error;
4223 trx_t* trx = sync->trx;
4224 fts_cache_t* cache = sync->table->fts->cache;
4225 doc_id_t last_doc_id;
4226
4227 trx->op_info = "doing SYNC commit";
4228
4229 /* After each Sync, update the CONFIG table about the max doc id
4230 we just sync-ed to index table */
4231 error = fts_cmp_set_sync_doc_id(sync->table, sync->max_doc_id, FALSE,
4232 &last_doc_id);
4233
4234 /* Get the list of deleted documents that are either in the
4235 cache or were headed there but were deleted before the add
4236 thread got to them. */
4237
4238 if (error == DB_SUCCESS && ib_vector_size(cache->deleted_doc_ids) > 0) {
4239
4240 error = fts_sync_add_deleted_cache(
4241 sync, cache->deleted_doc_ids);
4242 }
4243
4244 /* We need to do this within the deleted lock since fts_delete() can
4245 attempt to add a deleted doc id to the cache deleted id array. */
4246 fts_cache_clear(cache);
4247 DEBUG_SYNC_C("fts_deleted_doc_ids_clear");
4248 fts_cache_init(cache);
4249 rw_lock_x_unlock(&cache->lock);
4250
4251 if (error == DB_SUCCESS) {
4252
4253 fts_sql_commit(trx);
4254
4255 } else if (error != DB_SUCCESS) {
4256
4257 fts_sql_rollback(trx);
4258
4259 ib::error() << "(" << ut_strerr(error) << ") during SYNC.";
4260 }
4261
4262 if (fts_enable_diag_print && elapsed_time) {
4263 ib::info() << "SYNC for table " << sync->table->name
4264 << ": SYNC time: "
4265 << (ut_time() - sync->start_time)
4266 << " secs: elapsed "
4267 << (double) n_nodes / elapsed_time
4268 << " ins/sec";
4269 }
4270
4271 /* Avoid assertion in trx_free(). */
4272 trx->dict_operation_lock_mode = 0;
4273 trx_free(trx);
4274
4275 return(error);
4276}
4277
4278/** Rollback a sync operation
4279@param[in,out] sync sync state */
4280static
4281void
4282fts_sync_rollback(
4283 fts_sync_t* sync)
4284{
4285 trx_t* trx = sync->trx;
4286 fts_cache_t* cache = sync->table->fts->cache;
4287
4288 for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) {
4289 ulint j;
4290 fts_index_cache_t* index_cache;
4291
4292 index_cache = static_cast<fts_index_cache_t*>(
4293 ib_vector_get(cache->indexes, i));
4294
4295 /* Reset synced flag so nodes will not be skipped
4296 in the next sync, see fts_sync_write_words(). */
4297 fts_sync_index_reset(index_cache);
4298
4299 for (j = 0; fts_index_selector[j].value; ++j) {
4300
4301 if (index_cache->ins_graph[j] != NULL) {
4302
4303 fts_que_graph_free_check_lock(
4304 NULL, index_cache,
4305 index_cache->ins_graph[j]);
4306
4307 index_cache->ins_graph[j] = NULL;
4308 }
4309
4310 if (index_cache->sel_graph[j] != NULL) {
4311
4312 fts_que_graph_free_check_lock(
4313 NULL, index_cache,
4314 index_cache->sel_graph[j]);
4315
4316 index_cache->sel_graph[j] = NULL;
4317 }
4318 }
4319 }
4320
4321 rw_lock_x_unlock(&cache->lock);
4322
4323 fts_sql_rollback(trx);
4324
4325 /* Avoid assertion in trx_free(). */
4326 trx->dict_operation_lock_mode = 0;
4327 trx_free(trx);
4328}
4329
/** Run SYNC on the table, i.e., write out data from the cache to the
FTS auxiliary INDEX table and clear the cache at the end.
Only one sync runs at a time per cache: if another one is in progress,
this call either waits for it (wait == true) or returns DB_SUCCESS
without doing anything.
@param[in,out]	sync		sync state
@param[in]	unlock_cache	whether unlock cache lock when write node
@param[in]	wait		whether wait when a sync is in progress
@param[in]	has_dict	whether has dict operation lock
@return DB_SUCCESS if all OK
@retval DB_READ_ONLY if srv_read_only_mode is set */
static
dberr_t
fts_sync(
	fts_sync_t*	sync,
	bool		unlock_cache,
	bool		wait,
	bool		has_dict)
{
	if (srv_read_only_mode) {
		return DB_READ_ONLY;
	}

	ulint		i;
	dberr_t		error = DB_SUCCESS;
	fts_cache_t*	cache = sync->table->fts->cache;

	rw_lock_x_lock(&cache->lock);

	/* Check if cache is being synced.
	Note: we release cache lock in fts_sync_write_words() to
	avoid long wait for the lock by other threads. */
	while (sync->in_progress) {
		rw_lock_x_unlock(&cache->lock);

		if (wait) {
			os_event_wait(sync->event);
		} else {
			return(DB_SUCCESS);
		}

		rw_lock_x_lock(&cache->lock);
	}

	/* From here on this thread owns the sync; in_progress is cleared
	again near the end of the function. */
	sync->unlock_cache = unlock_cache;
	sync->in_progress = true;

	DEBUG_SYNC_C("fts_sync_begin");
	fts_sync_begin(sync);

	/* When sync in background, we hold dict operation lock
	to prevent DDL like DROP INDEX, etc. */
	if (has_dict) {
		sync->trx->dict_operation_lock_mode = RW_S_LATCH;
	}

begin_sync:
	if (cache->total_size > fts_max_cache_size) {
		/* Avoid the case: sync never finishes when
		insert/update keeps coming. */
		ut_ad(sync->unlock_cache);
		sync->unlock_cache = false;
	}

	/* Write out the cached words of every index that is not about to
	be dropped. */
	for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
		fts_index_cache_t*	index_cache;

		index_cache = static_cast<fts_index_cache_t*>(
			ib_vector_get(cache->indexes, i));

		if (index_cache->index->to_be_dropped
		   || index_cache->index->table->to_be_dropped) {
			continue;
		}

		index_cache->index->index_fts_syncing = true;
		DBUG_EXECUTE_IF("fts_instrument_sync_sleep_drop_waits",
				os_thread_sleep(10000000);
				);

		error = fts_sync_index(sync, index_cache);

		if (error != DB_SUCCESS && !sync->interrupted) {

			goto end_sync;
		}
	}

	/* Debug-only injection of an interrupted sync. */
	DBUG_EXECUTE_IF("fts_instrument_sync_interrupted",
			sync->interrupted = true;
			error = DB_INTERRUPTED;
			goto end_sync;
	);

	/* Make sure all the caches are synced. If any index cache still
	has unsynced data, repeat the write phase from begin_sync. */
	for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
		fts_index_cache_t*	index_cache;

		index_cache = static_cast<fts_index_cache_t*>(
			ib_vector_get(cache->indexes, i));

		if (index_cache->index->to_be_dropped
		    || index_cache->index->table->to_be_dropped
		    || fts_sync_index_check(index_cache)) {
			continue;
		}

		goto begin_sync;
	}

end_sync:
	/* Both fts_sync_commit() and fts_sync_rollback() release
	cache->lock and free the sync transaction. */
	if (error == DB_SUCCESS && !sync->interrupted) {
		error = fts_sync_commit(sync);
	} else {
		fts_sync_rollback(sync);
	}

	rw_lock_x_lock(&cache->lock);
	/* Clear fts syncing flags of any indexes in case sync is
	interrupted */
	for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
		static_cast<fts_index_cache_t*>(
			ib_vector_get(cache->indexes, i))
			->index->index_fts_syncing = false;
	}

	/* Mark the sync as finished and signal the sync event, on which
	waiting fts_sync() calls block in the loop above. */
	sync->interrupted = false;
	sync->in_progress = false;
	os_event_set(sync->event);
	rw_lock_x_unlock(&cache->lock);

	/* Reset the added/deleted counters of the cache; they are
	protected by deleted_lock, which is held only for the two
	assignments. */
	mutex_enter(&cache->deleted_lock);

	cache->added = 0;
	cache->deleted = 0;

	mutex_exit(&cache->deleted_lock);

	return(error);
}
4470
4471/** Run SYNC on the table, i.e., write out data from the cache to the
4472FTS auxiliary INDEX table and clear the cache at the end.
4473@param[in,out] table fts table
4474@param[in] unlock_cache whether unlock cache when write node
4475@param[in] wait whether wait for existing sync to finish
4476@param[in] has_dict whether has dict operation lock
4477@return DB_SUCCESS on success, error code on failure. */
4478dberr_t
4479fts_sync_table(
4480 dict_table_t* table,
4481 bool unlock_cache,
4482 bool wait,
4483 bool has_dict)
4484{
4485 dberr_t err = DB_SUCCESS;
4486
4487 ut_ad(table->fts);
4488
4489 if (table->space && table->fts->cache
4490 && !dict_table_is_corrupted(table)) {
4491 err = fts_sync(table->fts->cache->sync,
4492 unlock_cache, wait, has_dict);
4493 }
4494
4495 return(err);
4496}
4497
4498/** Check if a fts token is a stopword or less than fts_min_token_size
4499or greater than fts_max_token_size.
4500@param[in] token token string
4501@param[in] stopwords stopwords rb tree
4502@param[in] cs token charset
4503@retval true if it is not stopword and length in range
4504@retval false if it is stopword or lenght not in range */
4505bool
4506fts_check_token(
4507 const fts_string_t* token,
4508 const ib_rbt_t* stopwords,
4509 const CHARSET_INFO* cs)
4510{
4511 ut_ad(cs != NULL || stopwords == NULL);
4512
4513 ib_rbt_bound_t parent;
4514
4515 return(token->f_n_char >= fts_min_token_size
4516 && token->f_n_char <= fts_max_token_size
4517 && (stopwords == NULL
4518 || rbt_search(stopwords, &parent, token) != 0));
4519}
4520
4521/** Add the token and its start position to the token's list of positions.
4522@param[in,out] result_doc result doc rb tree
4523@param[in] str token string
4524@param[in] position token position */
4525static
4526void
4527fts_add_token(
4528 fts_doc_t* result_doc,
4529 fts_string_t str,
4530 ulint position)
4531{
4532 /* Ignore string whose character number is less than
4533 "fts_min_token_size" or more than "fts_max_token_size" */
4534
4535 if (fts_check_token(&str, NULL, result_doc->charset)) {
4536
4537 mem_heap_t* heap;
4538 fts_string_t t_str;
4539 fts_token_t* token;
4540 ib_rbt_bound_t parent;
4541 ulint newlen;
4542
4543 heap = static_cast<mem_heap_t*>(result_doc->self_heap->arg);
4544
4545 t_str.f_n_char = str.f_n_char;
4546
4547 t_str.f_len = str.f_len * result_doc->charset->casedn_multiply + 1;
4548
4549 t_str.f_str = static_cast<byte*>(
4550 mem_heap_alloc(heap, t_str.f_len));
4551
4552 /* For binary collations, a case sensitive search is
4553 performed. Hence don't convert to lower case. */
4554 if (my_binary_compare(result_doc->charset)) {
4555 memcpy(t_str.f_str, str.f_str, str.f_len);
4556 t_str.f_str[str.f_len]= 0;
4557 newlen= str.f_len;
4558 } else {
4559 newlen = innobase_fts_casedn_str(
4560 result_doc->charset, (char*) str.f_str, str.f_len,
4561 (char*) t_str.f_str, t_str.f_len);
4562 }
4563
4564 t_str.f_len = newlen;
4565 t_str.f_str[newlen] = 0;
4566
4567 /* Add the word to the document statistics. If the word
4568 hasn't been seen before we create a new entry for it. */
4569 if (rbt_search(result_doc->tokens, &parent, &t_str) != 0) {
4570 fts_token_t new_token;
4571
4572 new_token.text.f_len = newlen;
4573 new_token.text.f_str = t_str.f_str;
4574 new_token.text.f_n_char = t_str.f_n_char;
4575
4576 new_token.positions = ib_vector_create(
4577 result_doc->self_heap, sizeof(ulint), 32);
4578
4579 parent.last = rbt_add_node(
4580 result_doc->tokens, &parent, &new_token);
4581
4582 ut_ad(rbt_validate(result_doc->tokens));
4583 }
4584
4585 token = rbt_value(fts_token_t, parent.last);
4586 ib_vector_push(token->positions, &position);
4587 }
4588}
4589
4590/********************************************************************
4591Process next token from document starting at the given position, i.e., add
4592the token's start position to the token's list of positions.
4593@return number of characters handled in this call */
4594static
4595ulint
4596fts_process_token(
4597/*==============*/
4598 fts_doc_t* doc, /* in/out: document to
4599 tokenize */
4600 fts_doc_t* result, /* out: if provided, save
4601 result here */
4602 ulint start_pos, /*!< in: start position in text */
4603 ulint add_pos) /*!< in: add this position to all
4604 tokens from this tokenization */
4605{
4606 ulint ret;
4607 fts_string_t str;
4608 ulint position;
4609 fts_doc_t* result_doc;
4610 byte buf[FTS_MAX_WORD_LEN + 1];
4611
4612 str.f_str = buf;
4613
4614 /* Determine where to save the result. */
4615 result_doc = (result != NULL) ? result : doc;
4616
4617 /* The length of a string in characters is set here only. */
4618
4619 ret = innobase_mysql_fts_get_token(
4620 doc->charset, doc->text.f_str + start_pos,
4621 doc->text.f_str + doc->text.f_len, &str);
4622
4623 position = start_pos + ret - str.f_len + add_pos;
4624
4625 fts_add_token(result_doc, str, position);
4626
4627 return(ret);
4628}
4629
4630/*************************************************************//**
4631Get token char size by charset
4632@return token size */
4633ulint
4634fts_get_token_size(
4635/*===============*/
4636 const CHARSET_INFO* cs, /*!< in: Character set */
4637 const char* token, /*!< in: token */
4638 ulint len) /*!< in: token length */
4639{
4640 char* start;
4641 char* end;
4642 ulint size = 0;
4643
4644 /* const_cast is for reinterpret_cast below, or it will fail. */
4645 start = const_cast<char*>(token);
4646 end = start + len;
4647 while (start < end) {
4648 int ctype;
4649 int mbl;
4650
4651 mbl = cs->cset->ctype(
4652 cs, &ctype,
4653 reinterpret_cast<uchar*>(start),
4654 reinterpret_cast<uchar*>(end));
4655
4656 size++;
4657
4658 start += mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1);
4659 }
4660
4661 return(size);
4662}
4663
4664/*************************************************************//**
4665FTS plugin parser 'myql_parser' callback function for document tokenize.
4666Refer to 'st_mysql_ftparser_param' for more detail.
4667@return always returns 0 */
4668int
4669fts_tokenize_document_internal(
4670/*===========================*/
4671 MYSQL_FTPARSER_PARAM* param, /*!< in: parser parameter */
4672 const char* doc,/*!< in/out: document */
4673 int len) /*!< in: document length */
4674{
4675 fts_string_t str;
4676 byte buf[FTS_MAX_WORD_LEN + 1];
4677 /* JAN: TODO: MySQL 5.7
4678 MYSQL_FTPARSER_BOOLEAN_INFO bool_info =
4679 { FT_TOKEN_WORD, 0, 0, 0, 0, 0, ' ', 0 };
4680 */
4681 MYSQL_FTPARSER_BOOLEAN_INFO bool_info =
4682 { FT_TOKEN_WORD, 0, 0, 0, 0, ' ', 0};
4683
4684 ut_ad(len >= 0);
4685
4686 str.f_str = buf;
4687
4688 for (ulint i = 0, inc = 0; i < static_cast<ulint>(len); i += inc) {
4689 inc = innobase_mysql_fts_get_token(
4690 const_cast<CHARSET_INFO*>(param->cs),
4691 (uchar*)(doc) + i,
4692 (uchar*)(doc) + len,
4693 &str);
4694
4695 if (str.f_len > 0) {
4696 /* JAN: TODO: MySQL 5.7
4697 bool_info.position =
4698 static_cast<int>(i + inc - str.f_len);
4699 ut_ad(bool_info.position >= 0);
4700 */
4701
4702 /* Stop when add word fails */
4703 if (param->mysql_add_word(
4704 param,
4705 reinterpret_cast<char*>(str.f_str),
4706 static_cast<int>(str.f_len),
4707 &bool_info)) {
4708 break;
4709 }
4710 }
4711 }
4712
4713 return(0);
4714}
4715
4716/******************************************************************//**
4717FTS plugin parser 'myql_add_word' callback function for document tokenize.
4718Refer to 'st_mysql_ftparser_param' for more detail.
4719@return always returns 0 */
4720static
4721int
4722fts_tokenize_add_word_for_parser(
4723/*=============================*/
4724 MYSQL_FTPARSER_PARAM* param, /* in: parser paramter */
4725 const char* word, /* in: token word */
4726 int word_len, /* in: word len */
4727 MYSQL_FTPARSER_BOOLEAN_INFO*)
4728{
4729 fts_string_t str;
4730 fts_tokenize_param_t* fts_param;
4731 fts_doc_t* result_doc;
4732 ulint position;
4733
4734 fts_param = static_cast<fts_tokenize_param_t*>(param->mysql_ftparam);
4735 result_doc = fts_param->result_doc;
4736 ut_ad(result_doc != NULL);
4737
4738 str.f_str = (byte*)(word);
4739 str.f_len = ulint(word_len);
4740 str.f_n_char = fts_get_token_size(
4741 const_cast<CHARSET_INFO*>(param->cs), word, str.f_len);
4742
4743 /* JAN: TODO: MySQL 5.7 FTS
4744 ut_ad(boolean_info->position >= 0);
4745 position = boolean_info->position + fts_param->add_pos;
4746 */
4747 position = fts_param->add_pos;
4748
4749 fts_add_token(result_doc, str, position);
4750
4751 return(0);
4752}
4753
4754/******************************************************************//**
4755Parse a document using an external / user supplied parser */
4756static
4757void
4758fts_tokenize_by_parser(
4759/*===================*/
4760 fts_doc_t* doc, /* in/out: document to tokenize */
4761 st_mysql_ftparser* parser, /* in: plugin fts parser */
4762 fts_tokenize_param_t* fts_param) /* in: fts tokenize param */
4763{
4764 MYSQL_FTPARSER_PARAM param;
4765
4766 ut_a(parser);
4767
4768 /* Set paramters for param */
4769 param.mysql_parse = fts_tokenize_document_internal;
4770 param.mysql_add_word = fts_tokenize_add_word_for_parser;
4771 param.mysql_ftparam = fts_param;
4772 param.cs = doc->charset;
4773 param.doc = reinterpret_cast<char*>(doc->text.f_str);
4774 param.length = static_cast<int>(doc->text.f_len);
4775 param.mode= MYSQL_FTPARSER_SIMPLE_MODE;
4776
4777 PARSER_INIT(parser, &param);
4778 parser->parse(&param);
4779 PARSER_DEINIT(parser, &param);
4780}
4781
4782/** Tokenize a document.
4783@param[in,out] doc document to tokenize
4784@param[out] result tokenization result
4785@param[in] parser pluggable parser */
4786static
4787void
4788fts_tokenize_document(
4789 fts_doc_t* doc,
4790 fts_doc_t* result,
4791 st_mysql_ftparser* parser)
4792{
4793 ut_a(!doc->tokens);
4794 ut_a(doc->charset);
4795
4796 doc->tokens = rbt_create_arg_cmp(
4797 sizeof(fts_token_t), innobase_fts_text_cmp, (void*) doc->charset);
4798
4799 if (parser != NULL) {
4800 fts_tokenize_param_t fts_param;
4801
4802 fts_param.result_doc = (result != NULL) ? result : doc;
4803 fts_param.add_pos = 0;
4804
4805 fts_tokenize_by_parser(doc, parser, &fts_param);
4806 } else {
4807 ulint inc;
4808
4809 for (ulint i = 0; i < doc->text.f_len; i += inc) {
4810 inc = fts_process_token(doc, result, i, 0);
4811 ut_a(inc > 0);
4812 }
4813 }
4814}
4815
4816/** Continue to tokenize a document.
4817@param[in,out] doc document to tokenize
4818@param[in] add_pos add this position to all tokens from this tokenization
4819@param[out] result tokenization result
4820@param[in] parser pluggable parser */
4821static
4822void
4823fts_tokenize_document_next(
4824 fts_doc_t* doc,
4825 ulint add_pos,
4826 fts_doc_t* result,
4827 st_mysql_ftparser* parser)
4828{
4829 ut_a(doc->tokens);
4830
4831 if (parser) {
4832 fts_tokenize_param_t fts_param;
4833
4834 fts_param.result_doc = (result != NULL) ? result : doc;
4835 fts_param.add_pos = add_pos;
4836
4837 fts_tokenize_by_parser(doc, parser, &fts_param);
4838 } else {
4839 ulint inc;
4840
4841 for (ulint i = 0; i < doc->text.f_len; i += inc) {
4842 inc = fts_process_token(doc, result, i, add_pos);
4843 ut_a(inc > 0);
4844 }
4845 }
4846}
4847
4848/** Create the vector of fts_get_doc_t instances.
4849@param[in,out] cache fts cache
4850@return vector of fts_get_doc_t instances */
4851static
4852ib_vector_t*
4853fts_get_docs_create(
4854 fts_cache_t* cache)
4855{
4856 ib_vector_t* get_docs;
4857
4858 ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
4859
4860 /* We need one instance of fts_get_doc_t per index. */
4861 get_docs = ib_vector_create(cache->self_heap, sizeof(fts_get_doc_t), 4);
4862
4863 /* Create the get_doc instance, we need one of these
4864 per FTS index. */
4865 for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) {
4866
4867 dict_index_t** index;
4868 fts_get_doc_t* get_doc;
4869
4870 index = static_cast<dict_index_t**>(
4871 ib_vector_get(cache->indexes, i));
4872
4873 get_doc = static_cast<fts_get_doc_t*>(
4874 ib_vector_push(get_docs, NULL));
4875
4876 memset(get_doc, 0x0, sizeof(*get_doc));
4877
4878 get_doc->index_cache = fts_get_index_cache(cache, *index);
4879 get_doc->cache = cache;
4880
4881 /* Must find the index cache. */
4882 ut_a(get_doc->index_cache != NULL);
4883 }
4884
4885 return(get_docs);
4886}
4887
4888/********************************************************************
4889Release any resources held by the fts_get_doc_t instances. */
4890static
4891void
4892fts_get_docs_clear(
4893/*===============*/
4894 ib_vector_t* get_docs) /*!< in: Doc retrieval vector */
4895{
4896 ulint i;
4897
4898 /* Release the get doc graphs if any. */
4899 for (i = 0; i < ib_vector_size(get_docs); ++i) {
4900
4901 fts_get_doc_t* get_doc = static_cast<fts_get_doc_t*>(
4902 ib_vector_get(get_docs, i));
4903
4904 if (get_doc->get_document_graph != NULL) {
4905
4906 ut_a(get_doc->index_cache);
4907
4908 fts_que_graph_free(get_doc->get_document_graph);
4909 get_doc->get_document_graph = NULL;
4910 }
4911 }
4912}
4913
4914/*********************************************************************//**
4915Get the initial Doc ID by consulting the CONFIG table
4916@return initial Doc ID */
4917doc_id_t
4918fts_init_doc_id(
4919/*============*/
4920 const dict_table_t* table) /*!< in: table */
4921{
4922 doc_id_t max_doc_id = 0;
4923
4924 rw_lock_x_lock(&table->fts->cache->lock);
4925
4926 /* Return if the table is already initialized for DOC ID */
4927 if (table->fts->cache->first_doc_id != FTS_NULL_DOC_ID) {
4928 rw_lock_x_unlock(&table->fts->cache->lock);
4929 return(0);
4930 }
4931
4932 DEBUG_SYNC_C("fts_initialize_doc_id");
4933
4934 /* Then compare this value with the ID value stored in the CONFIG
4935 table. The larger one will be our new initial Doc ID */
4936 fts_cmp_set_sync_doc_id(table, 0, FALSE, &max_doc_id);
4937
4938 /* If DICT_TF2_FTS_ADD_DOC_ID is set, we are in the process of
4939 creating index (and add doc id column. No need to recovery
4940 documents */
4941 if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) {
4942 fts_init_index((dict_table_t*) table, TRUE);
4943 }
4944
4945 table->fts->fts_status |= ADDED_TABLE_SYNCED;
4946
4947 table->fts->cache->first_doc_id = max_doc_id;
4948
4949 rw_lock_x_unlock(&table->fts->cache->lock);
4950
4951 ut_ad(max_doc_id > 0);
4952
4953 return(max_doc_id);
4954}
4955
#ifdef FTS_MULT_INDEX
/** Check if the index of a get_doc instance is in the affected set.
@param[in]	fts_indexes	affected FTS indexes
@param[in]	get_doc		info for reading document
@return TRUE if the index is among fts_indexes, FALSE otherwise */
static
ibool
fts_is_index_updated(
	const ib_vector_t*	fts_indexes,
	const fts_get_doc_t*	get_doc)
{
	const dict_index_t*	wanted = get_doc->index_cache->index;
	ulint			pos = 0;

	while (pos < ib_vector_size(fts_indexes)) {
		const dict_index_t*	candidate
			= static_cast<const dict_index_t*>(
				ib_vector_getp_const(fts_indexes, pos));

		ut_a(candidate != NULL);

		if (candidate == wanted) {
			return(TRUE);
		}

		++pos;
	}

	return(FALSE);
}
#endif
4987
4988/*********************************************************************//**
4989Fetch COUNT(*) from specified table.
4990@return the number of rows in the table */
4991ulint
4992fts_get_rows_count(
4993/*===============*/
4994 fts_table_t* fts_table) /*!< in: fts table to read */
4995{
4996 trx_t* trx;
4997 pars_info_t* info;
4998 que_t* graph;
4999 dberr_t error;
5000 ulint count = 0;
5001 char table_name[MAX_FULL_NAME_LEN];
5002
5003 trx = trx_create();
5004 trx->op_info = "fetching FT table rows count";
5005
5006 info = pars_info_create();
5007
5008 pars_info_bind_function(info, "my_func", fts_read_ulint, &count);
5009
5010 fts_get_table_name(fts_table, table_name);
5011 pars_info_bind_id(info, true, "table_name", table_name);
5012
5013 graph = fts_parse_sql(
5014 fts_table,
5015 info,
5016 "DECLARE FUNCTION my_func;\n"
5017 "DECLARE CURSOR c IS"
5018 " SELECT COUNT(*)"
5019 " FROM $table_name;\n"
5020 "BEGIN\n"
5021 "\n"
5022 "OPEN c;\n"
5023 "WHILE 1 = 1 LOOP\n"
5024 " FETCH c INTO my_func();\n"
5025 " IF c % NOTFOUND THEN\n"
5026 " EXIT;\n"
5027 " END IF;\n"
5028 "END LOOP;\n"
5029 "CLOSE c;");
5030
5031 for (;;) {
5032 error = fts_eval_sql(trx, graph);
5033
5034 if (error == DB_SUCCESS) {
5035 fts_sql_commit(trx);
5036
5037 break; /* Exit the loop. */
5038 } else {
5039 fts_sql_rollback(trx);
5040
5041 if (error == DB_LOCK_WAIT_TIMEOUT) {
5042 ib::warn() << "lock wait timeout reading"
5043 " FTS table. Retrying!";
5044
5045 trx->error_state = DB_SUCCESS;
5046 } else {
5047 ib::error() << "(" << ut_strerr(error)
5048 << ") while reading FTS table.";
5049
5050 break; /* Exit the loop. */
5051 }
5052 }
5053 }
5054
5055 fts_que_graph_free(graph);
5056
5057 trx_free(trx);
5058
5059 return(count);
5060}
5061
#ifdef FTS_CACHE_SIZE_DEBUG
/** Read the max cache size parameter from the CONFIG table and store it
in sync->max_cache_size.
@param[in,out]	sync	sync state */
static
void
fts_update_max_cache_size(
	fts_sync_t*	sync)
{
	fts_table_t	config_table;

	FTS_INIT_FTS_TABLE(&config_table, "CONFIG", FTS_COMMON_TABLE,
			   sync->table);

	trx_t*	trx = trx_create();

	/* The size returned is in bytes. */
	sync->max_cache_size = fts_get_max_cache_size(trx, &config_table);

	fts_sql_commit(trx);

	trx_free(trx);
}
#endif /* FTS_CACHE_SIZE_DEBUG */
5086
5087/*********************************************************************//**
5088Free the modified rows of a table. */
5089UNIV_INLINE
5090void
5091fts_trx_table_rows_free(
5092/*====================*/
5093 ib_rbt_t* rows) /*!< in: rbt of rows to free */
5094{
5095 const ib_rbt_node_t* node;
5096
5097 for (node = rbt_first(rows); node; node = rbt_first(rows)) {
5098 fts_trx_row_t* row;
5099
5100 row = rbt_value(fts_trx_row_t, node);
5101
5102 if (row->fts_indexes != NULL) {
5103 /* This vector shouldn't be using the
5104 heap allocator. */
5105 ut_a(row->fts_indexes->allocator->arg == NULL);
5106
5107 ib_vector_free(row->fts_indexes);
5108 row->fts_indexes = NULL;
5109 }
5110
5111 ut_free(rbt_remove_node(rows, node));
5112 }
5113
5114 ut_a(rbt_empty(rows));
5115 rbt_free(rows);
5116}
5117
5118/*********************************************************************//**
5119Free an FTS savepoint instance. */
5120UNIV_INLINE
5121void
5122fts_savepoint_free(
5123/*===============*/
5124 fts_savepoint_t* savepoint) /*!< in: savepoint instance */
5125{
5126 const ib_rbt_node_t* node;
5127 ib_rbt_t* tables = savepoint->tables;
5128
5129 /* Nothing to free! */
5130 if (tables == NULL) {
5131 return;
5132 }
5133
5134 for (node = rbt_first(tables); node; node = rbt_first(tables)) {
5135 fts_trx_table_t* ftt;
5136 fts_trx_table_t** fttp;
5137
5138 fttp = rbt_value(fts_trx_table_t*, node);
5139 ftt = *fttp;
5140
5141 /* This can be NULL if a savepoint was released. */
5142 if (ftt->rows != NULL) {
5143 fts_trx_table_rows_free(ftt->rows);
5144 ftt->rows = NULL;
5145 }
5146
5147 /* This can be NULL if a savepoint was released. */
5148 if (ftt->added_doc_ids != NULL) {
5149 fts_doc_ids_free(ftt->added_doc_ids);
5150 ftt->added_doc_ids = NULL;
5151 }
5152
5153 /* The default savepoint name must be NULL. */
5154 if (ftt->docs_added_graph) {
5155 fts_que_graph_free(ftt->docs_added_graph);
5156 }
5157
5158 /* NOTE: We are responsible for free'ing the node */
5159 ut_free(rbt_remove_node(tables, node));
5160 }
5161
5162 ut_a(rbt_empty(tables));
5163 rbt_free(tables);
5164 savepoint->tables = NULL;
5165}
5166
5167/*********************************************************************//**
5168Free an FTS trx. */
5169void
5170fts_trx_free(
5171/*=========*/
5172 fts_trx_t* fts_trx) /* in, own: FTS trx */
5173{
5174 ulint i;
5175
5176 for (i = 0; i < ib_vector_size(fts_trx->savepoints); ++i) {
5177 fts_savepoint_t* savepoint;
5178
5179 savepoint = static_cast<fts_savepoint_t*>(
5180 ib_vector_get(fts_trx->savepoints, i));
5181
5182 /* The default savepoint name must be NULL. */
5183 if (i == 0) {
5184 ut_a(savepoint->name == NULL);
5185 }
5186
5187 fts_savepoint_free(savepoint);
5188 }
5189
5190 for (i = 0; i < ib_vector_size(fts_trx->last_stmt); ++i) {
5191 fts_savepoint_t* savepoint;
5192
5193 savepoint = static_cast<fts_savepoint_t*>(
5194 ib_vector_get(fts_trx->last_stmt, i));
5195
5196 /* The default savepoint name must be NULL. */
5197 if (i == 0) {
5198 ut_a(savepoint->name == NULL);
5199 }
5200
5201 fts_savepoint_free(savepoint);
5202 }
5203
5204 if (fts_trx->heap) {
5205 mem_heap_free(fts_trx->heap);
5206 }
5207}
5208
5209/*********************************************************************//**
5210Extract the doc id from the FTS hidden column.
5211@return doc id that was extracted from rec */
5212doc_id_t
5213fts_get_doc_id_from_row(
5214/*====================*/
5215 dict_table_t* table, /*!< in: table */
5216 dtuple_t* row) /*!< in: row whose FTS doc id we
5217 want to extract.*/
5218{
5219 dfield_t* field;
5220 doc_id_t doc_id = 0;
5221
5222 ut_a(table->fts->doc_col != ULINT_UNDEFINED);
5223
5224 field = dtuple_get_nth_field(row, table->fts->doc_col);
5225
5226 ut_a(dfield_get_len(field) == sizeof(doc_id));
5227 ut_a(dfield_get_type(field)->mtype == DATA_INT);
5228
5229 doc_id = fts_read_doc_id(
5230 static_cast<const byte*>(dfield_get_data(field)));
5231
5232 return(doc_id);
5233}
5234
5235/** Extract the doc id from the record that belongs to index.
5236@param[in] table table
5237@param[in] rec record contains FTS_DOC_ID
5238@param[in] index index of rec
5239@param[in] heap heap memory
5240@return doc id that was extracted from rec */
5241doc_id_t
5242fts_get_doc_id_from_rec(
5243 dict_table_t* table,
5244 const rec_t* rec,
5245 const dict_index_t* index,
5246 mem_heap_t* heap)
5247{
5248 ulint len;
5249 const byte* data;
5250 ulint col_no;
5251 doc_id_t doc_id = 0;
5252 ulint offsets_[REC_OFFS_NORMAL_SIZE];
5253 ulint* offsets = offsets_;
5254 mem_heap_t* my_heap = heap;
5255
5256 ut_a(table->fts->doc_col != ULINT_UNDEFINED);
5257
5258 rec_offs_init(offsets_);
5259
5260 offsets = rec_get_offsets(
5261 rec, index, offsets, true, ULINT_UNDEFINED, &my_heap);
5262
5263 col_no = dict_col_get_index_pos(
5264 &table->cols[table->fts->doc_col], index);
5265
5266 ut_ad(col_no != ULINT_UNDEFINED);
5267
5268 data = rec_get_nth_field(rec, offsets, col_no, &len);
5269
5270 ut_a(len == 8);
5271 ut_ad(8 == sizeof(doc_id));
5272 doc_id = static_cast<doc_id_t>(mach_read_from_8(data));
5273
5274 if (my_heap && !heap) {
5275 mem_heap_free(my_heap);
5276 }
5277
5278 return(doc_id);
5279}
5280
5281/*********************************************************************//**
5282Search the index specific cache for a particular FTS index.
5283@return the index specific cache else NULL */
5284fts_index_cache_t*
5285fts_find_index_cache(
5286/*=================*/
5287 const fts_cache_t* cache, /*!< in: cache to search */
5288 const dict_index_t* index) /*!< in: index to search for */
5289{
5290 /* We cast away the const because our internal function, takes
5291 non-const cache arg and returns a non-const pointer. */
5292 return(static_cast<fts_index_cache_t*>(
5293 fts_get_index_cache((fts_cache_t*) cache, index)));
5294}
5295
5296/*********************************************************************//**
5297Search cache for word.
5298@return the word node vector if found else NULL */
5299const ib_vector_t*
5300fts_cache_find_word(
5301/*================*/
5302 const fts_index_cache_t*index_cache, /*!< in: cache to search */
5303 const fts_string_t* text) /*!< in: word to search for */
5304{
5305 ib_rbt_bound_t parent;
5306 const ib_vector_t* nodes = NULL;
5307#ifdef UNIV_DEBUG
5308 dict_table_t* table = index_cache->index->table;
5309 fts_cache_t* cache = table->fts->cache;
5310
5311 ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
5312#endif /* UNIV_DEBUG */
5313
5314 /* Lookup the word in the rb tree */
5315 if (rbt_search(index_cache->words, &parent, text) == 0) {
5316 const fts_tokenizer_word_t* word;
5317
5318 word = rbt_value(fts_tokenizer_word_t, parent.last);
5319
5320 nodes = word->nodes;
5321 }
5322
5323 return(nodes);
5324}
5325
5326/*********************************************************************//**
5327Append deleted doc ids to vector. */
5328void
5329fts_cache_append_deleted_doc_ids(
5330/*=============================*/
5331 const fts_cache_t* cache, /*!< in: cache to use */
5332 ib_vector_t* vector) /*!< in: append to this vector */
5333{
5334 mutex_enter(const_cast<ib_mutex_t*>(&cache->deleted_lock));
5335
5336 if (cache->deleted_doc_ids == NULL) {
5337 mutex_exit((ib_mutex_t*) &cache->deleted_lock);
5338 return;
5339 }
5340
5341
5342 for (ulint i = 0; i < ib_vector_size(cache->deleted_doc_ids); ++i) {
5343 fts_update_t* update;
5344
5345 update = static_cast<fts_update_t*>(
5346 ib_vector_get(cache->deleted_doc_ids, i));
5347
5348 ib_vector_push(vector, &update->doc_id);
5349 }
5350
5351 mutex_exit((ib_mutex_t*) &cache->deleted_lock);
5352}
5353
5354/*********************************************************************//**
5355Wait for the background thread to start. We poll to detect change
5356of state, which is acceptable, since the wait should happen only
5357once during startup.
5358@return true if the thread started else FALSE (i.e timed out) */
5359ibool
5360fts_wait_for_background_thread_to_start(
5361/*====================================*/
5362 dict_table_t* table, /*!< in: table to which the thread
5363 is attached */
5364 ulint max_wait) /*!< in: time in microseconds, if
5365 set to 0 then it disables
5366 timeout checking */
5367{
5368 ulint count = 0;
5369 ibool done = FALSE;
5370
5371 ut_a(max_wait == 0 || max_wait >= FTS_MAX_BACKGROUND_THREAD_WAIT);
5372
5373 for (;;) {
5374 fts_t* fts = table->fts;
5375
5376 mutex_enter(&fts->bg_threads_mutex);
5377
5378 if (fts->fts_status & BG_THREAD_READY) {
5379
5380 done = TRUE;
5381 }
5382
5383 mutex_exit(&fts->bg_threads_mutex);
5384
5385 if (!done) {
5386 os_thread_sleep(FTS_MAX_BACKGROUND_THREAD_WAIT);
5387
5388 if (max_wait > 0) {
5389
5390 max_wait -= FTS_MAX_BACKGROUND_THREAD_WAIT;
5391
5392 /* We ignore the residual value. */
5393 if (max_wait < FTS_MAX_BACKGROUND_THREAD_WAIT) {
5394 break;
5395 }
5396 }
5397
5398 ++count;
5399 } else {
5400 break;
5401 }
5402
5403 if (count >= FTS_BACKGROUND_THREAD_WAIT_COUNT) {
5404 ib::error() << "The background thread for the FTS"
5405 " table " << table->name
5406 << " refuses to start";
5407
5408 count = 0;
5409 }
5410 }
5411
5412 return(done);
5413}
5414
5415/*********************************************************************//**
5416Add the FTS document id hidden column. */
5417void
5418fts_add_doc_id_column(
5419/*==================*/
5420 dict_table_t* table, /*!< in/out: Table with FTS index */
5421 mem_heap_t* heap) /*!< in: temporary memory heap, or NULL */
5422{
5423 dict_mem_table_add_col(
5424 table, heap,
5425 FTS_DOC_ID_COL_NAME,
5426 DATA_INT,
5427 dtype_form_prtype(
5428 DATA_NOT_NULL | DATA_UNSIGNED
5429 | DATA_BINARY_TYPE | DATA_FTS_DOC_ID, 0),
5430 sizeof(doc_id_t));
5431 DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_HAS_DOC_ID);
5432}
5433
5434/** Add new fts doc id to the update vector.
5435@param[in] table the table that contains the FTS index.
5436@param[in,out] ufield the fts doc id field in the update vector.
5437 No new memory is allocated for this in this
5438 function.
5439@param[in,out] next_doc_id the fts doc id that has been added to the
5440 update vector. If 0, a new fts doc id is
5441 automatically generated. The memory provided
5442 for this argument will be used by the update
5443 vector. Ensure that the life time of this
5444 memory matches that of the update vector.
5445@return the fts doc id used in the update vector */
5446doc_id_t
5447fts_update_doc_id(
5448 dict_table_t* table,
5449 upd_field_t* ufield,
5450 doc_id_t* next_doc_id)
5451{
5452 doc_id_t doc_id;
5453 dberr_t error = DB_SUCCESS;
5454
5455 if (*next_doc_id) {
5456 doc_id = *next_doc_id;
5457 } else {
5458 /* Get the new document id that will be added. */
5459 error = fts_get_next_doc_id(table, &doc_id);
5460 }
5461
5462 if (error == DB_SUCCESS) {
5463 dict_index_t* clust_index;
5464 dict_col_t* col = dict_table_get_nth_col(
5465 table, table->fts->doc_col);
5466
5467 ufield->exp = NULL;
5468
5469 ufield->new_val.len = sizeof(doc_id);
5470
5471 clust_index = dict_table_get_first_index(table);
5472
5473 ufield->field_no = dict_col_get_clust_pos(col, clust_index);
5474 dict_col_copy_type(col, dfield_get_type(&ufield->new_val));
5475
5476 /* It is possible we update record that has
5477 not yet be sync-ed from last crash. */
5478
5479 /* Convert to storage byte order. */
5480 ut_a(doc_id != FTS_NULL_DOC_ID);
5481 fts_write_doc_id((byte*) next_doc_id, doc_id);
5482
5483 ufield->new_val.data = next_doc_id;
5484 ufield->new_val.ext = 0;
5485 }
5486
5487 return(doc_id);
5488}
5489
5490/** fts_t constructor.
5491@param[in] table table with FTS indexes
5492@param[in,out] heap memory heap where 'this' is stored */
5493fts_t::fts_t(
5494 const dict_table_t* table,
5495 mem_heap_t* heap)
5496 :
5497 bg_threads(0),
5498 fts_status(0),
5499 add_wq(NULL),
5500 cache(NULL),
5501 doc_col(ULINT_UNDEFINED),
5502 fts_heap(heap)
5503{
5504 ut_a(table->fts == NULL);
5505
5506 mutex_create(LATCH_ID_FTS_BG_THREADS, &bg_threads_mutex);
5507
5508 ib_alloc_t* heap_alloc = ib_heap_allocator_create(fts_heap);
5509
5510 indexes = ib_vector_create(heap_alloc, sizeof(dict_index_t*), 4);
5511
5512 dict_table_get_all_fts_indexes(table, indexes);
5513}
5514
5515/** fts_t destructor. */
5516fts_t::~fts_t()
5517{
5518 mutex_free(&bg_threads_mutex);
5519
5520 ut_ad(add_wq == NULL);
5521
5522 if (cache != NULL) {
5523 fts_cache_clear(cache);
5524 fts_cache_destroy(cache);
5525 cache = NULL;
5526 }
5527
5528 /* There is no need to call ib_vector_free() on this->indexes
5529 because it is stored in this->fts_heap. */
5530}
5531
5532/*********************************************************************//**
5533Create an instance of fts_t.
5534@return instance of fts_t */
5535fts_t*
5536fts_create(
5537/*=======*/
5538 dict_table_t* table) /*!< in/out: table with FTS indexes */
5539{
5540 fts_t* fts;
5541 mem_heap_t* heap;
5542
5543 heap = mem_heap_create(512);
5544
5545 fts = static_cast<fts_t*>(mem_heap_alloc(heap, sizeof(*fts)));
5546
5547 new(fts) fts_t(table, heap);
5548
5549 return(fts);
5550}
5551
5552/*********************************************************************//**
5553Free the FTS resources. */
5554void
5555fts_free(
5556/*=====*/
5557 dict_table_t* table) /*!< in/out: table with FTS indexes */
5558{
5559 fts_t* fts = table->fts;
5560
5561 fts->~fts_t();
5562
5563 mem_heap_free(fts->fts_heap);
5564
5565 table->fts = NULL;
5566}
5567
#if 0 // TODO: Enable this in WL#6608
/** Signal FTS threads to initiate shutdown by setting BG_THREAD_STOP in
fts->fts_status under bg_threads_mutex.
@param[in]	table	table with FTS indexes
@param[in,out]	fts	fts instance that needs to be informed about
			shutdown */
void
fts_start_shutdown(
	dict_table_t*	table,
	fts_t*		fts)
{
	mutex_enter(&fts->bg_threads_mutex);
	fts->fts_status |= BG_THREAD_STOP;
	mutex_exit(&fts->bg_threads_mutex);
}

/** Wait for FTS threads to shutdown. BG_THREAD_STOP must already be set
(see fts_start_shutdown()). bg_threads_mutex is held for the whole wait.
@param[in]	table	table with FTS indexes
@param[in,out]	fts	fts instance to shutdown */
void
fts_shutdown(
	dict_table_t*	table,
	fts_t*		fts)
{
	mutex_enter(&fts->bg_threads_mutex);

	ut_a(fts->fts_status & BG_THREAD_STOP);

	dict_table_wait_for_bg_threads_to_exit(table, 20000);

	mutex_exit(&fts->bg_threads_mutex);
}
#endif
5603
5604/*********************************************************************//**
5605Take a FTS savepoint. */
5606UNIV_INLINE
5607void
5608fts_savepoint_copy(
5609/*===============*/
5610 const fts_savepoint_t* src, /*!< in: source savepoint */
5611 fts_savepoint_t* dst) /*!< out: destination savepoint */
5612{
5613 const ib_rbt_node_t* node;
5614 const ib_rbt_t* tables;
5615
5616 tables = src->tables;
5617
5618 for (node = rbt_first(tables); node; node = rbt_next(tables, node)) {
5619
5620 fts_trx_table_t* ftt_dst;
5621 const fts_trx_table_t** ftt_src;
5622
5623 ftt_src = rbt_value(const fts_trx_table_t*, node);
5624
5625 ftt_dst = fts_trx_table_clone(*ftt_src);
5626
5627 rbt_insert(dst->tables, &ftt_dst, &ftt_dst);
5628 }
5629}
5630
5631/*********************************************************************//**
5632Take a FTS savepoint. */
5633void
5634fts_savepoint_take(
5635/*===============*/
5636 fts_trx_t* fts_trx, /*!< in: fts transaction */
5637 const char* name) /*!< in: savepoint name */
5638{
5639 mem_heap_t* heap;
5640 fts_savepoint_t* savepoint;
5641 fts_savepoint_t* last_savepoint;
5642
5643 ut_a(name != NULL);
5644
5645 heap = fts_trx->heap;
5646
5647 /* The implied savepoint must exist. */
5648 ut_a(ib_vector_size(fts_trx->savepoints) > 0);
5649
5650 last_savepoint = static_cast<fts_savepoint_t*>(
5651 ib_vector_last(fts_trx->savepoints));
5652 savepoint = fts_savepoint_create(fts_trx->savepoints, name, heap);
5653
5654 if (last_savepoint->tables != NULL) {
5655 fts_savepoint_copy(last_savepoint, savepoint);
5656 }
5657}
5658
5659/*********************************************************************//**
5660Lookup a savepoint instance by name.
5661@return ULINT_UNDEFINED if not found */
5662UNIV_INLINE
5663ulint
5664fts_savepoint_lookup(
5665/*==================*/
5666 ib_vector_t* savepoints, /*!< in: savepoints */
5667 const char* name) /*!< in: savepoint name */
5668{
5669 ulint i;
5670
5671 ut_a(ib_vector_size(savepoints) > 0);
5672
5673 for (i = 1; i < ib_vector_size(savepoints); ++i) {
5674 fts_savepoint_t* savepoint;
5675
5676 savepoint = static_cast<fts_savepoint_t*>(
5677 ib_vector_get(savepoints, i));
5678
5679 if (strcmp(name, savepoint->name) == 0) {
5680 return(i);
5681 }
5682 }
5683
5684 return(ULINT_UNDEFINED);
5685}
5686
5687/*********************************************************************//**
5688Release the savepoint data identified by name. All savepoints created
5689after the named savepoint are kept.
5690@return DB_SUCCESS or error code */
5691void
5692fts_savepoint_release(
5693/*==================*/
5694 trx_t* trx, /*!< in: transaction */
5695 const char* name) /*!< in: savepoint name */
5696{
5697 ut_a(name != NULL);
5698
5699 ib_vector_t* savepoints = trx->fts_trx->savepoints;
5700
5701 ut_a(ib_vector_size(savepoints) > 0);
5702
5703 ulint i = fts_savepoint_lookup(savepoints, name);
5704 if (i != ULINT_UNDEFINED) {
5705 ut_a(i >= 1);
5706
5707 fts_savepoint_t* savepoint;
5708 savepoint = static_cast<fts_savepoint_t*>(
5709 ib_vector_get(savepoints, i));
5710
5711 if (i == ib_vector_size(savepoints) - 1) {
5712 /* If the savepoint is the last, we save its
5713 tables to the previous savepoint. */
5714 fts_savepoint_t* prev_savepoint;
5715 prev_savepoint = static_cast<fts_savepoint_t*>(
5716 ib_vector_get(savepoints, i - 1));
5717
5718 ib_rbt_t* tables = savepoint->tables;
5719 savepoint->tables = prev_savepoint->tables;
5720 prev_savepoint->tables = tables;
5721 }
5722
5723 fts_savepoint_free(savepoint);
5724 ib_vector_remove(savepoints, *(void**)savepoint);
5725
5726 /* Make sure we don't delete the implied savepoint. */
5727 ut_a(ib_vector_size(savepoints) > 0);
5728 }
5729}
5730
5731/**********************************************************************//**
5732Refresh last statement savepoint. */
5733void
5734fts_savepoint_laststmt_refresh(
5735/*===========================*/
5736 trx_t* trx) /*!< in: transaction */
5737{
5738
5739 fts_trx_t* fts_trx;
5740 fts_savepoint_t* savepoint;
5741
5742 fts_trx = trx->fts_trx;
5743
5744 savepoint = static_cast<fts_savepoint_t*>(
5745 ib_vector_pop(fts_trx->last_stmt));
5746 fts_savepoint_free(savepoint);
5747
5748 ut_ad(ib_vector_is_empty(fts_trx->last_stmt));
5749 savepoint = fts_savepoint_create(fts_trx->last_stmt, NULL, NULL);
5750}
5751
5752/********************************************************************
5753Undo the Doc ID add/delete operations in last stmt */
5754static
5755void
5756fts_undo_last_stmt(
5757/*===============*/
5758 fts_trx_table_t* s_ftt, /*!< in: Transaction FTS table */
5759 fts_trx_table_t* l_ftt) /*!< in: last stmt FTS table */
5760{
5761 ib_rbt_t* s_rows;
5762 ib_rbt_t* l_rows;
5763 const ib_rbt_node_t* node;
5764
5765 l_rows = l_ftt->rows;
5766 s_rows = s_ftt->rows;
5767
5768 for (node = rbt_first(l_rows);
5769 node;
5770 node = rbt_next(l_rows, node)) {
5771 fts_trx_row_t* l_row = rbt_value(fts_trx_row_t, node);
5772 ib_rbt_bound_t parent;
5773
5774 rbt_search(s_rows, &parent, &(l_row->doc_id));
5775
5776 if (parent.result == 0) {
5777 fts_trx_row_t* s_row = rbt_value(
5778 fts_trx_row_t, parent.last);
5779
5780 switch (l_row->state) {
5781 case FTS_INSERT:
5782 ut_free(rbt_remove_node(s_rows, parent.last));
5783 break;
5784
5785 case FTS_DELETE:
5786 if (s_row->state == FTS_NOTHING) {
5787 s_row->state = FTS_INSERT;
5788 } else if (s_row->state == FTS_DELETE) {
5789 ut_free(rbt_remove_node(
5790 s_rows, parent.last));
5791 }
5792 break;
5793
5794 /* FIXME: Check if FTS_MODIFY need to be addressed */
5795 case FTS_MODIFY:
5796 case FTS_NOTHING:
5797 break;
5798 default:
5799 ut_error;
5800 }
5801 }
5802 }
5803}
5804
5805/**********************************************************************//**
5806Rollback to savepoint indentified by name.
5807@return DB_SUCCESS or error code */
5808void
5809fts_savepoint_rollback_last_stmt(
5810/*=============================*/
5811 trx_t* trx) /*!< in: transaction */
5812{
5813 ib_vector_t* savepoints;
5814 fts_savepoint_t* savepoint;
5815 fts_savepoint_t* last_stmt;
5816 fts_trx_t* fts_trx;
5817 ib_rbt_bound_t parent;
5818 const ib_rbt_node_t* node;
5819 ib_rbt_t* l_tables;
5820 ib_rbt_t* s_tables;
5821
5822 fts_trx = trx->fts_trx;
5823 savepoints = fts_trx->savepoints;
5824
5825 savepoint = static_cast<fts_savepoint_t*>(ib_vector_last(savepoints));
5826 last_stmt = static_cast<fts_savepoint_t*>(
5827 ib_vector_last(fts_trx->last_stmt));
5828
5829 l_tables = last_stmt->tables;
5830 s_tables = savepoint->tables;
5831
5832 for (node = rbt_first(l_tables);
5833 node;
5834 node = rbt_next(l_tables, node)) {
5835
5836 fts_trx_table_t** l_ftt;
5837
5838 l_ftt = rbt_value(fts_trx_table_t*, node);
5839
5840 rbt_search_cmp(
5841 s_tables, &parent, &(*l_ftt)->table->id,
5842 fts_trx_table_id_cmp, NULL);
5843
5844 if (parent.result == 0) {
5845 fts_trx_table_t** s_ftt;
5846
5847 s_ftt = rbt_value(fts_trx_table_t*, parent.last);
5848
5849 fts_undo_last_stmt(*s_ftt, *l_ftt);
5850 }
5851 }
5852}
5853
5854/**********************************************************************//**
5855Rollback to savepoint indentified by name.
5856@return DB_SUCCESS or error code */
5857void
5858fts_savepoint_rollback(
5859/*===================*/
5860 trx_t* trx, /*!< in: transaction */
5861 const char* name) /*!< in: savepoint name */
5862{
5863 ulint i;
5864 ib_vector_t* savepoints;
5865
5866 ut_a(name != NULL);
5867
5868 savepoints = trx->fts_trx->savepoints;
5869
5870 /* We pop all savepoints from the the top of the stack up to
5871 and including the instance that was found. */
5872 i = fts_savepoint_lookup(savepoints, name);
5873
5874 if (i != ULINT_UNDEFINED) {
5875 fts_savepoint_t* savepoint;
5876
5877 ut_a(i > 0);
5878
5879 while (ib_vector_size(savepoints) > i) {
5880 fts_savepoint_t* savepoint;
5881
5882 savepoint = static_cast<fts_savepoint_t*>(
5883 ib_vector_pop(savepoints));
5884
5885 if (savepoint->name != NULL) {
5886 /* Since name was allocated on the heap, the
5887 memory will be released when the transaction
5888 completes. */
5889 savepoint->name = NULL;
5890
5891 fts_savepoint_free(savepoint);
5892 }
5893 }
5894
5895 /* Pop all a elements from the top of the stack that may
5896 have been released. We have to be careful that we don't
5897 delete the implied savepoint. */
5898
5899 for (savepoint = static_cast<fts_savepoint_t*>(
5900 ib_vector_last(savepoints));
5901 ib_vector_size(savepoints) > 1
5902 && savepoint->name == NULL;
5903 savepoint = static_cast<fts_savepoint_t*>(
5904 ib_vector_last(savepoints))) {
5905
5906 ib_vector_pop(savepoints);
5907 }
5908
5909 /* Make sure we don't delete the implied savepoint. */
5910 ut_a(ib_vector_size(savepoints) > 0);
5911
5912 /* Restore the savepoint. */
5913 fts_savepoint_take(trx->fts_trx, name);
5914 }
5915}
5916
5917/** Check if a table is an FTS auxiliary table name.
5918@param[out] table FTS table info
5919@param[in] name Table name
5920@param[in] len Length of table name
5921@return true if the name matches an auxiliary table name pattern */
5922static
5923bool
5924fts_is_aux_table_name(
5925 fts_aux_table_t* table,
5926 const char* name,
5927 ulint len)
5928{
5929 const char* ptr;
5930 char* end;
5931 char my_name[MAX_FULL_NAME_LEN + 1];
5932
5933 ut_ad(len <= MAX_FULL_NAME_LEN);
5934 ut_memcpy(my_name, name, len);
5935 my_name[len] = 0;
5936 end = my_name + len;
5937
5938 ptr = static_cast<const char*>(memchr(my_name, '/', len));
5939
5940 if (ptr != NULL) {
5941 /* We will start the match after the '/' */
5942 ++ptr;
5943 len = ulint(end - ptr);
5944 }
5945
5946 /* All auxiliary tables are prefixed with "FTS_" and the name
5947 length will be at the very least greater than 20 bytes. */
5948 if (ptr != NULL && len > 20 && strncmp(ptr, "FTS_", 4) == 0) {
5949 ulint i;
5950
5951 /* Skip the prefix. */
5952 ptr += 4;
5953 len -= 4;
5954
5955 /* Try and read the table id. */
5956 if (!fts_read_object_id(&table->parent_id, ptr)) {
5957 return(false);
5958 }
5959
5960 /* Skip the table id. */
5961 ptr = static_cast<const char*>(memchr(ptr, '_', len));
5962
5963 if (ptr == NULL) {
5964 return(false);
5965 }
5966
5967 /* Skip the underscore. */
5968 ++ptr;
5969 ut_a(end > ptr);
5970 len = ulint(end - ptr);
5971
5972 /* First search the common table suffix array. */
5973 for (i = 0; fts_common_tables[i] != NULL; ++i) {
5974
5975 if (strncmp(ptr, fts_common_tables[i], len) == 0) {
5976 return(true);
5977 }
5978 }
5979
5980 /* Could be obsolete common tables. */
5981 if (strncmp(ptr, "ADDED", len) == 0
5982 || strncmp(ptr, "STOPWORDS", len) == 0) {
5983 return(true);
5984 }
5985
5986 /* Try and read the index id. */
5987 if (!fts_read_object_id(&table->index_id, ptr)) {
5988 return(false);
5989 }
5990
5991 /* Skip the table id. */
5992 ptr = static_cast<const char*>(memchr(ptr, '_', len));
5993
5994 if (ptr == NULL) {
5995 return(false);
5996 }
5997
5998 /* Skip the underscore. */
5999 ++ptr;
6000 ut_a(end > ptr);
6001 len = ulint(end - ptr);
6002
6003 /* Search the FT index specific array. */
6004 for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
6005
6006 if (strncmp(ptr, fts_get_suffix(i), len) == 0) {
6007 return(true);
6008 }
6009 }
6010
6011 /* Other FT index specific table(s). */
6012 if (strncmp(ptr, "DOC_ID", len) == 0) {
6013 return(true);
6014 }
6015 }
6016
6017 return(false);
6018}
6019
6020/**********************************************************************//**
6021Callback function to read a single table ID column.
6022@return Always return TRUE */
6023static
6024ibool
6025fts_read_tables(
6026/*============*/
6027 void* row, /*!< in: sel_node_t* */
6028 void* user_arg) /*!< in: pointer to ib_vector_t */
6029{
6030 int i;
6031 fts_aux_table_t*table;
6032 mem_heap_t* heap;
6033 ibool done = FALSE;
6034 ib_vector_t* tables = static_cast<ib_vector_t*>(user_arg);
6035 sel_node_t* sel_node = static_cast<sel_node_t*>(row);
6036 que_node_t* exp = sel_node->select_list;
6037
6038 /* Must be a heap allocated vector. */
6039 ut_a(tables->allocator->arg != NULL);
6040
6041 /* We will use this heap for allocating strings. */
6042 heap = static_cast<mem_heap_t*>(tables->allocator->arg);
6043 table = static_cast<fts_aux_table_t*>(ib_vector_push(tables, NULL));
6044
6045 memset(table, 0x0, sizeof(*table));
6046
6047 /* Iterate over the columns and read the values. */
6048 for (i = 0; exp && !done; exp = que_node_get_next(exp), ++i) {
6049
6050 dfield_t* dfield = que_node_get_val(exp);
6051 void* data = dfield_get_data(dfield);
6052 ulint len = dfield_get_len(dfield);
6053
6054 ut_a(len != UNIV_SQL_NULL);
6055
6056 /* Note: The column numbers below must match the SELECT */
6057 switch (i) {
6058 case 0: /* NAME */
6059
6060 if (!fts_is_aux_table_name(
6061 table, static_cast<const char*>(data), len)) {
6062 ib_vector_pop(tables);
6063 done = TRUE;
6064 break;
6065 }
6066
6067 table->name = static_cast<char*>(
6068 mem_heap_alloc(heap, len + 1));
6069 memcpy(table->name, data, len);
6070 table->name[len] = 0;
6071 break;
6072
6073 case 1: /* ID */
6074 ut_a(len == 8);
6075 table->id = mach_read_from_8(
6076 static_cast<const byte*>(data));
6077 break;
6078
6079 default:
6080 ut_error;
6081 }
6082 }
6083
6084 return(TRUE);
6085}
6086
6087/******************************************************************//**
6088Callback that sets a hex formatted FTS table's flags2 in
6089SYS_TABLES. The flags is stored in MIX_LEN column.
6090@return FALSE if all OK */
6091static
6092ibool
6093fts_set_hex_format(
6094/*===============*/
6095 void* row, /*!< in: sel_node_t* */
6096 void* user_arg) /*!< in: bool set/unset flag */
6097{
6098 sel_node_t* node = static_cast<sel_node_t*>(row);
6099 dfield_t* dfield = que_node_get_val(node->select_list);
6100
6101 ut_ad(dtype_get_mtype(dfield_get_type(dfield)) == DATA_INT);
6102 ut_ad(dfield_get_len(dfield) == sizeof(ib_uint32_t));
6103 /* There should be at most one matching record. So the value
6104 must be the default value. */
6105 ut_ad(mach_read_from_4(static_cast<byte*>(user_arg))
6106 == ULINT32_UNDEFINED);
6107
6108 ulint flags2 = mach_read_from_4(
6109 static_cast<byte*>(dfield_get_data(dfield)));
6110
6111 flags2 |= DICT_TF2_FTS_AUX_HEX_NAME;
6112
6113 mach_write_to_4(static_cast<byte*>(user_arg), flags2);
6114
6115 return(FALSE);
6116}
6117
6118/*****************************************************************//**
6119Update the DICT_TF2_FTS_AUX_HEX_NAME flag in SYS_TABLES.
6120@return DB_SUCCESS or error code. */
6121static
6122dberr_t
6123fts_update_hex_format_flag(
6124/*=======================*/
6125 trx_t* trx, /*!< in/out: transaction that
6126 covers the update */
6127 table_id_t table_id, /*!< in: Table for which we want
6128 to set the root table->flags2 */
6129 bool dict_locked) /*!< in: set to true if the
6130 caller already owns the
6131 dict_sys_t::mutex. */
6132{
6133 pars_info_t* info;
6134 ib_uint32_t flags2;
6135
6136 static const char sql[] =
6137 "PROCEDURE UPDATE_HEX_FORMAT_FLAG() IS\n"
6138 "DECLARE FUNCTION my_func;\n"
6139 "DECLARE CURSOR c IS\n"
6140 " SELECT MIX_LEN"
6141 " FROM SYS_TABLES"
6142 " WHERE ID = :table_id FOR UPDATE;"
6143 "\n"
6144 "BEGIN\n"
6145 "OPEN c;\n"
6146 "WHILE 1 = 1 LOOP\n"
6147 " FETCH c INTO my_func();\n"
6148 " IF c % NOTFOUND THEN\n"
6149 " EXIT;\n"
6150 " END IF;\n"
6151 "END LOOP;\n"
6152 "UPDATE SYS_TABLES"
6153 " SET MIX_LEN = :flags2"
6154 " WHERE ID = :table_id;\n"
6155 "CLOSE c;\n"
6156 "END;\n";
6157
6158 flags2 = ULINT32_UNDEFINED;
6159
6160 info = pars_info_create();
6161
6162 pars_info_add_ull_literal(info, "table_id", table_id);
6163 pars_info_bind_int4_literal(info, "flags2", &flags2);
6164
6165 pars_info_bind_function(
6166 info, "my_func", fts_set_hex_format, &flags2);
6167
6168 if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) {
6169 trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
6170 }
6171
6172 dberr_t err = que_eval_sql(info, sql, !dict_locked, trx);
6173
6174 ut_a(flags2 != ULINT32_UNDEFINED);
6175
6176 return(err);
6177}
6178
6179/*********************************************************************//**
6180Rename an aux table to HEX format. It's called when "%016llu" is used
6181to format an object id in table name, which only happens in Windows. */
6182static MY_ATTRIBUTE((nonnull, warn_unused_result))
6183dberr_t
6184fts_rename_one_aux_table_to_hex_format(
6185/*===================================*/
6186 trx_t* trx, /*!< in: transaction */
6187 const fts_aux_table_t* aux_table, /*!< in: table info */
6188 const dict_table_t* parent_table) /*!< in: parent table name */
6189{
6190 const char* ptr;
6191 fts_table_t fts_table;
6192 char new_name[MAX_FULL_NAME_LEN];
6193 dberr_t error;
6194
6195 ptr = strchr(aux_table->name, '/');
6196 ut_a(ptr != NULL);
6197 ++ptr;
6198 /* Skip "FTS_", table id and underscore */
6199 for (ulint i = 0; i < 2; ++i) {
6200 ptr = strchr(ptr, '_');
6201 ut_a(ptr != NULL);
6202 ++ptr;
6203 }
6204
6205 fts_table.suffix = NULL;
6206 if (aux_table->index_id == 0) {
6207 fts_table.type = FTS_COMMON_TABLE;
6208
6209 for (ulint i = 0; fts_common_tables[i] != NULL; ++i) {
6210 if (strcmp(ptr, fts_common_tables[i]) == 0) {
6211 fts_table.suffix = fts_common_tables[i];
6212 break;
6213 }
6214 }
6215 } else {
6216 fts_table.type = FTS_INDEX_TABLE;
6217
6218 /* Skip index id and underscore */
6219 ptr = strchr(ptr, '_');
6220 ut_a(ptr != NULL);
6221 ++ptr;
6222
6223 for (ulint i = 0; fts_index_selector[i].value; ++i) {
6224 if (strcmp(ptr, fts_get_suffix(i)) == 0) {
6225 fts_table.suffix = fts_get_suffix(i);
6226 break;
6227 }
6228 }
6229 }
6230
6231 ut_a(fts_table.suffix != NULL);
6232
6233 fts_table.parent = parent_table->name.m_name;
6234 fts_table.table_id = aux_table->parent_id;
6235 fts_table.index_id = aux_table->index_id;
6236 fts_table.table = parent_table;
6237
6238 fts_get_table_name(&fts_table, new_name);
6239 ut_ad(strcmp(new_name, aux_table->name) != 0);
6240
6241 if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) {
6242 trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
6243 }
6244
6245 error = row_rename_table_for_mysql(aux_table->name, new_name, trx,
6246 FALSE);
6247
6248 if (error != DB_SUCCESS) {
6249 ib::warn() << "Failed to rename aux table '"
6250 << aux_table->name << "' to new format '"
6251 << new_name << "'.";
6252 } else {
6253 ib::info() << "Renamed aux table '" << aux_table->name
6254 << "' to '" << new_name << "'.";
6255 }
6256
6257 return(error);
6258}
6259
6260/**********************************************************************//**
6261Rename all aux tables of a parent table to HEX format. Also set aux tables'
6262flags2 and parent table's flags2 with DICT_TF2_FTS_AUX_HEX_NAME.
6263It's called when "%016llu" is used to format an object id in table name,
6264which only happens in Windows.
6265Note the ids in tables are correct but the names are old ambiguous ones.
6266
6267This function should make sure that either all the parent table and aux tables
6268are set DICT_TF2_FTS_AUX_HEX_NAME with flags2 or none of them are set */
6269static MY_ATTRIBUTE((nonnull, warn_unused_result))
6270dberr_t
6271fts_rename_aux_tables_to_hex_format_low(
6272/*====================================*/
6273 trx_t* trx, /*!< in: transaction */
6274 dict_table_t* parent_table, /*!< in: parent table */
6275 ib_vector_t* tables) /*!< in: aux tables to rename. */
6276{
6277 dberr_t error;
6278 ulint count;
6279
6280 ut_ad(!DICT_TF2_FLAG_IS_SET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME));
6281 ut_ad(!ib_vector_is_empty(tables));
6282
6283 error = fts_update_hex_format_flag(trx, parent_table->id, true);
6284
6285 if (error != DB_SUCCESS) {
6286 ib::warn() << "Setting parent table " << parent_table->name
6287 << " to hex format failed.";
6288 fts_sql_rollback(trx);
6289 return(error);
6290 }
6291
6292 DICT_TF2_FLAG_SET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME);
6293
6294 for (count = 0; count < ib_vector_size(tables); ++count) {
6295 dict_table_t* table;
6296 fts_aux_table_t* aux_table;
6297
6298 aux_table = static_cast<fts_aux_table_t*>(
6299 ib_vector_get(tables, count));
6300
6301 table = dict_table_open_on_id(aux_table->id, TRUE,
6302 DICT_TABLE_OP_NORMAL);
6303
6304 ut_ad(table != NULL);
6305 ut_ad(!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_AUX_HEX_NAME));
6306
6307 /* Set HEX_NAME flag here to make sure we can get correct
6308 new table name in following function */
6309 DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_AUX_HEX_NAME);
6310 error = fts_rename_one_aux_table_to_hex_format(trx,
6311 aux_table, parent_table);
6312 /* We will rollback the trx if the error != DB_SUCCESS,
6313 so setting the flag here is the same with setting it in
6314 row_rename_table_for_mysql */
6315 DBUG_EXECUTE_IF("rename_aux_table_fail", error = DB_ERROR;);
6316
6317 if (error != DB_SUCCESS) {
6318 dict_table_close(table, TRUE, FALSE);
6319
6320 ib::warn() << "Failed to rename one aux table "
6321 << aux_table->name << ". Will revert"
6322 " all successful rename operations.";
6323
6324 fts_sql_rollback(trx);
6325 break;
6326 }
6327
6328 error = fts_update_hex_format_flag(trx, aux_table->id, true);
6329 dict_table_close(table, TRUE, FALSE);
6330
6331 if (error != DB_SUCCESS) {
6332 ib::warn() << "Setting aux table " << aux_table->name
6333 << " to hex format failed.";
6334
6335 fts_sql_rollback(trx);
6336 break;
6337 }
6338 }
6339
6340 if (error != DB_SUCCESS) {
6341 ut_ad(count != ib_vector_size(tables));
6342
6343 /* If rename fails, thr trx would be rolled back, we can't
6344 use it any more, we'll start a new background trx to do
6345 the reverting. */
6346
6347 ut_ad(!trx_is_started(trx));
6348
6349 bool not_rename = false;
6350
6351 /* Try to revert those succesful rename operations
6352 in order to revert the ibd file rename. */
6353 for (ulint i = 0; i <= count; ++i) {
6354 dict_table_t* table;
6355 fts_aux_table_t* aux_table;
6356 trx_t* trx_bg;
6357 dberr_t err;
6358
6359 aux_table = static_cast<fts_aux_table_t*>(
6360 ib_vector_get(tables, i));
6361
6362 table = dict_table_open_on_id(aux_table->id, TRUE,
6363 DICT_TABLE_OP_NORMAL);
6364 ut_ad(table != NULL);
6365
6366 if (not_rename) {
6367 DICT_TF2_FLAG_UNSET(table,
6368 DICT_TF2_FTS_AUX_HEX_NAME);
6369 }
6370
6371 if (!DICT_TF2_FLAG_IS_SET(table,
6372 DICT_TF2_FTS_AUX_HEX_NAME)) {
6373 dict_table_close(table, TRUE, FALSE);
6374 continue;
6375 }
6376
6377 trx_bg = trx_create();
6378 trx_bg->op_info = "Revert half done rename";
6379 trx_bg->dict_operation_lock_mode = RW_X_LATCH;
6380 trx_start_for_ddl(trx_bg, TRX_DICT_OP_TABLE);
6381
6382 DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS_AUX_HEX_NAME);
6383 err = row_rename_table_for_mysql(table->name.m_name,
6384 aux_table->name,
6385 trx_bg, FALSE);
6386
6387 trx_bg->dict_operation_lock_mode = 0;
6388 dict_table_close(table, TRUE, FALSE);
6389
6390 if (err != DB_SUCCESS) {
6391 ib::warn() << "Failed to revert table "
6392 << table->name << ". Please revert"
6393 " manually.";
6394 fts_sql_rollback(trx_bg);
6395 trx_free(trx_bg);
6396 /* Continue to clear aux tables' flags2 */
6397 not_rename = true;
6398 continue;
6399 }
6400
6401 fts_sql_commit(trx_bg);
6402 trx_free(trx_bg);
6403 }
6404
6405 DICT_TF2_FLAG_UNSET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME);
6406 }
6407
6408 return(error);
6409}
6410
6411/**********************************************************************//**
6412Convert an id, which is actually a decimal number but was regard as a HEX
6413from a string, to its real value. */
6414static
6415ib_id_t
6416fts_fake_hex_to_dec(
6417/*================*/
6418 ib_id_t id) /*!< in: number to convert */
6419{
6420 ib_id_t dec_id = 0;
6421 char tmp_id[FTS_AUX_MIN_TABLE_ID_LENGTH];
6422
6423#ifdef UNIV_DEBUG
6424 int ret =
6425#endif /* UNIV_DEBUG */
6426 sprintf(tmp_id, UINT64PFx, id);
6427 ut_ad(ret == 16);
6428#ifdef UNIV_DEBUG
6429 ret =
6430#endif /* UNIV_DEBUG */
6431 sscanf(tmp_id, "%016" UINT64scan, &dec_id);
6432 ut_ad(ret == 1);
6433
6434 return dec_id;
6435}
6436
6437/*********************************************************************//**
6438Compare two fts_aux_table_t parent_ids.
6439@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
6440UNIV_INLINE
6441int
6442fts_check_aux_table_parent_id_cmp(
6443/*==============================*/
6444 const void* p1, /*!< in: id1 */
6445 const void* p2) /*!< in: id2 */
6446{
6447 const fts_aux_table_t* fa1 = static_cast<const fts_aux_table_t*>(p1);
6448 const fts_aux_table_t* fa2 = static_cast<const fts_aux_table_t*>(p2);
6449
6450 return static_cast<int>(fa1->parent_id - fa2->parent_id);
6451}
6452
6453/** Mark all the fts index associated with the parent table as corrupted.
6454@param[in] trx transaction
6455@param[in, out] parent_table fts index associated with this parent table
6456 will be marked as corrupted. */
6457static
6458void
6459fts_parent_all_index_set_corrupt(
6460 trx_t* trx,
6461 dict_table_t* parent_table)
6462{
6463 fts_t* fts = parent_table->fts;
6464
6465 if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) {
6466 trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
6467 }
6468
6469 for (ulint j = 0; j < ib_vector_size(fts->indexes); j++) {
6470 dict_index_t* index = static_cast<dict_index_t*>(
6471 ib_vector_getp_const(fts->indexes, j));
6472 dict_set_corrupted(index,
6473 trx, "DROP ORPHANED TABLE");
6474 }
6475}
6476
6477/** Mark the fts index which index id matches the id as corrupted.
6478@param[in] trx transaction
6479@param[in] id index id to search
6480@param[in, out] parent_table parent table to check with all
6481 the index. */
6482static
6483void
6484fts_set_index_corrupt(
6485 trx_t* trx,
6486 index_id_t id,
6487 dict_table_t* table)
6488{
6489 fts_t* fts = table->fts;
6490
6491 if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) {
6492 trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
6493 }
6494
6495 for (ulint j = 0; j < ib_vector_size(fts->indexes); j++) {
6496 dict_index_t* index = static_cast<dict_index_t*>(
6497 ib_vector_getp_const(fts->indexes, j));
6498 if (index->id == id) {
6499 dict_set_corrupted(index, trx,
6500 "DROP ORPHANED TABLE");
6501 break;
6502 }
6503 }
6504}
6505
6506/** Check the index for the aux table is corrupted.
6507@param[in] aux_table auxiliary table
6508@retval nonzero if index is corrupted, zero for valid index */
6509static
6510ulint
6511fts_check_corrupt_index(
6512 fts_aux_table_t* aux_table)
6513{
6514 dict_table_t* table;
6515 dict_index_t* index;
6516 table = dict_table_open_on_id(
6517 aux_table->parent_id, TRUE, DICT_TABLE_OP_NORMAL);
6518
6519 if (table == NULL) {
6520 return(0);
6521 }
6522
6523 for (index = UT_LIST_GET_FIRST(table->indexes);
6524 index;
6525 index = UT_LIST_GET_NEXT(indexes, index)) {
6526 if (index->id == aux_table->index_id) {
6527 ut_ad(index->type & DICT_FTS);
6528 dict_table_close(table, true, false);
6529 return index->is_corrupted();
6530 }
6531 }
6532
6533 dict_table_close(table, true, false);
6534 return(0);
6535}
6536
6537/* Get parent table name if it's a fts aux table
6538@param[in] aux_table_name aux table name
6539@param[in] aux_table_len aux table length
6540@return parent table name, or NULL */
6541char*
6542fts_get_parent_table_name(
6543 const char* aux_table_name,
6544 ulint aux_table_len)
6545{
6546 fts_aux_table_t aux_table;
6547 char* parent_table_name = NULL;
6548
6549 if (fts_is_aux_table_name(&aux_table, aux_table_name, aux_table_len)) {
6550 dict_table_t* parent_table;
6551
6552 parent_table = dict_table_open_on_id(
6553 aux_table.parent_id, TRUE, DICT_TABLE_OP_NORMAL);
6554
6555 if (parent_table != NULL) {
6556 parent_table_name = mem_strdupl(
6557 parent_table->name.m_name,
6558 strlen(parent_table->name.m_name));
6559
6560 dict_table_close(parent_table, TRUE, FALSE);
6561 }
6562 }
6563
6564 return(parent_table_name);
6565}
6566
6567/** Check the validity of the parent table.
6568@param[in] aux_table auxiliary table
6569@return true if it is a valid table or false if it is not */
6570static
6571bool
6572fts_valid_parent_table(
6573 const fts_aux_table_t* aux_table)
6574{
6575 dict_table_t* parent_table;
6576 bool valid = false;
6577
6578 parent_table = dict_table_open_on_id(
6579 aux_table->parent_id, TRUE, DICT_TABLE_OP_NORMAL);
6580
6581 if (parent_table != NULL && parent_table->fts != NULL) {
6582 if (aux_table->index_id == 0) {
6583 valid = true;
6584 } else {
6585 index_id_t id = aux_table->index_id;
6586 dict_index_t* index;
6587
6588 /* Search for the FT index in the table's list. */
6589 for (index = UT_LIST_GET_FIRST(parent_table->indexes);
6590 index;
6591 index = UT_LIST_GET_NEXT(indexes, index)) {
6592 if (index->id == id) {
6593 valid = true;
6594 break;
6595 }
6596
6597 }
6598 }
6599 }
6600
6601 if (parent_table) {
6602 dict_table_close(parent_table, TRUE, FALSE);
6603 }
6604
6605 return(valid);
6606}
6607
6608/** Try to rename all aux tables of the specified parent table.
6609@param[in] aux_tables aux_tables to be renamed
6610@param[in] parent_table parent table of all aux
6611 tables stored in tables. */
6612static
6613void
6614fts_rename_aux_tables_to_hex_format(
6615 ib_vector_t* aux_tables,
6616 dict_table_t* parent_table)
6617{
6618 dberr_t err;
6619 trx_t* trx_rename = trx_create();
6620 trx_rename->op_info = "Rename aux tables to hex format";
6621 trx_rename->dict_operation_lock_mode = RW_X_LATCH;
6622 trx_start_for_ddl(trx_rename, TRX_DICT_OP_TABLE);
6623
6624 err = fts_rename_aux_tables_to_hex_format_low(trx_rename,
6625 parent_table, aux_tables);
6626
6627 trx_rename->dict_operation_lock_mode = 0;
6628
6629 if (err != DB_SUCCESS) {
6630
6631 ib::warn() << "Rollback operations on all aux tables of "
6632 "table "<< parent_table->name << ". All the fts index "
6633 "associated with the table are marked as corrupted. "
6634 "Please rebuild the index again.";
6635
6636 /* Corrupting the fts index related to parent table. */
6637 trx_t* trx_corrupt;
6638 trx_corrupt = trx_create();
6639 trx_corrupt->dict_operation_lock_mode = RW_X_LATCH;
6640 trx_start_for_ddl(trx_corrupt, TRX_DICT_OP_TABLE);
6641 fts_parent_all_index_set_corrupt(trx_corrupt, parent_table);
6642 trx_corrupt->dict_operation_lock_mode = 0;
6643 fts_sql_commit(trx_corrupt);
6644 trx_free(trx_corrupt);
6645 } else {
6646 fts_sql_commit(trx_rename);
6647 }
6648
6649 trx_free(trx_rename);
6650 ib_vector_reset(aux_tables);
6651}
6652
6653/** Set the hex format flag for the parent table.
6654@param[in, out] parent_table parent table
6655@param[in] trx transaction */
6656static
6657void
6658fts_set_parent_hex_format_flag(
6659 dict_table_t* parent_table,
6660 trx_t* trx)
6661{
6662 if (!DICT_TF2_FLAG_IS_SET(parent_table,
6663 DICT_TF2_FTS_AUX_HEX_NAME)) {
6664 DBUG_EXECUTE_IF("parent_table_flag_fail", DBUG_SUICIDE(););
6665
6666 dberr_t err = fts_update_hex_format_flag(
6667 trx, parent_table->id, true);
6668
6669 if (err != DB_SUCCESS) {
6670 ib::fatal() << "Setting parent table "
6671 << parent_table->name
6672 << "to hex format failed. Please try "
6673 << "to restart the server again, if it "
6674 << "doesn't work, the system tables "
6675 << "might be corrupted.";
6676 } else {
6677 DICT_TF2_FLAG_SET(
6678 parent_table, DICT_TF2_FTS_AUX_HEX_NAME);
6679 }
6680 }
6681}
6682
6683/** Drop the obsolete auxilary table.
6684@param[in] tables tables to be dropped. */
6685static
6686void
6687fts_drop_obsolete_aux_table_from_vector(
6688 ib_vector_t* tables)
6689{
6690 dberr_t err;
6691
6692 for (ulint count = 0; count < ib_vector_size(tables);
6693 ++count) {
6694
6695 fts_aux_table_t* aux_drop_table;
6696 aux_drop_table = static_cast<fts_aux_table_t*>(
6697 ib_vector_get(tables, count));
6698 trx_t* trx_drop = trx_create();
6699 trx_drop->op_info = "Drop obsolete aux tables";
6700 trx_drop->dict_operation_lock_mode = RW_X_LATCH;
6701 trx_start_for_ddl(trx_drop, TRX_DICT_OP_TABLE);
6702
6703 err = row_drop_table_for_mysql(
6704 aux_drop_table->name, trx_drop, false, true);
6705
6706 trx_drop->dict_operation_lock_mode = 0;
6707
6708 if (err != DB_SUCCESS) {
6709 /* We don't need to worry about the
6710 failure, since server would try to
6711 drop it on next restart, even if
6712 the table was broken. */
6713 ib::warn() << "Failed to drop obsolete aux table "
6714 << aux_drop_table->name << ", which is "
6715 << "harmless. will try to drop it on next "
6716 << "restart.";
6717
6718 fts_sql_rollback(trx_drop);
6719 } else {
6720 ib::info() << "Dropped obsolete aux"
6721 " table '" << aux_drop_table->name
6722 << "'.";
6723
6724 fts_sql_commit(trx_drop);
6725 }
6726
6727 trx_free(trx_drop);
6728 }
6729}
6730
6731/** Drop all the auxiliary table present in the vector.
6732@param[in] trx transaction
6733@param[in] tables tables to be dropped */
6734static
6735void
6736fts_drop_aux_table_from_vector(
6737 trx_t* trx,
6738 ib_vector_t* tables)
6739{
6740 for (ulint count = 0; count < ib_vector_size(tables);
6741 ++count) {
6742 fts_aux_table_t* aux_drop_table;
6743 aux_drop_table = static_cast<fts_aux_table_t*>(
6744 ib_vector_get(tables, count));
6745
6746 /* Check for the validity of the parent table */
6747 if (!fts_valid_parent_table(aux_drop_table)) {
6748
6749 ib::warn() << "Parent table of FTS auxiliary table "
6750 << aux_drop_table->name << " not found.";
6751
6752 dberr_t err = fts_drop_table(trx, aux_drop_table->name);
6753 if (err == DB_FAIL) {
6754
6755 char* path = fil_make_filepath(
6756 NULL, aux_drop_table->name, IBD, false);
6757
6758 if (path != NULL) {
6759 os_file_delete_if_exists(
6760 innodb_data_file_key,
6761 path , NULL);
6762 ut_free(path);
6763 }
6764 }
6765 }
6766 }
6767}
6768
6769/**********************************************************************//**
6770Check and drop all orphaned FTS auxiliary tables, those that don't have
6771a parent table or FTS index defined on them.
6772@return DB_SUCCESS or error code */
6773static MY_ATTRIBUTE((nonnull))
6774void
6775fts_check_and_drop_orphaned_tables(
6776/*===============================*/
6777 trx_t* trx, /*!< in: transaction */
6778 ib_vector_t* tables) /*!< in: tables to check */
6779{
6780 mem_heap_t* heap;
6781 ib_vector_t* aux_tables_to_rename;
6782 ib_vector_t* invalid_aux_tables;
6783 ib_vector_t* valid_aux_tables;
6784 ib_vector_t* drop_aux_tables;
6785 ib_vector_t* obsolete_aux_tables;
6786 ib_alloc_t* heap_alloc;
6787
6788 heap = mem_heap_create(1024);
6789 heap_alloc = ib_heap_allocator_create(heap);
6790
6791 /* We store all aux tables belonging to the same parent table here,
6792 and rename all these tables in a batch mode. */
6793 aux_tables_to_rename = ib_vector_create(heap_alloc,
6794 sizeof(fts_aux_table_t), 128);
6795
6796 /* We store all fake auxiliary table and orphaned table here. */
6797 invalid_aux_tables = ib_vector_create(heap_alloc,
6798 sizeof(fts_aux_table_t), 128);
6799
6800 /* We store all valid aux tables. We use this to filter the
6801 fake auxiliary table from invalid auxiliary tables. */
6802 valid_aux_tables = ib_vector_create(heap_alloc,
6803 sizeof(fts_aux_table_t), 128);
6804
6805 /* We store all auxiliary tables to be dropped. */
6806 drop_aux_tables = ib_vector_create(heap_alloc,
6807 sizeof(fts_aux_table_t), 128);
6808
6809 /* We store all obsolete auxiliary tables to be dropped. */
6810 obsolete_aux_tables = ib_vector_create(heap_alloc,
6811 sizeof(fts_aux_table_t), 128);
6812
6813 /* Sort by parent_id first, in case rename will fail */
6814 ib_vector_sort(tables, fts_check_aux_table_parent_id_cmp);
6815
6816 for (ulint i = 0; i < ib_vector_size(tables); ++i) {
6817 dict_table_t* parent_table;
6818 fts_aux_table_t* aux_table;
6819 bool drop = false;
6820 dict_table_t* table;
6821 fts_aux_table_t* next_aux_table = NULL;
6822 ib_id_t orig_parent_id = 0;
6823 ib_id_t orig_index_id = 0;
6824 bool rename = false;
6825
6826 aux_table = static_cast<fts_aux_table_t*>(
6827 ib_vector_get(tables, i));
6828
6829 table = dict_table_open_on_id(
6830 aux_table->id, TRUE, DICT_TABLE_OP_NORMAL);
6831 orig_parent_id = aux_table->parent_id;
6832 orig_index_id = aux_table->index_id;
6833
6834 if (table == NULL
6835 || strcmp(table->name.m_name, aux_table->name)) {
6836
6837 bool fake_aux = false;
6838
6839 if (table != NULL) {
6840 dict_table_close(table, TRUE, FALSE);
6841 }
6842
6843 if (i + 1 < ib_vector_size(tables)) {
6844 next_aux_table = static_cast<fts_aux_table_t*>(
6845 ib_vector_get(tables, i + 1));
6846 }
6847
6848 /* To know whether aux table is fake fts or
6849 orphan fts table. */
6850 for (ulint count = 0;
6851 count < ib_vector_size(valid_aux_tables);
6852 count++) {
6853 fts_aux_table_t* valid_aux;
6854 valid_aux = static_cast<fts_aux_table_t*>(
6855 ib_vector_get(valid_aux_tables, count));
6856 if (strcmp(valid_aux->name,
6857 aux_table->name) == 0) {
6858 fake_aux = true;
6859 break;
6860 }
6861 }
6862
6863 /* All aux tables of parent table, whose id is
6864 last_parent_id, have been checked, try to rename
6865 them if necessary. */
6866 if ((next_aux_table == NULL
6867 || orig_parent_id != next_aux_table->parent_id)
6868 && (!ib_vector_is_empty(aux_tables_to_rename))) {
6869
6870 ib_id_t parent_id = fts_fake_hex_to_dec(
6871 aux_table->parent_id);
6872
6873 parent_table = dict_table_open_on_id(
6874 parent_id, TRUE,
6875 DICT_TABLE_OP_NORMAL);
6876
6877 fts_rename_aux_tables_to_hex_format(
6878 aux_tables_to_rename, parent_table);
6879
6880 dict_table_close(parent_table, TRUE,
6881 FALSE);
6882 }
6883
6884 /* If the aux table is fake aux table. Skip it. */
6885 if (!fake_aux) {
6886 ib_vector_push(invalid_aux_tables, aux_table);
6887 }
6888
6889 continue;
6890 } else if (!DICT_TF2_FLAG_IS_SET(table,
6891 DICT_TF2_FTS_AUX_HEX_NAME)) {
6892
6893 aux_table->parent_id = fts_fake_hex_to_dec(
6894 aux_table->parent_id);
6895
6896 if (aux_table->index_id != 0) {
6897 aux_table->index_id = fts_fake_hex_to_dec(
6898 aux_table->index_id);
6899 }
6900
6901 ut_ad(aux_table->id > aux_table->parent_id);
6902
6903 /* Check whether parent table id and index id
6904 are stored as decimal format. */
6905 if (fts_valid_parent_table(aux_table)) {
6906
6907 parent_table = dict_table_open_on_id(
6908 aux_table->parent_id, true,
6909 DICT_TABLE_OP_NORMAL);
6910
6911 ut_ad(parent_table != NULL);
6912 ut_ad(parent_table->fts != NULL);
6913
6914 if (!DICT_TF2_FLAG_IS_SET(
6915 parent_table,
6916 DICT_TF2_FTS_AUX_HEX_NAME)) {
6917 rename = true;
6918 }
6919
6920 dict_table_close(parent_table, TRUE, FALSE);
6921 }
6922
6923 if (!rename) {
6924 /* Reassign the original value of
6925 aux table if it is not in decimal format */
6926 aux_table->parent_id = orig_parent_id;
6927 aux_table->index_id = orig_index_id;
6928 }
6929 }
6930
6931 if (table != NULL) {
6932 dict_table_close(table, TRUE, FALSE);
6933 }
6934
6935 if (!rename) {
6936 /* Check the validity of the parent table. */
6937 if (!fts_valid_parent_table(aux_table)) {
6938 drop = true;
6939 }
6940 }
6941
6942 /* Filter out the fake aux table by comparing with the
6943 current valid auxiliary table name. */
6944 for (ulint count = 0;
6945 count < ib_vector_size(invalid_aux_tables); count++) {
6946 fts_aux_table_t* invalid_aux;
6947 invalid_aux = static_cast<fts_aux_table_t*>(
6948 ib_vector_get(invalid_aux_tables, count));
6949 if (strcmp(invalid_aux->name, aux_table->name) == 0) {
6950 ib_vector_remove(
6951 invalid_aux_tables,
6952 *reinterpret_cast<void**>(invalid_aux));
6953 break;
6954 }
6955 }
6956
6957 ib_vector_push(valid_aux_tables, aux_table);
6958
6959 /* If the index associated with aux table is corrupted,
6960 skip it. */
6961 if (fts_check_corrupt_index(aux_table) > 0) {
6962
6963 if (i + 1 < ib_vector_size(tables)) {
6964 next_aux_table = static_cast<fts_aux_table_t*>(
6965 ib_vector_get(tables, i + 1));
6966 }
6967
6968 if (next_aux_table == NULL
6969 || orig_parent_id != next_aux_table->parent_id) {
6970
6971 parent_table = dict_table_open_on_id(
6972 aux_table->parent_id, TRUE,
6973 DICT_TABLE_OP_NORMAL);
6974
6975 if (!ib_vector_is_empty(aux_tables_to_rename)) {
6976 fts_rename_aux_tables_to_hex_format(
6977 aux_tables_to_rename, parent_table);
6978 } else {
6979 fts_set_parent_hex_format_flag(
6980 parent_table, trx);
6981 }
6982
6983 dict_table_close(parent_table, TRUE, FALSE);
6984 }
6985
6986 continue;
6987 }
6988
6989 parent_table = dict_table_open_on_id(
6990 aux_table->parent_id, TRUE, DICT_TABLE_OP_NORMAL);
6991
6992 if (drop) {
6993 ib_vector_push(drop_aux_tables, aux_table);
6994 } else {
6995 if (FTS_IS_OBSOLETE_AUX_TABLE(aux_table->name)) {
6996 ib_vector_push(obsolete_aux_tables, aux_table);
6997 continue;
6998 }
6999 }
7000
7001 /* If the aux table is in decimal format, we should
7002 rename it, so push it to aux_tables_to_rename */
7003 if (!drop && rename) {
7004 bool rename_table = true;
7005 for (ulint count = 0;
7006 count < ib_vector_size(aux_tables_to_rename);
7007 count++) {
7008 fts_aux_table_t* rename_aux =
7009 static_cast<fts_aux_table_t*>(
7010 ib_vector_get(aux_tables_to_rename,
7011 count));
7012 if (strcmp(rename_aux->name,
7013 aux_table->name) == 0) {
7014 rename_table = false;
7015 break;
7016 }
7017 }
7018
7019 if (rename_table) {
7020 ib_vector_push(aux_tables_to_rename,
7021 aux_table);
7022 }
7023 }
7024
7025 if (i + 1 < ib_vector_size(tables)) {
7026 next_aux_table = static_cast<fts_aux_table_t*>(
7027 ib_vector_get(tables, i + 1));
7028 }
7029
7030 if ((next_aux_table == NULL
7031 || orig_parent_id != next_aux_table->parent_id)
7032 && !ib_vector_is_empty(aux_tables_to_rename)) {
7033
7034 ut_ad(rename);
7035 ut_ad(!DICT_TF2_FLAG_IS_SET(
7036 parent_table, DICT_TF2_FTS_AUX_HEX_NAME));
7037
7038 fts_rename_aux_tables_to_hex_format(
7039 aux_tables_to_rename,parent_table);
7040 }
7041
7042 /* The IDs are already in correct hex format. */
7043 if (!drop && !rename) {
7044 dict_table_t* table;
7045
7046 table = dict_table_open_on_id(
7047 aux_table->id, TRUE, DICT_TABLE_OP_NORMAL);
7048
7049 if (table != NULL
7050 && strcmp(table->name.m_name, aux_table->name)) {
7051 dict_table_close(table, TRUE, FALSE);
7052 table = NULL;
7053 }
7054
7055 if (table != NULL
7056 && !DICT_TF2_FLAG_IS_SET(
7057 table,
7058 DICT_TF2_FTS_AUX_HEX_NAME)) {
7059
7060 DBUG_EXECUTE_IF("aux_table_flag_fail",
7061 ib::warn() << "Setting aux table "
7062 << table->name << " to hex "
7063 "format failed.";
7064 fts_set_index_corrupt(
7065 trx, aux_table->index_id,
7066 parent_table);
7067 goto table_exit;);
7068
7069 dberr_t err = fts_update_hex_format_flag(
7070 trx, table->id, true);
7071
7072 if (err != DB_SUCCESS) {
7073 ib::warn() << "Setting aux table "
7074 << table->name << " to hex "
7075 "format failed.";
7076
7077 fts_set_index_corrupt(
7078 trx, aux_table->index_id,
7079 parent_table);
7080 } else {
7081 DICT_TF2_FLAG_SET(table,
7082 DICT_TF2_FTS_AUX_HEX_NAME);
7083 }
7084 }
7085#ifndef DBUG_OFF
7086table_exit:
7087#endif /* !DBUG_OFF */
7088
7089 if (table != NULL) {
7090 dict_table_close(table, TRUE, FALSE);
7091 }
7092
7093 ut_ad(parent_table != NULL);
7094
7095 fts_set_parent_hex_format_flag(
7096 parent_table, trx);
7097 }
7098
7099 if (parent_table != NULL) {
7100 dict_table_close(parent_table, TRUE, FALSE);
7101 }
7102 }
7103
7104 fts_drop_aux_table_from_vector(trx, invalid_aux_tables);
7105 fts_drop_aux_table_from_vector(trx, drop_aux_tables);
7106 fts_sql_commit(trx);
7107
7108 fts_drop_obsolete_aux_table_from_vector(obsolete_aux_tables);
7109
7110 /* Free the memory allocated at the beginning */
7111 if (heap != NULL) {
7112 mem_heap_free(heap);
7113 }
7114}
7115
7116/**********************************************************************//**
7117Drop all orphaned FTS auxiliary tables, those that don't have a parent
7118table or FTS index defined on them. */
7119void
7120fts_drop_orphaned_tables(void)
7121/*==========================*/
7122{
7123 trx_t* trx;
7124 pars_info_t* info;
7125 mem_heap_t* heap;
7126 que_t* graph;
7127 ib_vector_t* tables;
7128 ib_alloc_t* heap_alloc;
7129
7130 heap = mem_heap_create(1024);
7131 heap_alloc = ib_heap_allocator_create(heap);
7132
7133 /* We store the table ids of all the FTS indexes that were found. */
7134 tables = ib_vector_create(heap_alloc, sizeof(fts_aux_table_t), 128);
7135
7136 /* Get the list of all known .ibd files and check for orphaned
7137 FTS auxiliary files in that list. We need to remove them because
7138 users can't map them back to table names and this will create
7139 unnecessary clutter. */
7140
7141 mutex_enter(&fil_system.mutex);
7142
7143 for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system.space_list);
7144 space != NULL;
7145 space = UT_LIST_GET_NEXT(space_list, space)) {
7146
7147 if (space->purpose != FIL_TYPE_TABLESPACE) {
7148 continue;
7149 }
7150
7151 fts_aux_table_t fts_aux_table;
7152 memset(&fts_aux_table, 0x0, sizeof fts_aux_table);
7153
7154 size_t len = strlen(space->name);
7155
7156 if (!fts_is_aux_table_name(&fts_aux_table, space->name, len)) {
7157 continue;
7158 }
7159
7160 fts_aux_table.id = space->id;
7161 fts_aux_table.name = mem_heap_strdupl(heap, space->name, len);
7162 ib_vector_push(tables, &fts_aux_table);
7163 }
7164
7165 mutex_exit(&fil_system.mutex);
7166
7167 trx = trx_create();
7168 trx->op_info = "dropping orphaned FTS tables";
7169 row_mysql_lock_data_dictionary(trx);
7170
7171 info = pars_info_create();
7172
7173 pars_info_bind_function(info, "my_func", fts_read_tables, tables);
7174
7175 graph = fts_parse_sql_no_dict_lock(
7176 info,
7177 "DECLARE FUNCTION my_func;\n"
7178 "DECLARE CURSOR c IS"
7179 " SELECT NAME, ID"
7180 " FROM SYS_TABLES;\n"
7181 "BEGIN\n"
7182 "\n"
7183 "OPEN c;\n"
7184 "WHILE 1 = 1 LOOP\n"
7185 " FETCH c INTO my_func();\n"
7186 " IF c % NOTFOUND THEN\n"
7187 " EXIT;\n"
7188 " END IF;\n"
7189 "END LOOP;\n"
7190 "CLOSE c;");
7191
7192 for (;;) {
7193 dberr_t error = fts_eval_sql(trx, graph);
7194
7195 if (error == DB_SUCCESS) {
7196 fts_check_and_drop_orphaned_tables(trx, tables);
7197 break; /* Exit the loop. */
7198 } else {
7199 ib_vector_reset(tables);
7200
7201 fts_sql_rollback(trx);
7202
7203 if (error == DB_LOCK_WAIT_TIMEOUT) {
7204 ib::warn() << "lock wait timeout reading"
7205 " SYS_TABLES. Retrying!";
7206
7207 trx->error_state = DB_SUCCESS;
7208 } else {
7209 ib::error() << "(" << ut_strerr(error)
7210 << ") while reading SYS_TABLES.";
7211
7212 break; /* Exit the loop. */
7213 }
7214 }
7215 }
7216
7217 que_graph_free(graph);
7218
7219 row_mysql_unlock_data_dictionary(trx);
7220
7221 trx_free(trx);
7222
7223 if (heap != NULL) {
7224 mem_heap_free(heap);
7225 }
7226}
7227
7228/**********************************************************************//**
7229Check whether user supplied stopword table is of the right format.
7230Caller is responsible to hold dictionary locks.
7231@return the stopword column charset if qualifies */
7232CHARSET_INFO*
7233fts_valid_stopword_table(
7234/*=====================*/
7235 const char* stopword_table_name) /*!< in: Stopword table
7236 name */
7237{
7238 dict_table_t* table;
7239 dict_col_t* col = NULL;
7240
7241 if (!stopword_table_name) {
7242 return(NULL);
7243 }
7244
7245 table = dict_table_get_low(stopword_table_name);
7246
7247 if (!table) {
7248 ib::error() << "User stopword table " << stopword_table_name
7249 << " does not exist.";
7250
7251 return(NULL);
7252 } else {
7253 const char* col_name;
7254
7255 col_name = dict_table_get_col_name(table, 0);
7256
7257 if (ut_strcmp(col_name, "value")) {
7258 ib::error() << "Invalid column name for stopword"
7259 " table " << stopword_table_name << ". Its"
7260 " first column must be named as 'value'.";
7261
7262 return(NULL);
7263 }
7264
7265 col = dict_table_get_nth_col(table, 0);
7266
7267 if (col->mtype != DATA_VARCHAR
7268 && col->mtype != DATA_VARMYSQL) {
7269 ib::error() << "Invalid column type for stopword"
7270 " table " << stopword_table_name << ". Its"
7271 " first column must be of varchar type";
7272
7273 return(NULL);
7274 }
7275 }
7276
7277 ut_ad(col);
7278
7279 return(fts_get_charset(col->prtype));
7280}
7281
7282/**********************************************************************//**
7283This function loads the stopword into the FTS cache. It also
7284records/fetches stopword configuration to/from FTS configure
7285table, depending on whether we are creating or reloading the
7286FTS.
7287@return TRUE if load operation is successful */
7288ibool
7289fts_load_stopword(
7290/*==============*/
7291 const dict_table_t*
7292 table, /*!< in: Table with FTS */
7293 trx_t* trx, /*!< in: Transactions */
7294 const char* global_stopword_table, /*!< in: Global stopword table
7295 name */
7296 const char* session_stopword_table, /*!< in: Session stopword table
7297 name */
7298 ibool stopword_is_on, /*!< in: Whether stopword
7299 option is turned on/off */
7300 ibool reload) /*!< in: Whether it is
7301 for reloading FTS table */
7302{
7303 fts_table_t fts_table;
7304 fts_string_t str;
7305 dberr_t error = DB_SUCCESS;
7306 ulint use_stopword;
7307 fts_cache_t* cache;
7308 const char* stopword_to_use = NULL;
7309 ibool new_trx = FALSE;
7310 byte str_buffer[MAX_FULL_NAME_LEN + 1];
7311
7312 FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE, table);
7313
7314 cache = table->fts->cache;
7315
7316 if (!reload && !(cache->stopword_info.status
7317 & STOPWORD_NOT_INIT)) {
7318 return(TRUE);
7319 }
7320
7321 if (!trx) {
7322 trx = trx_create();
7323 if (srv_read_only_mode) {
7324 trx_start_internal_read_only(trx);
7325 } else {
7326 trx_start_internal(trx);
7327 }
7328 trx->op_info = "upload FTS stopword";
7329 new_trx = TRUE;
7330 }
7331
7332 /* First check whether stopword filtering is turned off */
7333 if (reload) {
7334 error = fts_config_get_ulint(
7335 trx, &fts_table, FTS_USE_STOPWORD, &use_stopword);
7336 } else {
7337 use_stopword = (ulint) stopword_is_on;
7338
7339 error = fts_config_set_ulint(
7340 trx, &fts_table, FTS_USE_STOPWORD, use_stopword);
7341 }
7342
7343 if (error != DB_SUCCESS) {
7344 goto cleanup;
7345 }
7346
7347 /* If stopword is turned off, no need to continue to load the
7348 stopword into cache, but still need to do initialization */
7349 if (!use_stopword) {
7350 cache->stopword_info.status = STOPWORD_OFF;
7351 goto cleanup;
7352 }
7353
7354 if (reload) {
7355 /* Fetch the stopword table name from FTS config
7356 table */
7357 str.f_n_char = 0;
7358 str.f_str = str_buffer;
7359 str.f_len = sizeof(str_buffer) - 1;
7360
7361 error = fts_config_get_value(
7362 trx, &fts_table, FTS_STOPWORD_TABLE_NAME, &str);
7363
7364 if (error != DB_SUCCESS) {
7365 goto cleanup;
7366 }
7367
7368 if (strlen((char*) str.f_str) > 0) {
7369 stopword_to_use = (const char*) str.f_str;
7370 }
7371 } else {
7372 stopword_to_use = (session_stopword_table)
7373 ? session_stopword_table : global_stopword_table;
7374 }
7375
7376 if (stopword_to_use
7377 && fts_load_user_stopword(table->fts, stopword_to_use,
7378 &cache->stopword_info)) {
7379 /* Save the stopword table name to the configure
7380 table */
7381 if (!reload) {
7382 str.f_n_char = 0;
7383 str.f_str = (byte*) stopword_to_use;
7384 str.f_len = ut_strlen(stopword_to_use);
7385
7386 error = fts_config_set_value(
7387 trx, &fts_table, FTS_STOPWORD_TABLE_NAME, &str);
7388 }
7389 } else {
7390 /* Load system default stopword list */
7391 fts_load_default_stopword(&cache->stopword_info);
7392 }
7393
7394cleanup:
7395 if (new_trx) {
7396 if (error == DB_SUCCESS) {
7397 fts_sql_commit(trx);
7398 } else {
7399 fts_sql_rollback(trx);
7400 }
7401
7402 trx_free(trx);
7403 }
7404
7405 if (!cache->stopword_info.cached_stopword) {
7406 cache->stopword_info.cached_stopword = rbt_create_arg_cmp(
7407 sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
7408 &my_charset_latin1);
7409 }
7410
7411 return(error == DB_SUCCESS);
7412}
7413
7414/**********************************************************************//**
7415Callback function when we initialize the FTS at the start up
7416time. It recovers the maximum Doc IDs presented in the current table.
7417@return: always returns TRUE */
7418static
7419ibool
7420fts_init_get_doc_id(
7421/*================*/
7422 void* row, /*!< in: sel_node_t* */
7423 void* user_arg) /*!< in: fts cache */
7424{
7425 doc_id_t doc_id = FTS_NULL_DOC_ID;
7426 sel_node_t* node = static_cast<sel_node_t*>(row);
7427 que_node_t* exp = node->select_list;
7428 fts_cache_t* cache = static_cast<fts_cache_t*>(user_arg);
7429
7430 ut_ad(ib_vector_is_empty(cache->get_docs));
7431
7432 /* Copy each indexed column content into doc->text.f_str */
7433 if (exp) {
7434 dfield_t* dfield = que_node_get_val(exp);
7435 dtype_t* type = dfield_get_type(dfield);
7436 void* data = dfield_get_data(dfield);
7437
7438 ut_a(dtype_get_mtype(type) == DATA_INT);
7439
7440 doc_id = static_cast<doc_id_t>(mach_read_from_8(
7441 static_cast<const byte*>(data)));
7442
7443 if (doc_id >= cache->next_doc_id) {
7444 cache->next_doc_id = doc_id + 1;
7445 }
7446 }
7447
7448 return(TRUE);
7449}
7450
7451/**********************************************************************//**
7452Callback function when we initialize the FTS at the start up
7453time. It recovers Doc IDs that have not sync-ed to the auxiliary
7454table, and require to bring them back into FTS index.
7455@return: always returns TRUE */
7456static
7457ibool
7458fts_init_recover_doc(
7459/*=================*/
7460 void* row, /*!< in: sel_node_t* */
7461 void* user_arg) /*!< in: fts cache */
7462{
7463
7464 fts_doc_t doc;
7465 ulint doc_len = 0;
7466 ulint field_no = 0;
7467 fts_get_doc_t* get_doc = static_cast<fts_get_doc_t*>(user_arg);
7468 doc_id_t doc_id = FTS_NULL_DOC_ID;
7469 sel_node_t* node = static_cast<sel_node_t*>(row);
7470 que_node_t* exp = node->select_list;
7471 fts_cache_t* cache = get_doc->cache;
7472 st_mysql_ftparser* parser = get_doc->index_cache->index->parser;
7473
7474 fts_doc_init(&doc);
7475 doc.found = TRUE;
7476
7477 ut_ad(cache);
7478
7479 /* Copy each indexed column content into doc->text.f_str */
7480 while (exp) {
7481 dfield_t* dfield = que_node_get_val(exp);
7482 ulint len = dfield_get_len(dfield);
7483
7484 if (field_no == 0) {
7485 dtype_t* type = dfield_get_type(dfield);
7486 void* data = dfield_get_data(dfield);
7487
7488 ut_a(dtype_get_mtype(type) == DATA_INT);
7489
7490 doc_id = static_cast<doc_id_t>(mach_read_from_8(
7491 static_cast<const byte*>(data)));
7492
7493 field_no++;
7494 exp = que_node_get_next(exp);
7495 continue;
7496 }
7497
7498 if (len == UNIV_SQL_NULL) {
7499 exp = que_node_get_next(exp);
7500 continue;
7501 }
7502
7503 ut_ad(get_doc);
7504
7505 if (!get_doc->index_cache->charset) {
7506 get_doc->index_cache->charset = fts_get_charset(
7507 dfield->type.prtype);
7508 }
7509
7510 doc.charset = get_doc->index_cache->charset;
7511
7512 if (dfield_is_ext(dfield)) {
7513 dict_table_t* table = cache->sync->table;
7514
7515 doc.text.f_str = btr_copy_externally_stored_field(
7516 &doc.text.f_len,
7517 static_cast<byte*>(dfield_get_data(dfield)),
7518 dict_table_page_size(table), len,
7519 static_cast<mem_heap_t*>(doc.self_heap->arg));
7520 } else {
7521 doc.text.f_str = static_cast<byte*>(
7522 dfield_get_data(dfield));
7523
7524 doc.text.f_len = len;
7525 }
7526
7527 if (field_no == 1) {
7528 fts_tokenize_document(&doc, NULL, parser);
7529 } else {
7530 fts_tokenize_document_next(&doc, doc_len, NULL, parser);
7531 }
7532
7533 exp = que_node_get_next(exp);
7534
7535 doc_len += (exp) ? len + 1 : len;
7536
7537 field_no++;
7538 }
7539
7540 fts_cache_add_doc(cache, get_doc->index_cache, doc_id, doc.tokens);
7541
7542 fts_doc_free(&doc);
7543
7544 cache->added++;
7545
7546 if (doc_id >= cache->next_doc_id) {
7547 cache->next_doc_id = doc_id + 1;
7548 }
7549
7550 return(TRUE);
7551}
7552
7553/**********************************************************************//**
7554This function brings FTS index in sync when FTS index is first
7555used. There are documents that have not yet sync-ed to auxiliary
7556tables from last server abnormally shutdown, we will need to bring
7557such document into FTS cache before any further operations
7558@return TRUE if all OK */
7559ibool
7560fts_init_index(
7561/*===========*/
7562 dict_table_t* table, /*!< in: Table with FTS */
7563 ibool has_cache_lock) /*!< in: Whether we already have
7564 cache lock */
7565{
7566 dict_index_t* index;
7567 doc_id_t start_doc;
7568 fts_get_doc_t* get_doc = NULL;
7569 fts_cache_t* cache = table->fts->cache;
7570 bool need_init = false;
7571
7572 ut_ad(!mutex_own(&dict_sys->mutex));
7573
7574 /* First check cache->get_docs is initialized */
7575 if (!has_cache_lock) {
7576 rw_lock_x_lock(&cache->lock);
7577 }
7578
7579 rw_lock_x_lock(&cache->init_lock);
7580 if (cache->get_docs == NULL) {
7581 cache->get_docs = fts_get_docs_create(cache);
7582 }
7583 rw_lock_x_unlock(&cache->init_lock);
7584
7585 if (table->fts->fts_status & ADDED_TABLE_SYNCED) {
7586 goto func_exit;
7587 }
7588
7589 need_init = true;
7590
7591 start_doc = cache->synced_doc_id;
7592
7593 if (!start_doc) {
7594 fts_cmp_set_sync_doc_id(table, 0, TRUE, &start_doc);
7595 cache->synced_doc_id = start_doc;
7596 }
7597
7598 /* No FTS index, this is the case when previous FTS index
7599 dropped, and we re-initialize the Doc ID system for subsequent
7600 insertion */
7601 if (ib_vector_is_empty(cache->get_docs)) {
7602 index = table->fts_doc_id_index;
7603
7604 ut_a(index);
7605
7606 fts_doc_fetch_by_doc_id(NULL, start_doc, index,
7607 FTS_FETCH_DOC_BY_ID_LARGE,
7608 fts_init_get_doc_id, cache);
7609 } else {
7610 if (table->fts->cache->stopword_info.status
7611 & STOPWORD_NOT_INIT) {
7612 fts_load_stopword(table, NULL, NULL, NULL, TRUE, TRUE);
7613 }
7614
7615 for (ulint i = 0; i < ib_vector_size(cache->get_docs); ++i) {
7616 get_doc = static_cast<fts_get_doc_t*>(
7617 ib_vector_get(cache->get_docs, i));
7618
7619 index = get_doc->index_cache->index;
7620
7621 fts_doc_fetch_by_doc_id(NULL, start_doc, index,
7622 FTS_FETCH_DOC_BY_ID_LARGE,
7623 fts_init_recover_doc, get_doc);
7624 }
7625 }
7626
7627 table->fts->fts_status |= ADDED_TABLE_SYNCED;
7628
7629 fts_get_docs_clear(cache->get_docs);
7630
7631func_exit:
7632 if (!has_cache_lock) {
7633 rw_lock_x_unlock(&cache->lock);
7634 }
7635
7636 if (need_init) {
7637 mutex_enter(&dict_sys->mutex);
7638 /* Register the table with the optimize thread. */
7639 fts_optimize_add_table(table);
7640 mutex_exit(&dict_sys->mutex);
7641 }
7642
7643 return(TRUE);
7644}
7645
7646/** Check if the all the auxillary tables associated with FTS index are in
7647consistent state. For now consistency is check only by ensuring
7648index->page_no != FIL_NULL
7649@param[out] base_table table has host fts index
7650@param[in,out] trx trx handler */
7651void
7652fts_check_corrupt(
7653 dict_table_t* base_table,
7654 trx_t* trx)
7655{
7656 bool sane = true;
7657 fts_table_t fts_table;
7658
7659 /* Iterate over the common table and check for their sanity. */
7660 FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, base_table);
7661
7662 for (ulint i = 0; fts_common_tables[i] != NULL && sane; ++i) {
7663
7664 char table_name[MAX_FULL_NAME_LEN];
7665
7666 fts_table.suffix = fts_common_tables[i];
7667 fts_get_table_name(&fts_table, table_name);
7668
7669 dict_table_t* aux_table = dict_table_open_on_name(
7670 table_name, true, FALSE, DICT_ERR_IGNORE_NONE);
7671
7672 if (aux_table == NULL) {
7673 dict_set_corrupted(
7674 dict_table_get_first_index(base_table),
7675 trx, "FTS_SANITY_CHECK");
7676 ut_ad(base_table->corrupted == TRUE);
7677 sane = false;
7678 continue;
7679 }
7680
7681 for (dict_index_t* aux_table_index =
7682 UT_LIST_GET_FIRST(aux_table->indexes);
7683 aux_table_index != NULL;
7684 aux_table_index =
7685 UT_LIST_GET_NEXT(indexes, aux_table_index)) {
7686
7687 /* Check if auxillary table needed for FTS is sane. */
7688 if (aux_table_index->page == FIL_NULL) {
7689 dict_set_corrupted(
7690 dict_table_get_first_index(base_table),
7691 trx, "FTS_SANITY_CHECK");
7692 ut_ad(base_table->corrupted == TRUE);
7693 sane = false;
7694 }
7695 }
7696
7697 dict_table_close(aux_table, FALSE, FALSE);
7698 }
7699}
7700