1/*****************************************************************************
2
3Copyright (c) 2010, 2016, Oracle and/or its affiliates. All Rights Reserved.
4Copyright (c) 2015, 2018, MariaDB Corporation.
5
6This program is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free Software
8Foundation; version 2 of the License.
9
10This program is distributed in the hope that it will be useful, but WITHOUT
11ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
14You should have received a copy of the GNU General Public License along with
15this program; if not, write to the Free Software Foundation, Inc.,
1651 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
17
18*****************************************************************************/
19
20/**************************************************//**
21@file include/row0ftsort.h
22Create Full Text Index with (parallel) merge sort
23
24Created 10/13/2010 Jimmy Yang
25*******************************************************/
26
27#ifndef row0ftsort_h
28#define row0ftsort_h
29
30#include "univ.i"
31#include "data0data.h"
32#include "dict0types.h"
33#include "row0mysql.h"
34#include "fts0fts.h"
35#include "fts0types.h"
36#include "fts0priv.h"
37#include "row0merge.h"
38#include "btr0bulk.h"
39#include "os0thread.h"
40
41/** This structure defineds information the scan thread will fetch
42and put to the linked list for parallel tokenization/sort threads
43to process */
44typedef struct fts_doc_item fts_doc_item_t;
45
46/** Information about temporary files used in merge sort */
47struct fts_doc_item {
48 dfield_t* field; /*!< field contains document string */
49 doc_id_t doc_id; /*!< document ID */
50 UT_LIST_NODE_T(fts_doc_item_t) doc_list;
51 /*!< list of doc items */
52};
53
54/** This defines the list type that scan thread would feed the parallel
55tokenization threads and sort threads. */
56typedef UT_LIST_BASE_NODE_T(fts_doc_item_t) fts_doc_list_t;
57
58#define FTS_PLL_MERGE 1
59
60/** Sort information passed to each individual parallel sort thread */
61struct fts_psort_t;
62
63/** Common info passed to each parallel sort thread */
64struct fts_psort_common_t {
65 row_merge_dup_t* dup; /*!< descriptor of FTS index */
66 dict_table_t* new_table; /*!< source table */
67 trx_t* trx; /*!< transaction */
68 fts_psort_t* all_info; /*!< all parallel sort info */
69 os_event_t sort_event; /*!< sort event */
70 os_event_t merge_event; /*!< merge event */
71 ibool opt_doc_id_size;/*!< whether to use 4 bytes
72 instead of 8 bytes integer to
73 store Doc ID during sort, if
74 Doc ID will not be big enough
75 to use 8 bytes value */
76};
77
78struct fts_psort_t {
79 ulint psort_id; /*!< Parallel sort ID */
80 row_merge_buf_t* merge_buf[FTS_NUM_AUX_INDEX];
81 /*!< sort buffer */
82 merge_file_t* merge_file[FTS_NUM_AUX_INDEX];
83 /*!< sort file */
84 row_merge_block_t* merge_block[FTS_NUM_AUX_INDEX];
85 /*!< buffer to write to file */
86 row_merge_block_t* block_alloc[FTS_NUM_AUX_INDEX];
87 /*!< buffer to allocated */
88 row_merge_block_t* crypt_block[FTS_NUM_AUX_INDEX];
89 /*!< buffer to crypt data */
90 row_merge_block_t* crypt_alloc[FTS_NUM_AUX_INDEX];
91 /*!< buffer to allocated */
92 ulint child_status; /*!< child thread status */
93 ulint state; /*!< parent thread state */
94 fts_doc_list_t fts_doc_list; /*!< doc list to process */
95 fts_psort_common_t* psort_common; /*!< ptr to all psort info */
96 os_thread_t thread_hdl; /*!< thread handler */
97 dberr_t error; /*!< db error during psort */
98 ulint memory_used; /*!< memory used by fts_doc_list */
99 ib_mutex_t mutex; /*!< mutex for fts_doc_list */
100};
101
102/** Row fts token for plugin parser */
103struct row_fts_token_t {
104 fts_string_t* text; /*!< token */
105 UT_LIST_NODE_T(row_fts_token_t)
106 token_list; /*!< next token link */
107};
108
109typedef UT_LIST_BASE_NODE_T(row_fts_token_t) fts_token_list_t;
110
111/** Structure stores information from string tokenization operation */
112struct fts_tokenize_ctx {
113 ulint processed_len; /*!< processed string length */
114 ulint init_pos; /*!< doc start position */
115 ulint buf_used; /*!< the sort buffer (ID) when
116 tokenization stops, which
117 could due to sort buffer full */
118 ulint rows_added[FTS_NUM_AUX_INDEX];
119 /*!< number of rows added for
120 each FTS index partition */
121 ib_rbt_t* cached_stopword;/*!< in: stopword list */
122 dfield_t sort_field[FTS_NUM_FIELDS_SORT];
123 /*!< in: sort field */
124 fts_token_list_t fts_token_list;
125};
126
127typedef struct fts_tokenize_ctx fts_tokenize_ctx_t;
128
129/** Structure stores information needed for the insertion phase of FTS
130parallel sort. */
131struct fts_psort_insert {
132 CHARSET_INFO* charset; /*!< charset info */
133 mem_heap_t* heap; /*!< heap */
134 ibool opt_doc_id_size;/*!< Whether to use smaller (4 bytes)
135 integer for Doc ID */
136 BtrBulk* btr_bulk; /*!< Bulk load instance */
137 dtuple_t* tuple; /*!< Tuple to insert */
138
139#ifdef UNIV_DEBUG
140 ulint aux_index_id; /*!< Auxiliary index id */
141#endif
142};
143
144typedef struct fts_psort_insert fts_psort_insert_t;
145
146
147/** status bit used for communication between parent and child thread */
148#define FTS_PARENT_COMPLETE 1
149#define FTS_PARENT_EXITING 2
150#define FTS_CHILD_COMPLETE 1
151#define FTS_CHILD_EXITING 2
152
153/** Print some debug information */
154#define FTSORT_PRINT
155
156#ifdef FTSORT_PRINT
157#define DEBUG_FTS_SORT_PRINT(str) \
158 do { \
159 ut_print_timestamp(stderr); \
160 fprintf(stderr, str); \
161 } while (0)
162#else
163#define DEBUG_FTS_SORT_PRINT(str)
164#endif /* FTSORT_PRINT */
165
166/*************************************************************//**
167Create a temporary "fts sort index" used to merge sort the
168tokenized doc string. The index has three "fields":
169
1701) Tokenized word,
1712) Doc ID
1723) Word's position in original 'doc'.
173
174@return dict_index_t structure for the fts sort index */
175dict_index_t*
176row_merge_create_fts_sort_index(
177/*============================*/
178 dict_index_t* index, /*!< in: Original FTS index
179 based on which this sort index
180 is created */
181 dict_table_t* table, /*!< in,out: table that FTS index
182 is being created on */
183 ibool* opt_doc_id_size);
184 /*!< out: whether to use 4 bytes
185 instead of 8 bytes integer to
186 store Doc ID during sort */
187
188/********************************************************************//**
189Initialize FTS parallel sort structures.
190@return TRUE if all successful */
191ibool
192row_fts_psort_info_init(
193/*====================*/
194 trx_t* trx, /*!< in: transaction */
195 row_merge_dup_t* dup, /*!< in,own: descriptor of
196 FTS index being created */
197 const dict_table_t* new_table,/*!< in: table where indexes are
198 created */
199 ibool opt_doc_id_size,
200 /*!< in: whether to use 4 bytes
201 instead of 8 bytes integer to
202 store Doc ID during sort */
203 fts_psort_t** psort, /*!< out: parallel sort info to be
204 instantiated */
205 fts_psort_t** merge) /*!< out: parallel merge info
206 to be instantiated */
207 MY_ATTRIBUTE((nonnull));
208/********************************************************************//**
209Clean up and deallocate FTS parallel sort structures, and close
210temparary merge sort files */
211void
212row_fts_psort_info_destroy(
213/*=======================*/
214 fts_psort_t* psort_info, /*!< parallel sort info */
215 fts_psort_t* merge_info); /*!< parallel merge info */
216/********************************************************************//**
217Free up merge buffers when merge sort is done */
218void
219row_fts_free_pll_merge_buf(
220/*=======================*/
221 fts_psort_t* psort_info); /*!< in: parallel sort info */
222
223/*********************************************************************//**
224Start the parallel tokenization and parallel merge sort */
225void
226row_fts_start_psort(
227/*================*/
228 fts_psort_t* psort_info); /*!< in: parallel sort info */
229/*********************************************************************//**
230Kick off the parallel merge and insert thread */
231void
232row_fts_start_parallel_merge(
233/*=========================*/
234 fts_psort_t* merge_info); /*!< in: parallel sort info */
235/********************************************************************//**
236Propagate a newly added record up one level in the selection tree
237@return parent where this value propagated to */
238int
239row_merge_fts_sel_propagate(
240/*========================*/
241 int propogated, /*<! in: tree node propagated */
242 int* sel_tree, /*<! in: selection tree */
243 ulint level, /*<! in: selection tree level */
244 const mrec_t** mrec, /*<! in: sort record */
245 ulint** offsets, /*<! in: record offsets */
246 dict_index_t* index); /*<! in: FTS index */
247/********************************************************************//**
248Read sorted file containing index data tuples and insert these data
249tuples to the index
250@return DB_SUCCESS or error number */
251dberr_t
252row_fts_merge_insert(
253/*=================*/
254 dict_index_t* index, /*!< in: index */
255 dict_table_t* table, /*!< in: new table */
256 fts_psort_t* psort_info, /*!< parallel sort info */
257 ulint id) /* !< in: which auxiliary table's data
258 to insert to */
259 MY_ATTRIBUTE((nonnull));
260#endif /* row0ftsort_h */
261