| 1 | /***************************************************************************** |
| 2 | |
| 3 | Copyright (c) 2010, 2016, Oracle and/or its affiliates. All Rights Reserved. |
| 4 | Copyright (c) 2015, 2018, MariaDB Corporation. |
| 5 | |
| 6 | This program is free software; you can redistribute it and/or modify it under |
| 7 | the terms of the GNU General Public License as published by the Free Software |
| 8 | Foundation; version 2 of the License. |
| 9 | |
| 10 | This program is distributed in the hope that it will be useful, but WITHOUT |
| 11 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| 12 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. |
| 13 | |
| 14 | You should have received a copy of the GNU General Public License along with |
| 15 | this program; if not, write to the Free Software Foundation, Inc., |
| 16 | 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA |
| 17 | |
| 18 | *****************************************************************************/ |
| 19 | |
| 20 | /**************************************************//** |
| 21 | @file include/row0ftsort.h |
| 22 | Create Full Text Index with (parallel) merge sort |
| 23 | |
| 24 | Created 10/13/2010 Jimmy Yang |
| 25 | *******************************************************/ |
| 26 | |
| 27 | #ifndef row0ftsort_h |
| 28 | #define row0ftsort_h |
| 29 | |
| 30 | #include "univ.i" |
| 31 | #include "data0data.h" |
| 32 | #include "dict0types.h" |
| 33 | #include "row0mysql.h" |
| 34 | #include "fts0fts.h" |
| 35 | #include "fts0types.h" |
| 36 | #include "fts0priv.h" |
| 37 | #include "row0merge.h" |
| 38 | #include "btr0bulk.h" |
| 39 | #include "os0thread.h" |
| 40 | |
| 41 | /** This structure defineds information the scan thread will fetch |
| 42 | and put to the linked list for parallel tokenization/sort threads |
| 43 | to process */ |
| 44 | typedef struct fts_doc_item fts_doc_item_t; |
| 45 | |
| 46 | /** Information about temporary files used in merge sort */ |
| 47 | struct fts_doc_item { |
| 48 | dfield_t* field; /*!< field contains document string */ |
| 49 | doc_id_t doc_id; /*!< document ID */ |
| 50 | UT_LIST_NODE_T(fts_doc_item_t) doc_list; |
| 51 | /*!< list of doc items */ |
| 52 | }; |
| 53 | |
| 54 | /** This defines the list type that scan thread would feed the parallel |
| 55 | tokenization threads and sort threads. */ |
| 56 | typedef UT_LIST_BASE_NODE_T(fts_doc_item_t) fts_doc_list_t; |
| 57 | |
| 58 | #define FTS_PLL_MERGE 1 |
| 59 | |
| 60 | /** Sort information passed to each individual parallel sort thread */ |
| 61 | struct fts_psort_t; |
| 62 | |
| 63 | /** Common info passed to each parallel sort thread */ |
| 64 | struct fts_psort_common_t { |
| 65 | row_merge_dup_t* dup; /*!< descriptor of FTS index */ |
| 66 | dict_table_t* new_table; /*!< source table */ |
| 67 | trx_t* trx; /*!< transaction */ |
| 68 | fts_psort_t* all_info; /*!< all parallel sort info */ |
| 69 | os_event_t sort_event; /*!< sort event */ |
| 70 | os_event_t merge_event; /*!< merge event */ |
| 71 | ibool opt_doc_id_size;/*!< whether to use 4 bytes |
| 72 | instead of 8 bytes integer to |
| 73 | store Doc ID during sort, if |
| 74 | Doc ID will not be big enough |
| 75 | to use 8 bytes value */ |
| 76 | }; |
| 77 | |
| 78 | struct fts_psort_t { |
| 79 | ulint psort_id; /*!< Parallel sort ID */ |
| 80 | row_merge_buf_t* merge_buf[FTS_NUM_AUX_INDEX]; |
| 81 | /*!< sort buffer */ |
| 82 | merge_file_t* merge_file[FTS_NUM_AUX_INDEX]; |
| 83 | /*!< sort file */ |
| 84 | row_merge_block_t* merge_block[FTS_NUM_AUX_INDEX]; |
| 85 | /*!< buffer to write to file */ |
| 86 | row_merge_block_t* block_alloc[FTS_NUM_AUX_INDEX]; |
| 87 | /*!< buffer to allocated */ |
| 88 | row_merge_block_t* crypt_block[FTS_NUM_AUX_INDEX]; |
| 89 | /*!< buffer to crypt data */ |
| 90 | row_merge_block_t* crypt_alloc[FTS_NUM_AUX_INDEX]; |
| 91 | /*!< buffer to allocated */ |
| 92 | ulint child_status; /*!< child thread status */ |
| 93 | ulint state; /*!< parent thread state */ |
| 94 | fts_doc_list_t fts_doc_list; /*!< doc list to process */ |
| 95 | fts_psort_common_t* psort_common; /*!< ptr to all psort info */ |
| 96 | os_thread_t thread_hdl; /*!< thread handler */ |
| 97 | dberr_t error; /*!< db error during psort */ |
| 98 | ulint memory_used; /*!< memory used by fts_doc_list */ |
| 99 | ib_mutex_t mutex; /*!< mutex for fts_doc_list */ |
| 100 | }; |
| 101 | |
| 102 | /** Row fts token for plugin parser */ |
| 103 | struct row_fts_token_t { |
| 104 | fts_string_t* text; /*!< token */ |
| 105 | UT_LIST_NODE_T(row_fts_token_t) |
| 106 | token_list; /*!< next token link */ |
| 107 | }; |
| 108 | |
| 109 | typedef UT_LIST_BASE_NODE_T(row_fts_token_t) fts_token_list_t; |
| 110 | |
| 111 | /** Structure stores information from string tokenization operation */ |
| 112 | struct fts_tokenize_ctx { |
| 113 | ulint processed_len; /*!< processed string length */ |
| 114 | ulint init_pos; /*!< doc start position */ |
| 115 | ulint buf_used; /*!< the sort buffer (ID) when |
| 116 | tokenization stops, which |
| 117 | could due to sort buffer full */ |
| 118 | ulint rows_added[FTS_NUM_AUX_INDEX]; |
| 119 | /*!< number of rows added for |
| 120 | each FTS index partition */ |
| 121 | ib_rbt_t* cached_stopword;/*!< in: stopword list */ |
| 122 | dfield_t sort_field[FTS_NUM_FIELDS_SORT]; |
| 123 | /*!< in: sort field */ |
| 124 | fts_token_list_t fts_token_list; |
| 125 | }; |
| 126 | |
| 127 | typedef struct fts_tokenize_ctx fts_tokenize_ctx_t; |
| 128 | |
| 129 | /** Structure stores information needed for the insertion phase of FTS |
| 130 | parallel sort. */ |
| 131 | struct fts_psort_insert { |
| 132 | CHARSET_INFO* charset; /*!< charset info */ |
| 133 | mem_heap_t* heap; /*!< heap */ |
| 134 | ibool opt_doc_id_size;/*!< Whether to use smaller (4 bytes) |
| 135 | integer for Doc ID */ |
| 136 | BtrBulk* btr_bulk; /*!< Bulk load instance */ |
| 137 | dtuple_t* tuple; /*!< Tuple to insert */ |
| 138 | |
| 139 | #ifdef UNIV_DEBUG |
| 140 | ulint aux_index_id; /*!< Auxiliary index id */ |
| 141 | #endif |
| 142 | }; |
| 143 | |
| 144 | typedef struct fts_psort_insert fts_psort_insert_t; |
| 145 | |
| 146 | |
| 147 | /** status bit used for communication between parent and child thread */ |
| 148 | #define FTS_PARENT_COMPLETE 1 |
| 149 | #define FTS_PARENT_EXITING 2 |
| 150 | #define FTS_CHILD_COMPLETE 1 |
| 151 | #define FTS_CHILD_EXITING 2 |
| 152 | |
| 153 | /** Print some debug information */ |
| 154 | #define FTSORT_PRINT |
| 155 | |
| 156 | #ifdef FTSORT_PRINT |
| 157 | #define DEBUG_FTS_SORT_PRINT(str) \ |
| 158 | do { \ |
| 159 | ut_print_timestamp(stderr); \ |
| 160 | fprintf(stderr, str); \ |
| 161 | } while (0) |
| 162 | #else |
| 163 | #define DEBUG_FTS_SORT_PRINT(str) |
| 164 | #endif /* FTSORT_PRINT */ |
| 165 | |
| 166 | /*************************************************************//** |
| 167 | Create a temporary "fts sort index" used to merge sort the |
| 168 | tokenized doc string. The index has three "fields": |
| 169 | |
| 170 | 1) Tokenized word, |
| 171 | 2) Doc ID |
| 172 | 3) Word's position in original 'doc'. |
| 173 | |
| 174 | @return dict_index_t structure for the fts sort index */ |
| 175 | dict_index_t* |
| 176 | row_merge_create_fts_sort_index( |
| 177 | /*============================*/ |
| 178 | dict_index_t* index, /*!< in: Original FTS index |
| 179 | based on which this sort index |
| 180 | is created */ |
| 181 | dict_table_t* table, /*!< in,out: table that FTS index |
| 182 | is being created on */ |
| 183 | ibool* opt_doc_id_size); |
| 184 | /*!< out: whether to use 4 bytes |
| 185 | instead of 8 bytes integer to |
| 186 | store Doc ID during sort */ |
| 187 | |
| 188 | /********************************************************************//** |
| 189 | Initialize FTS parallel sort structures. |
| 190 | @return TRUE if all successful */ |
| 191 | ibool |
| 192 | row_fts_psort_info_init( |
| 193 | /*====================*/ |
| 194 | trx_t* trx, /*!< in: transaction */ |
| 195 | row_merge_dup_t* dup, /*!< in,own: descriptor of |
| 196 | FTS index being created */ |
| 197 | const dict_table_t* new_table,/*!< in: table where indexes are |
| 198 | created */ |
| 199 | ibool opt_doc_id_size, |
| 200 | /*!< in: whether to use 4 bytes |
| 201 | instead of 8 bytes integer to |
| 202 | store Doc ID during sort */ |
| 203 | fts_psort_t** psort, /*!< out: parallel sort info to be |
| 204 | instantiated */ |
| 205 | fts_psort_t** merge) /*!< out: parallel merge info |
| 206 | to be instantiated */ |
| 207 | MY_ATTRIBUTE((nonnull)); |
| 208 | /********************************************************************//** |
| 209 | Clean up and deallocate FTS parallel sort structures, and close |
| 210 | temparary merge sort files */ |
| 211 | void |
| 212 | row_fts_psort_info_destroy( |
| 213 | /*=======================*/ |
| 214 | fts_psort_t* psort_info, /*!< parallel sort info */ |
| 215 | fts_psort_t* merge_info); /*!< parallel merge info */ |
| 216 | /********************************************************************//** |
| 217 | Free up merge buffers when merge sort is done */ |
| 218 | void |
| 219 | row_fts_free_pll_merge_buf( |
| 220 | /*=======================*/ |
| 221 | fts_psort_t* psort_info); /*!< in: parallel sort info */ |
| 222 | |
| 223 | /*********************************************************************//** |
| 224 | Start the parallel tokenization and parallel merge sort */ |
| 225 | void |
| 226 | row_fts_start_psort( |
| 227 | /*================*/ |
| 228 | fts_psort_t* psort_info); /*!< in: parallel sort info */ |
| 229 | /*********************************************************************//** |
| 230 | Kick off the parallel merge and insert thread */ |
| 231 | void |
| 232 | row_fts_start_parallel_merge( |
| 233 | /*=========================*/ |
| 234 | fts_psort_t* merge_info); /*!< in: parallel sort info */ |
| 235 | /********************************************************************//** |
| 236 | Propagate a newly added record up one level in the selection tree |
| 237 | @return parent where this value propagated to */ |
| 238 | int |
| 239 | row_merge_fts_sel_propagate( |
| 240 | /*========================*/ |
| 241 | int propogated, /*<! in: tree node propagated */ |
| 242 | int* sel_tree, /*<! in: selection tree */ |
| 243 | ulint level, /*<! in: selection tree level */ |
| 244 | const mrec_t** mrec, /*<! in: sort record */ |
| 245 | ulint** offsets, /*<! in: record offsets */ |
| 246 | dict_index_t* index); /*<! in: FTS index */ |
| 247 | /********************************************************************//** |
| 248 | Read sorted file containing index data tuples and insert these data |
| 249 | tuples to the index |
| 250 | @return DB_SUCCESS or error number */ |
| 251 | dberr_t |
| 252 | row_fts_merge_insert( |
| 253 | /*=================*/ |
| 254 | dict_index_t* index, /*!< in: index */ |
| 255 | dict_table_t* table, /*!< in: new table */ |
| 256 | fts_psort_t* psort_info, /*!< parallel sort info */ |
| 257 | ulint id) /* !< in: which auxiliary table's data |
| 258 | to insert to */ |
| 259 | MY_ATTRIBUTE((nonnull)); |
| 260 | #endif /* row0ftsort_h */ |
| 261 | |