| 1 | /***************************************************************************** |
| 2 | |
| 3 | Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved. |
| 4 | |
| 5 | This program is free software; you can redistribute it and/or modify it under |
| 6 | the terms of the GNU General Public License as published by the Free Software |
| 7 | Foundation; version 2 of the License. |
| 8 | |
| 9 | This program is distributed in the hope that it will be useful, but WITHOUT |
| 10 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| 11 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. |
| 12 | |
| 13 | You should have received a copy of the GNU General Public License along with |
| 14 | this program; if not, write to the Free Software Foundation, Inc., |
| 15 | 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA |
| 16 | |
| 17 | *****************************************************************************/ |
| 18 | |
| 19 | /******************************************************************//** |
| 20 | @file include/fts0ast.h |
| 21 | The FTS query parser (AST) abstract syntax tree routines |
| 22 | |
| 23 | Created 2007/03/16/03 Sunny Bains |
| 24 | *******************************************************/ |
| 25 | |
| 26 | #ifndef INNOBASE_FST0AST_H |
| 27 | #define INNOBASE_FST0AST_H |
| 28 | |
| 29 | #include "ha_prototypes.h" |
| 30 | #include "mem0mem.h" |
| 31 | |
| 32 | #ifdef UNIV_PFS_MEMORY |
| 33 | |
| 34 | #define malloc(A) ut_malloc_nokey(A) |
| 35 | #define free(A) ut_free(A) |
| 36 | #define realloc(P, A) ut_realloc(P, A) |
| 37 | |
| 38 | #endif /* UNIV_PFS_MEMORY */ |
| 39 | |
| 40 | /* The type of AST Node */ |
| 41 | enum fts_ast_type_t { |
| 42 | FTS_AST_OPER, /*!< Operator */ |
| 43 | FTS_AST_NUMB, /*!< Number */ |
| 44 | FTS_AST_TERM, /*!< Term (or word) */ |
| 45 | FTS_AST_TEXT, /*!< Text string */ |
| 46 | FTS_AST_PARSER_PHRASE_LIST, /*!< Phase for plugin parser |
| 47 | The difference from text type |
| 48 | is that we tokenize text into |
| 49 | term list */ |
| 50 | FTS_AST_LIST, /*!< Expression list */ |
| 51 | FTS_AST_SUBEXP_LIST /*!< Sub-Expression list */ |
| 52 | }; |
| 53 | |
| 54 | /* The FTS query operators that we support */ |
| 55 | enum fts_ast_oper_t { |
| 56 | FTS_NONE, /*!< No operator */ |
| 57 | |
| 58 | FTS_IGNORE, /*!< Ignore rows that contain |
| 59 | this word */ |
| 60 | |
| 61 | FTS_EXIST, /*!< Include rows that contain |
| 62 | this word */ |
| 63 | |
| 64 | FTS_NEGATE, /*!< Include rows that contain |
| 65 | this word but rank them |
| 66 | lower*/ |
| 67 | |
| 68 | FTS_INCR_RATING, /*!< Increase the rank for this |
| 69 | word*/ |
| 70 | |
| 71 | FTS_DECR_RATING, /*!< Decrease the rank for this |
| 72 | word*/ |
| 73 | |
| 74 | FTS_DISTANCE, /*!< Proximity distance */ |
| 75 | FTS_IGNORE_SKIP, /*!< Transient node operator |
| 76 | signifies that this is a |
| 77 | FTS_IGNORE node, and ignored in |
| 78 | the first pass of |
| 79 | fts_ast_visit() */ |
| 80 | FTS_EXIST_SKIP /*!< Transient node operator |
| 81 | signifies that this ia a |
| 82 | FTS_EXIST node, and ignored in |
| 83 | the first pass of |
| 84 | fts_ast_visit() */ |
| 85 | }; |
| 86 | |
| 87 | /* Data types used by the FTS parser */ |
| 88 | struct fts_lexer_t; |
| 89 | struct fts_ast_node_t; |
| 90 | struct fts_ast_state_t; |
| 91 | struct fts_ast_string_t; |
| 92 | |
| 93 | typedef dberr_t (*fts_ast_callback)(fts_ast_oper_t, fts_ast_node_t*, void*); |
| 94 | |
| 95 | /******************************************************************** |
| 96 | Parse the string using the lexer setup within state.*/ |
| 97 | int |
| 98 | fts_parse( |
| 99 | /*======*/ |
| 100 | /* out: 0 on OK, 1 on error */ |
| 101 | fts_ast_state_t* state); /*!< in: ast state instance.*/ |
| 102 | |
| 103 | /******************************************************************** |
| 104 | Create an AST operator node */ |
| 105 | extern |
| 106 | fts_ast_node_t* |
| 107 | fts_ast_create_node_oper( |
| 108 | /*=====================*/ |
| 109 | void* arg, /*!< in: ast state */ |
| 110 | fts_ast_oper_t oper); /*!< in: ast operator */ |
| 111 | /******************************************************************** |
| 112 | Create an AST term node, makes a copy of ptr */ |
| 113 | extern |
| 114 | fts_ast_node_t* |
| 115 | fts_ast_create_node_term( |
| 116 | /*=====================*/ |
| 117 | void* arg, /*!< in: ast state */ |
| 118 | const fts_ast_string_t* ptr); /*!< in: term string */ |
| 119 | /******************************************************************** |
| 120 | Create an AST text node */ |
| 121 | extern |
| 122 | fts_ast_node_t* |
| 123 | fts_ast_create_node_text( |
| 124 | /*=====================*/ |
| 125 | void* arg, /*!< in: ast state */ |
| 126 | const fts_ast_string_t* ptr); /*!< in: text string */ |
| 127 | /******************************************************************** |
| 128 | Create an AST expr list node */ |
| 129 | extern |
| 130 | fts_ast_node_t* |
| 131 | fts_ast_create_node_list( |
| 132 | /*=====================*/ |
| 133 | void* arg, /*!< in: ast state */ |
| 134 | fts_ast_node_t* expr); /*!< in: ast expr */ |
| 135 | /******************************************************************** |
| 136 | Create a sub-expression list node. This function takes ownership of |
| 137 | expr and is responsible for deleting it. */ |
| 138 | extern |
| 139 | fts_ast_node_t* |
| 140 | fts_ast_create_node_subexp_list( |
| 141 | /*============================*/ |
| 142 | /* out: new node */ |
| 143 | void* arg, /*!< in: ast state instance */ |
| 144 | fts_ast_node_t* expr); /*!< in: ast expr instance */ |
| 145 | /******************************************************************** |
| 146 | Set the wildcard attribute of a term.*/ |
| 147 | extern |
| 148 | void |
| 149 | fts_ast_term_set_wildcard( |
| 150 | /*======================*/ |
| 151 | fts_ast_node_t* node); /*!< in: term to change */ |
| 152 | /******************************************************************** |
| 153 | Set the proximity attribute of a text node. */ |
| 154 | void |
| 155 | fts_ast_text_set_distance( |
| 156 | /*======================*/ |
| 157 | fts_ast_node_t* node, /*!< in/out: text node */ |
| 158 | ulint distance); /*!< in: the text proximity |
| 159 | distance */ |
| 160 | /********************************************************************//** |
| 161 | Free a fts_ast_node_t instance. |
| 162 | @return next node to free */ |
| 163 | fts_ast_node_t* |
| 164 | fts_ast_free_node( |
| 165 | /*==============*/ |
| 166 | fts_ast_node_t* node); /*!< in: node to free */ |
| 167 | /******************************************************************** |
| 168 | Add a sub-expression to an AST*/ |
| 169 | extern |
| 170 | fts_ast_node_t* |
| 171 | fts_ast_add_node( |
| 172 | /*=============*/ |
| 173 | fts_ast_node_t* list, /*!< in: list node instance */ |
| 174 | fts_ast_node_t* node); /*!< in: (sub) expr to add */ |
| 175 | /******************************************************************** |
| 176 | Print the AST node recursively.*/ |
| 177 | extern |
| 178 | void |
| 179 | fts_ast_node_print( |
| 180 | /*===============*/ |
| 181 | fts_ast_node_t* node); /*!< in: ast node to print */ |
| 182 | /******************************************************************** |
| 183 | Free node and expr allocations.*/ |
| 184 | extern |
| 185 | void |
| 186 | fts_ast_state_free( |
| 187 | /*===============*/ |
| 188 | fts_ast_state_t*state); /*!< in: state instance |
| 189 | to free */ |
| 190 | /** Check only union operation involved in the node |
| 191 | @param[in] node ast node to check |
| 192 | @return true if the node contains only union else false. */ |
| 193 | bool |
| 194 | fts_ast_node_check_union( |
| 195 | fts_ast_node_t* node); |
| 196 | |
| 197 | /******************************************************************//** |
| 198 | Traverse the AST - in-order traversal. |
| 199 | @return DB_SUCCESS if all went well */ |
| 200 | dberr_t |
| 201 | fts_ast_visit( |
| 202 | /*==========*/ |
| 203 | fts_ast_oper_t oper, /*!< in: FTS operator */ |
| 204 | fts_ast_node_t* node, /*!< in: instance to traverse*/ |
| 205 | fts_ast_callback visitor, /*!< in: callback */ |
| 206 | void* arg, /*!< in: callback arg */ |
| 207 | bool* has_ignore) /*!< out: whether we encounter |
| 208 | and ignored processing an |
| 209 | operator, currently we only |
| 210 | ignore FTS_IGNORE operator */ |
| 211 | MY_ATTRIBUTE((nonnull, warn_unused_result)); |
| 212 | /******************************************************************** |
| 213 | Create a lex instance.*/ |
| 214 | fts_lexer_t* |
| 215 | fts_lexer_create( |
| 216 | /*=============*/ |
| 217 | ibool boolean_mode, /*!< in: query type */ |
| 218 | const byte* query, /*!< in: query string */ |
| 219 | ulint query_len) /*!< in: query string len */ |
| 220 | MY_ATTRIBUTE((nonnull, malloc, warn_unused_result)); |
| 221 | /******************************************************************** |
| 222 | Free an fts_lexer_t instance.*/ |
| 223 | void |
| 224 | fts_lexer_free( |
| 225 | /*===========*/ |
| 226 | fts_lexer_t* fts_lexer) /*!< in: lexer instance to |
| 227 | free */ |
| 228 | MY_ATTRIBUTE((nonnull)); |
| 229 | |
| 230 | /** |
| 231 | Create an ast string object, with NUL-terminator, so the string |
| 232 | has one more byte than len |
| 233 | @param[in] str pointer to string |
| 234 | @param[in] len length of the string |
| 235 | @return ast string with NUL-terminator */ |
| 236 | fts_ast_string_t* |
| 237 | fts_ast_string_create( |
| 238 | const byte* str, |
| 239 | ulint len); |
| 240 | |
| 241 | /** |
| 242 | Free an ast string instance |
| 243 | @param[in,out] ast_str string to free */ |
| 244 | void |
| 245 | fts_ast_string_free( |
| 246 | fts_ast_string_t* ast_str); |
| 247 | |
| 248 | /** |
| 249 | Translate ast string of type FTS_AST_NUMB to unsigned long by strtoul |
| 250 | @param[in] str string to translate |
| 251 | @param[in] base the base |
| 252 | @return translated number */ |
| 253 | ulint |
| 254 | fts_ast_string_to_ul( |
| 255 | const fts_ast_string_t* ast_str, |
| 256 | int base); |
| 257 | |
| 258 | /* String of length len. |
| 259 | We always store the string of length len with a terminating '\0', |
| 260 | regardless of there is any 0x00 in the string itself */ |
| 261 | struct fts_ast_string_t { |
| 262 | /*!< Pointer to string. */ |
| 263 | byte* str; |
| 264 | |
| 265 | /*!< Length of the string. */ |
| 266 | ulint len; |
| 267 | }; |
| 268 | |
| 269 | /* Query term type */ |
| 270 | struct fts_ast_term_t { |
| 271 | fts_ast_string_t* ptr; /*!< Pointer to term string.*/ |
| 272 | ibool wildcard; /*!< TRUE if wild card set.*/ |
| 273 | }; |
| 274 | |
| 275 | /* Query text type */ |
| 276 | struct fts_ast_text_t { |
| 277 | fts_ast_string_t* ptr; /*!< Pointer to text string.*/ |
| 278 | ulint distance; /*!< > 0 if proximity distance |
| 279 | set */ |
| 280 | }; |
| 281 | |
| 282 | /* The list of nodes in an expr list */ |
| 283 | struct fts_ast_list_t { |
| 284 | fts_ast_node_t* head; /*!< Children list head */ |
| 285 | fts_ast_node_t* tail; /*!< Children list tail */ |
| 286 | }; |
| 287 | |
| 288 | /* FTS AST node to store the term, text, operator and sub-expressions.*/ |
| 289 | struct fts_ast_node_t { |
| 290 | fts_ast_type_t type; /*!< The type of node */ |
| 291 | fts_ast_text_t text; /*!< Text node */ |
| 292 | fts_ast_term_t term; /*!< Term node */ |
| 293 | fts_ast_oper_t oper; /*!< Operator value */ |
| 294 | fts_ast_list_t list; /*!< Expression list */ |
| 295 | fts_ast_node_t* next; /*!< Link for expr list */ |
| 296 | fts_ast_node_t* next_alloc; /*!< For tracking allocations */ |
| 297 | bool visited; /*!< whether this node is |
| 298 | already processed */ |
| 299 | /* Used by plugin parser */ |
| 300 | fts_ast_node_t* up_node; /*!< Direct up node */ |
| 301 | bool go_up; /*!< Flag if go one level up */ |
| 302 | }; |
| 303 | |
| 304 | /* To track state during parsing */ |
| 305 | struct fts_ast_state_t { |
| 306 | mem_heap_t* heap; /*!< Heap to use for alloc */ |
| 307 | fts_ast_node_t* root; /*!< If all goes OK, then this |
| 308 | will point to the root.*/ |
| 309 | |
| 310 | fts_ast_list_t list; /*!< List of nodes allocated */ |
| 311 | |
| 312 | fts_lexer_t* lexer; /*!< Lexer callback + arg */ |
| 313 | CHARSET_INFO* charset; /*!< charset used for |
| 314 | tokenization */ |
| 315 | /* Used by plugin parser */ |
| 316 | fts_ast_node_t* cur_node; /*!< Current node into which |
| 317 | we add new node */ |
| 318 | int depth; /*!< Depth of parsing state */ |
| 319 | }; |
| 320 | |
| 321 | /******************************************************************//** |
| 322 | Create an AST term node, makes a copy of ptr for plugin parser |
| 323 | @return node */ |
| 324 | extern |
| 325 | fts_ast_node_t* |
| 326 | fts_ast_create_node_term_for_parser( |
| 327 | /*==========i=====================*/ |
| 328 | void* arg, /*!< in: ast state */ |
| 329 | const char* ptr, /*!< in: term string */ |
| 330 | const ulint len); /*!< in: term string length */ |
| 331 | |
| 332 | /******************************************************************//** |
| 333 | Create an AST phrase list node for plugin parser |
| 334 | @return node */ |
| 335 | extern |
| 336 | fts_ast_node_t* |
| 337 | fts_ast_create_node_phrase_list( |
| 338 | /*============================*/ |
| 339 | void* arg); /*!< in: ast state */ |
| 340 | |
| 341 | #ifdef UNIV_DEBUG |
| 342 | const char* |
| 343 | fts_ast_node_type_get(fts_ast_type_t type); |
| 344 | #endif /* UNIV_DEBUG */ |
| 345 | |
| 346 | #endif /* INNOBASE_FSTS0AST_H */ |
| 347 | |