1 | /***************************************************************************** |
2 | |
3 | Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved. |
4 | |
5 | This program is free software; you can redistribute it and/or modify it under |
6 | the terms of the GNU General Public License as published by the Free Software |
7 | Foundation; version 2 of the License. |
8 | |
9 | This program is distributed in the hope that it will be useful, but WITHOUT |
10 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
11 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. |
12 | |
13 | You should have received a copy of the GNU General Public License along with |
14 | this program; if not, write to the Free Software Foundation, Inc., |
15 | 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA |
16 | |
17 | *****************************************************************************/ |
18 | |
19 | /******************************************************************//** |
20 | @file include/fts0ast.h |
21 | The FTS query parser (AST) abstract syntax tree routines |
22 | |
23 | Created 2007/03/16/03 Sunny Bains |
24 | *******************************************************/ |
25 | |
26 | #ifndef INNOBASE_FST0AST_H |
27 | #define INNOBASE_FST0AST_H |
28 | |
29 | #include "ha_prototypes.h" |
30 | #include "mem0mem.h" |
31 | |
32 | #ifdef UNIV_PFS_MEMORY |
33 | |
34 | #define malloc(A) ut_malloc_nokey(A) |
35 | #define free(A) ut_free(A) |
36 | #define realloc(P, A) ut_realloc(P, A) |
37 | |
38 | #endif /* UNIV_PFS_MEMORY */ |
39 | |
40 | /* The type of AST Node */ |
41 | enum fts_ast_type_t { |
42 | FTS_AST_OPER, /*!< Operator */ |
43 | FTS_AST_NUMB, /*!< Number */ |
44 | FTS_AST_TERM, /*!< Term (or word) */ |
45 | FTS_AST_TEXT, /*!< Text string */ |
46 | FTS_AST_PARSER_PHRASE_LIST, /*!< Phase for plugin parser |
47 | The difference from text type |
48 | is that we tokenize text into |
49 | term list */ |
50 | FTS_AST_LIST, /*!< Expression list */ |
51 | FTS_AST_SUBEXP_LIST /*!< Sub-Expression list */ |
52 | }; |
53 | |
54 | /* The FTS query operators that we support */ |
55 | enum fts_ast_oper_t { |
56 | FTS_NONE, /*!< No operator */ |
57 | |
58 | FTS_IGNORE, /*!< Ignore rows that contain |
59 | this word */ |
60 | |
61 | FTS_EXIST, /*!< Include rows that contain |
62 | this word */ |
63 | |
64 | FTS_NEGATE, /*!< Include rows that contain |
65 | this word but rank them |
66 | lower*/ |
67 | |
68 | FTS_INCR_RATING, /*!< Increase the rank for this |
69 | word*/ |
70 | |
71 | FTS_DECR_RATING, /*!< Decrease the rank for this |
72 | word*/ |
73 | |
74 | FTS_DISTANCE, /*!< Proximity distance */ |
75 | FTS_IGNORE_SKIP, /*!< Transient node operator |
76 | signifies that this is a |
77 | FTS_IGNORE node, and ignored in |
78 | the first pass of |
79 | fts_ast_visit() */ |
80 | FTS_EXIST_SKIP /*!< Transient node operator |
81 | signifies that this ia a |
82 | FTS_EXIST node, and ignored in |
83 | the first pass of |
84 | fts_ast_visit() */ |
85 | }; |
86 | |
87 | /* Data types used by the FTS parser */ |
88 | struct fts_lexer_t; |
89 | struct fts_ast_node_t; |
90 | struct fts_ast_state_t; |
91 | struct fts_ast_string_t; |
92 | |
93 | typedef dberr_t (*fts_ast_callback)(fts_ast_oper_t, fts_ast_node_t*, void*); |
94 | |
95 | /******************************************************************** |
96 | Parse the string using the lexer setup within state.*/ |
97 | int |
98 | fts_parse( |
99 | /*======*/ |
100 | /* out: 0 on OK, 1 on error */ |
101 | fts_ast_state_t* state); /*!< in: ast state instance.*/ |
102 | |
103 | /******************************************************************** |
104 | Create an AST operator node */ |
105 | extern |
106 | fts_ast_node_t* |
107 | fts_ast_create_node_oper( |
108 | /*=====================*/ |
109 | void* arg, /*!< in: ast state */ |
110 | fts_ast_oper_t oper); /*!< in: ast operator */ |
111 | /******************************************************************** |
112 | Create an AST term node, makes a copy of ptr */ |
113 | extern |
114 | fts_ast_node_t* |
115 | fts_ast_create_node_term( |
116 | /*=====================*/ |
117 | void* arg, /*!< in: ast state */ |
118 | const fts_ast_string_t* ptr); /*!< in: term string */ |
119 | /******************************************************************** |
120 | Create an AST text node */ |
121 | extern |
122 | fts_ast_node_t* |
123 | fts_ast_create_node_text( |
124 | /*=====================*/ |
125 | void* arg, /*!< in: ast state */ |
126 | const fts_ast_string_t* ptr); /*!< in: text string */ |
127 | /******************************************************************** |
128 | Create an AST expr list node */ |
129 | extern |
130 | fts_ast_node_t* |
131 | fts_ast_create_node_list( |
132 | /*=====================*/ |
133 | void* arg, /*!< in: ast state */ |
134 | fts_ast_node_t* expr); /*!< in: ast expr */ |
135 | /******************************************************************** |
136 | Create a sub-expression list node. This function takes ownership of |
137 | expr and is responsible for deleting it. */ |
138 | extern |
139 | fts_ast_node_t* |
140 | fts_ast_create_node_subexp_list( |
141 | /*============================*/ |
142 | /* out: new node */ |
143 | void* arg, /*!< in: ast state instance */ |
144 | fts_ast_node_t* expr); /*!< in: ast expr instance */ |
145 | /******************************************************************** |
146 | Set the wildcard attribute of a term.*/ |
147 | extern |
148 | void |
149 | fts_ast_term_set_wildcard( |
150 | /*======================*/ |
151 | fts_ast_node_t* node); /*!< in: term to change */ |
152 | /******************************************************************** |
153 | Set the proximity attribute of a text node. */ |
154 | void |
155 | fts_ast_text_set_distance( |
156 | /*======================*/ |
157 | fts_ast_node_t* node, /*!< in/out: text node */ |
158 | ulint distance); /*!< in: the text proximity |
159 | distance */ |
160 | /********************************************************************//** |
161 | Free a fts_ast_node_t instance. |
162 | @return next node to free */ |
163 | fts_ast_node_t* |
164 | fts_ast_free_node( |
165 | /*==============*/ |
166 | fts_ast_node_t* node); /*!< in: node to free */ |
167 | /******************************************************************** |
168 | Add a sub-expression to an AST*/ |
169 | extern |
170 | fts_ast_node_t* |
171 | fts_ast_add_node( |
172 | /*=============*/ |
173 | fts_ast_node_t* list, /*!< in: list node instance */ |
174 | fts_ast_node_t* node); /*!< in: (sub) expr to add */ |
175 | /******************************************************************** |
176 | Print the AST node recursively.*/ |
177 | extern |
178 | void |
179 | fts_ast_node_print( |
180 | /*===============*/ |
181 | fts_ast_node_t* node); /*!< in: ast node to print */ |
182 | /******************************************************************** |
183 | Free node and expr allocations.*/ |
184 | extern |
185 | void |
186 | fts_ast_state_free( |
187 | /*===============*/ |
188 | fts_ast_state_t*state); /*!< in: state instance |
189 | to free */ |
190 | /** Check only union operation involved in the node |
191 | @param[in] node ast node to check |
192 | @return true if the node contains only union else false. */ |
193 | bool |
194 | fts_ast_node_check_union( |
195 | fts_ast_node_t* node); |
196 | |
197 | /******************************************************************//** |
198 | Traverse the AST - in-order traversal. |
199 | @return DB_SUCCESS if all went well */ |
200 | dberr_t |
201 | fts_ast_visit( |
202 | /*==========*/ |
203 | fts_ast_oper_t oper, /*!< in: FTS operator */ |
204 | fts_ast_node_t* node, /*!< in: instance to traverse*/ |
205 | fts_ast_callback visitor, /*!< in: callback */ |
206 | void* arg, /*!< in: callback arg */ |
207 | bool* has_ignore) /*!< out: whether we encounter |
208 | and ignored processing an |
209 | operator, currently we only |
210 | ignore FTS_IGNORE operator */ |
211 | MY_ATTRIBUTE((nonnull, warn_unused_result)); |
212 | /******************************************************************** |
213 | Create a lex instance.*/ |
214 | fts_lexer_t* |
215 | fts_lexer_create( |
216 | /*=============*/ |
217 | ibool boolean_mode, /*!< in: query type */ |
218 | const byte* query, /*!< in: query string */ |
219 | ulint query_len) /*!< in: query string len */ |
220 | MY_ATTRIBUTE((nonnull, malloc, warn_unused_result)); |
221 | /******************************************************************** |
222 | Free an fts_lexer_t instance.*/ |
223 | void |
224 | fts_lexer_free( |
225 | /*===========*/ |
226 | fts_lexer_t* fts_lexer) /*!< in: lexer instance to |
227 | free */ |
228 | MY_ATTRIBUTE((nonnull)); |
229 | |
230 | /** |
231 | Create an ast string object, with NUL-terminator, so the string |
232 | has one more byte than len |
233 | @param[in] str pointer to string |
234 | @param[in] len length of the string |
235 | @return ast string with NUL-terminator */ |
236 | fts_ast_string_t* |
237 | fts_ast_string_create( |
238 | const byte* str, |
239 | ulint len); |
240 | |
241 | /** |
242 | Free an ast string instance |
243 | @param[in,out] ast_str string to free */ |
244 | void |
245 | fts_ast_string_free( |
246 | fts_ast_string_t* ast_str); |
247 | |
248 | /** |
249 | Translate ast string of type FTS_AST_NUMB to unsigned long by strtoul |
250 | @param[in] str string to translate |
251 | @param[in] base the base |
252 | @return translated number */ |
253 | ulint |
254 | fts_ast_string_to_ul( |
255 | const fts_ast_string_t* ast_str, |
256 | int base); |
257 | |
258 | /* String of length len. |
259 | We always store the string of length len with a terminating '\0', |
260 | regardless of there is any 0x00 in the string itself */ |
261 | struct fts_ast_string_t { |
262 | /*!< Pointer to string. */ |
263 | byte* str; |
264 | |
265 | /*!< Length of the string. */ |
266 | ulint len; |
267 | }; |
268 | |
269 | /* Query term type */ |
270 | struct fts_ast_term_t { |
271 | fts_ast_string_t* ptr; /*!< Pointer to term string.*/ |
272 | ibool wildcard; /*!< TRUE if wild card set.*/ |
273 | }; |
274 | |
275 | /* Query text type */ |
276 | struct fts_ast_text_t { |
277 | fts_ast_string_t* ptr; /*!< Pointer to text string.*/ |
278 | ulint distance; /*!< > 0 if proximity distance |
279 | set */ |
280 | }; |
281 | |
282 | /* The list of nodes in an expr list */ |
283 | struct fts_ast_list_t { |
284 | fts_ast_node_t* head; /*!< Children list head */ |
285 | fts_ast_node_t* tail; /*!< Children list tail */ |
286 | }; |
287 | |
288 | /* FTS AST node to store the term, text, operator and sub-expressions.*/ |
289 | struct fts_ast_node_t { |
290 | fts_ast_type_t type; /*!< The type of node */ |
291 | fts_ast_text_t text; /*!< Text node */ |
292 | fts_ast_term_t term; /*!< Term node */ |
293 | fts_ast_oper_t oper; /*!< Operator value */ |
294 | fts_ast_list_t list; /*!< Expression list */ |
295 | fts_ast_node_t* next; /*!< Link for expr list */ |
296 | fts_ast_node_t* next_alloc; /*!< For tracking allocations */ |
297 | bool visited; /*!< whether this node is |
298 | already processed */ |
299 | /* Used by plugin parser */ |
300 | fts_ast_node_t* up_node; /*!< Direct up node */ |
301 | bool go_up; /*!< Flag if go one level up */ |
302 | }; |
303 | |
304 | /* To track state during parsing */ |
305 | struct fts_ast_state_t { |
306 | mem_heap_t* heap; /*!< Heap to use for alloc */ |
307 | fts_ast_node_t* root; /*!< If all goes OK, then this |
308 | will point to the root.*/ |
309 | |
310 | fts_ast_list_t list; /*!< List of nodes allocated */ |
311 | |
312 | fts_lexer_t* lexer; /*!< Lexer callback + arg */ |
313 | CHARSET_INFO* charset; /*!< charset used for |
314 | tokenization */ |
315 | /* Used by plugin parser */ |
316 | fts_ast_node_t* cur_node; /*!< Current node into which |
317 | we add new node */ |
318 | int depth; /*!< Depth of parsing state */ |
319 | }; |
320 | |
321 | /******************************************************************//** |
322 | Create an AST term node, makes a copy of ptr for plugin parser |
323 | @return node */ |
324 | extern |
325 | fts_ast_node_t* |
326 | fts_ast_create_node_term_for_parser( |
327 | /*==========i=====================*/ |
328 | void* arg, /*!< in: ast state */ |
329 | const char* ptr, /*!< in: term string */ |
330 | const ulint len); /*!< in: term string length */ |
331 | |
332 | /******************************************************************//** |
333 | Create an AST phrase list node for plugin parser |
334 | @return node */ |
335 | extern |
336 | fts_ast_node_t* |
337 | fts_ast_create_node_phrase_list( |
338 | /*============================*/ |
339 | void* arg); /*!< in: ast state */ |
340 | |
341 | #ifdef UNIV_DEBUG |
342 | const char* |
343 | fts_ast_node_type_get(fts_ast_type_t type); |
344 | #endif /* UNIV_DEBUG */ |
345 | |
346 | #endif /* INNOBASE_FSTS0AST_H */ |
347 | |