1/*****************************************************************************
2
3Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved.
4
5This program is free software; you can redistribute it and/or modify it under
6the terms of the GNU General Public License as published by the Free Software
7Foundation; version 2 of the License.
8
9This program is distributed in the hope that it will be useful, but WITHOUT
10ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12
13You should have received a copy of the GNU General Public License along with
14this program; if not, write to the Free Software Foundation, Inc.,
1551 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
16
17*****************************************************************************/
18
19/******************************************************************//**
20@file include/fts0ast.h
21The FTS query parser (AST) abstract syntax tree routines
22
23Created 2007/03/16/03 Sunny Bains
24*******************************************************/
25
26#ifndef INNOBASE_FST0AST_H
27#define INNOBASE_FST0AST_H
28
29#include "ha_prototypes.h"
30#include "mem0mem.h"
31
32#ifdef UNIV_PFS_MEMORY
33
34#define malloc(A) ut_malloc_nokey(A)
35#define free(A) ut_free(A)
36#define realloc(P, A) ut_realloc(P, A)
37
38#endif /* UNIV_PFS_MEMORY */
39
40/* The type of AST Node */
41enum fts_ast_type_t {
42 FTS_AST_OPER, /*!< Operator */
43 FTS_AST_NUMB, /*!< Number */
44 FTS_AST_TERM, /*!< Term (or word) */
45 FTS_AST_TEXT, /*!< Text string */
46 FTS_AST_PARSER_PHRASE_LIST, /*!< Phase for plugin parser
47 The difference from text type
48 is that we tokenize text into
49 term list */
50 FTS_AST_LIST, /*!< Expression list */
51 FTS_AST_SUBEXP_LIST /*!< Sub-Expression list */
52};
53
54/* The FTS query operators that we support */
55enum fts_ast_oper_t {
56 FTS_NONE, /*!< No operator */
57
58 FTS_IGNORE, /*!< Ignore rows that contain
59 this word */
60
61 FTS_EXIST, /*!< Include rows that contain
62 this word */
63
64 FTS_NEGATE, /*!< Include rows that contain
65 this word but rank them
66 lower*/
67
68 FTS_INCR_RATING, /*!< Increase the rank for this
69 word*/
70
71 FTS_DECR_RATING, /*!< Decrease the rank for this
72 word*/
73
74 FTS_DISTANCE, /*!< Proximity distance */
75 FTS_IGNORE_SKIP, /*!< Transient node operator
76 signifies that this is a
77 FTS_IGNORE node, and ignored in
78 the first pass of
79 fts_ast_visit() */
80 FTS_EXIST_SKIP /*!< Transient node operator
81 signifies that this ia a
82 FTS_EXIST node, and ignored in
83 the first pass of
84 fts_ast_visit() */
85};
86
87/* Data types used by the FTS parser */
88struct fts_lexer_t;
89struct fts_ast_node_t;
90struct fts_ast_state_t;
91struct fts_ast_string_t;
92
93typedef dberr_t (*fts_ast_callback)(fts_ast_oper_t, fts_ast_node_t*, void*);
94
95/********************************************************************
96Parse the string using the lexer setup within state.*/
97int
98fts_parse(
99/*======*/
100 /* out: 0 on OK, 1 on error */
101 fts_ast_state_t* state); /*!< in: ast state instance.*/
102
103/********************************************************************
104Create an AST operator node */
105extern
106fts_ast_node_t*
107fts_ast_create_node_oper(
108/*=====================*/
109 void* arg, /*!< in: ast state */
110 fts_ast_oper_t oper); /*!< in: ast operator */
111/********************************************************************
112Create an AST term node, makes a copy of ptr */
113extern
114fts_ast_node_t*
115fts_ast_create_node_term(
116/*=====================*/
117 void* arg, /*!< in: ast state */
118 const fts_ast_string_t* ptr); /*!< in: term string */
119/********************************************************************
120Create an AST text node */
121extern
122fts_ast_node_t*
123fts_ast_create_node_text(
124/*=====================*/
125 void* arg, /*!< in: ast state */
126 const fts_ast_string_t* ptr); /*!< in: text string */
127/********************************************************************
128Create an AST expr list node */
129extern
130fts_ast_node_t*
131fts_ast_create_node_list(
132/*=====================*/
133 void* arg, /*!< in: ast state */
134 fts_ast_node_t* expr); /*!< in: ast expr */
135/********************************************************************
136Create a sub-expression list node. This function takes ownership of
137expr and is responsible for deleting it. */
138extern
139fts_ast_node_t*
140fts_ast_create_node_subexp_list(
141/*============================*/
142 /* out: new node */
143 void* arg, /*!< in: ast state instance */
144 fts_ast_node_t* expr); /*!< in: ast expr instance */
145/********************************************************************
146Set the wildcard attribute of a term.*/
147extern
148void
149fts_ast_term_set_wildcard(
150/*======================*/
151 fts_ast_node_t* node); /*!< in: term to change */
152/********************************************************************
153Set the proximity attribute of a text node. */
154void
155fts_ast_text_set_distance(
156/*======================*/
157 fts_ast_node_t* node, /*!< in/out: text node */
158 ulint distance); /*!< in: the text proximity
159 distance */
160/********************************************************************//**
161Free a fts_ast_node_t instance.
162@return next node to free */
163fts_ast_node_t*
164fts_ast_free_node(
165/*==============*/
166 fts_ast_node_t* node); /*!< in: node to free */
167/********************************************************************
168Add a sub-expression to an AST*/
169extern
170fts_ast_node_t*
171fts_ast_add_node(
172/*=============*/
173 fts_ast_node_t* list, /*!< in: list node instance */
174 fts_ast_node_t* node); /*!< in: (sub) expr to add */
175/********************************************************************
176Print the AST node recursively.*/
177extern
178void
179fts_ast_node_print(
180/*===============*/
181 fts_ast_node_t* node); /*!< in: ast node to print */
182/********************************************************************
183Free node and expr allocations.*/
184extern
185void
186fts_ast_state_free(
187/*===============*/
188 fts_ast_state_t*state); /*!< in: state instance
189 to free */
190/** Check only union operation involved in the node
191@param[in] node ast node to check
192@return true if the node contains only union else false. */
193bool
194fts_ast_node_check_union(
195 fts_ast_node_t* node);
196
197/******************************************************************//**
198Traverse the AST - in-order traversal.
199@return DB_SUCCESS if all went well */
200dberr_t
201fts_ast_visit(
202/*==========*/
203 fts_ast_oper_t oper, /*!< in: FTS operator */
204 fts_ast_node_t* node, /*!< in: instance to traverse*/
205 fts_ast_callback visitor, /*!< in: callback */
206 void* arg, /*!< in: callback arg */
207 bool* has_ignore) /*!< out: whether we encounter
208 and ignored processing an
209 operator, currently we only
210 ignore FTS_IGNORE operator */
211 MY_ATTRIBUTE((nonnull, warn_unused_result));
212/********************************************************************
213Create a lex instance.*/
214fts_lexer_t*
215fts_lexer_create(
216/*=============*/
217 ibool boolean_mode, /*!< in: query type */
218 const byte* query, /*!< in: query string */
219 ulint query_len) /*!< in: query string len */
220 MY_ATTRIBUTE((nonnull, malloc, warn_unused_result));
221/********************************************************************
222Free an fts_lexer_t instance.*/
223void
224fts_lexer_free(
225/*===========*/
226 fts_lexer_t* fts_lexer) /*!< in: lexer instance to
227 free */
228 MY_ATTRIBUTE((nonnull));
229
230/**
231Create an ast string object, with NUL-terminator, so the string
232has one more byte than len
233@param[in] str pointer to string
234@param[in] len length of the string
235@return ast string with NUL-terminator */
236fts_ast_string_t*
237fts_ast_string_create(
238 const byte* str,
239 ulint len);
240
241/**
242Free an ast string instance
243@param[in,out] ast_str string to free */
244void
245fts_ast_string_free(
246 fts_ast_string_t* ast_str);
247
248/**
249Translate ast string of type FTS_AST_NUMB to unsigned long by strtoul
250@param[in] str string to translate
251@param[in] base the base
252@return translated number */
253ulint
254fts_ast_string_to_ul(
255 const fts_ast_string_t* ast_str,
256 int base);
257
258/* String of length len.
259We always store the string of length len with a terminating '\0',
260regardless of there is any 0x00 in the string itself */
261struct fts_ast_string_t {
262 /*!< Pointer to string. */
263 byte* str;
264
265 /*!< Length of the string. */
266 ulint len;
267};
268
269/* Query term type */
270struct fts_ast_term_t {
271 fts_ast_string_t* ptr; /*!< Pointer to term string.*/
272 ibool wildcard; /*!< TRUE if wild card set.*/
273};
274
275/* Query text type */
276struct fts_ast_text_t {
277 fts_ast_string_t* ptr; /*!< Pointer to text string.*/
278 ulint distance; /*!< > 0 if proximity distance
279 set */
280};
281
282/* The list of nodes in an expr list */
283struct fts_ast_list_t {
284 fts_ast_node_t* head; /*!< Children list head */
285 fts_ast_node_t* tail; /*!< Children list tail */
286};
287
288/* FTS AST node to store the term, text, operator and sub-expressions.*/
289struct fts_ast_node_t {
290 fts_ast_type_t type; /*!< The type of node */
291 fts_ast_text_t text; /*!< Text node */
292 fts_ast_term_t term; /*!< Term node */
293 fts_ast_oper_t oper; /*!< Operator value */
294 fts_ast_list_t list; /*!< Expression list */
295 fts_ast_node_t* next; /*!< Link for expr list */
296 fts_ast_node_t* next_alloc; /*!< For tracking allocations */
297 bool visited; /*!< whether this node is
298 already processed */
299 /* Used by plugin parser */
300 fts_ast_node_t* up_node; /*!< Direct up node */
301 bool go_up; /*!< Flag if go one level up */
302};
303
304/* To track state during parsing */
305struct fts_ast_state_t {
306 mem_heap_t* heap; /*!< Heap to use for alloc */
307 fts_ast_node_t* root; /*!< If all goes OK, then this
308 will point to the root.*/
309
310 fts_ast_list_t list; /*!< List of nodes allocated */
311
312 fts_lexer_t* lexer; /*!< Lexer callback + arg */
313 CHARSET_INFO* charset; /*!< charset used for
314 tokenization */
315 /* Used by plugin parser */
316 fts_ast_node_t* cur_node; /*!< Current node into which
317 we add new node */
318 int depth; /*!< Depth of parsing state */
319};
320
321/******************************************************************//**
322Create an AST term node, makes a copy of ptr for plugin parser
323@return node */
324extern
325fts_ast_node_t*
326fts_ast_create_node_term_for_parser(
327/*==========i=====================*/
328 void* arg, /*!< in: ast state */
329 const char* ptr, /*!< in: term string */
330 const ulint len); /*!< in: term string length */
331
332/******************************************************************//**
333Create an AST phrase list node for plugin parser
334@return node */
335extern
336fts_ast_node_t*
337fts_ast_create_node_phrase_list(
338/*============================*/
339 void* arg); /*!< in: ast state */
340
341#ifdef UNIV_DEBUG
342const char*
343fts_ast_node_type_get(fts_ast_type_t type);
344#endif /* UNIV_DEBUG */
345
346#endif /* INNOBASE_FSTS0AST_H */
347