1/*****************************************************************************
2
3Copyright (c) 2013, 2015, Oracle and/or its affiliates. All Rights Reserved.
4
5This program is free software; you can redistribute it and/or modify it under
6the terms of the GNU General Public License as published by the Free Software
7Foundation; version 2 of the License.
8
9This program is distributed in the hope that it will be useful, but WITHOUT
10ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12
13You should have received a copy of the GNU General Public License along with
14this program; if not, write to the Free Software Foundation, Inc.,
1551 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
16
17*****************************************************************************/
18
19/******************************************************************//**
20@file fts/fts0plugin.cc
21Full Text Search plugin support.
22
23Created 2013/06/04 Shaohua Wang
24***********************************************************************/
25
26#include "fts0ast.h"
27#include "fts0plugin.h"
28#include "fts0tokenize.h"
29
30#include "ft_global.h"
31
32/******************************************************************//**
33FTS default parser init
34@return 0 */
35static int fts_default_parser_init(MYSQL_FTPARSER_PARAM*) { return 0; }
36
37/******************************************************************//**
38FTS default parser deinit
39@return 0 */
40static int fts_default_parser_deinit(MYSQL_FTPARSER_PARAM*) { return 0; }
41
42/******************************************************************//**
43FTS default parser parse from ft_static.c in MYISAM.
44@return 0 if parse successfully, or return non-zero */
45static
46int
47fts_default_parser_parse(
48/*=====================*/
49 MYSQL_FTPARSER_PARAM *param) /*!< in: plugin parser param */
50{
51 return(param->mysql_parse(param, param->doc, param->length));
52}
53
54/* FTS default parser from ft_static.c in MYISAM. */
55struct st_mysql_ftparser fts_default_parser =
56{
57 MYSQL_FTPARSER_INTERFACE_VERSION,
58 fts_default_parser_parse,
59 fts_default_parser_init,
60 fts_default_parser_deinit
61};
62
63/******************************************************************//**
64Get a operator node from token boolean info
65@return node */
66static
67fts_ast_node_t*
68fts_query_get_oper_node(
69/*====================*/
70 MYSQL_FTPARSER_BOOLEAN_INFO* info, /*!< in: token info */
71 fts_ast_state_t* state) /*!< in/out: query parse state*/
72{
73 fts_ast_node_t* oper_node = NULL;
74
75 if (info->yesno > 0) {
76 oper_node = fts_ast_create_node_oper(state, FTS_EXIST);
77 } else if (info->yesno < 0) {
78 oper_node = fts_ast_create_node_oper(state, FTS_IGNORE);
79 } else if (info->weight_adjust > 0) {
80 oper_node = fts_ast_create_node_oper(state, FTS_INCR_RATING);
81 } else if (info->weight_adjust < 0) {
82 oper_node = fts_ast_create_node_oper(state, FTS_DECR_RATING);
83 } else if (info->wasign > 0) {
84 oper_node = fts_ast_create_node_oper(state, FTS_NEGATE);
85 }
86
87 return(oper_node);
88}
89
90/******************************************************************//**
91FTS plugin parser 'myql_add_word' callback function for query parse.
92Refer to 'st_mysql_ftparser_param' for more detail.
93Note:
94a. Parse logic refers to 'ftb_query_add_word' from ft_boolean_search.c in MYISAM;
95b. Parse node or tree refers to fts0pars.y.
96@return 0 if add successfully, or return non-zero. */
97static
98int
99fts_query_add_word_for_parser(
100/*==========================*/
101 MYSQL_FTPARSER_PARAM* param, /*!< in: parser param */
102 const char* word, /*!< in: token */
103 int word_len, /*!< in: token length */
104 MYSQL_FTPARSER_BOOLEAN_INFO* info) /*!< in: token info */
105{
106 fts_ast_state_t* state =
107 static_cast<fts_ast_state_t*>(param->mysql_ftparam);
108 fts_ast_node_t* cur_node = state->cur_node;
109 fts_ast_node_t* oper_node = NULL;
110 fts_ast_node_t* term_node = NULL;
111 fts_ast_node_t* node = NULL;
112
113 switch (info->type) {
114 case FT_TOKEN_STOPWORD:
115 /* We only handler stopword in phrase */
116 if (cur_node->type != FTS_AST_PARSER_PHRASE_LIST) {
117 break;
118 }
119 /* fall through */
120
121 case FT_TOKEN_WORD:
122 term_node = fts_ast_create_node_term_for_parser(
123 state, word, ulint(word_len));
124
125 if (info->trunc) {
126 fts_ast_term_set_wildcard(term_node);
127 }
128
129 if (cur_node->type == FTS_AST_PARSER_PHRASE_LIST) {
130 /* Ignore operator inside phrase */
131 fts_ast_add_node(cur_node, term_node);
132 } else {
133 ut_ad(cur_node->type == FTS_AST_LIST
134 || cur_node->type == FTS_AST_SUBEXP_LIST);
135 oper_node = fts_query_get_oper_node(info, state);
136
137 if (oper_node) {
138 node = fts_ast_create_node_list(state, oper_node);
139 fts_ast_add_node(node, term_node);
140 fts_ast_add_node(cur_node, node);
141 } else {
142 fts_ast_add_node(cur_node, term_node);
143 }
144 }
145
146 break;
147
148 case FT_TOKEN_LEFT_PAREN:
149 /* Check parse error */
150 if (cur_node->type != FTS_AST_LIST
151 && cur_node->type != FTS_AST_SUBEXP_LIST) {
152 return(1);
153 }
154
155 /* Set operator */
156 oper_node = fts_query_get_oper_node(info, state);
157 if (oper_node != NULL) {
158 node = fts_ast_create_node_list(state, oper_node);
159 fts_ast_add_node(cur_node, node);
160 node->go_up = true;
161 node->up_node = cur_node;
162 cur_node = node;
163 }
164
165 if (info->quot) {
166 /* Phrase node */
167 node = fts_ast_create_node_phrase_list(state);
168 } else {
169 /* Subexp list node */
170 node = fts_ast_create_node_subexp_list(state, NULL);
171 }
172
173 fts_ast_add_node(cur_node, node);
174
175 node->up_node = cur_node;
176 state->cur_node = node;
177 state->depth += 1;
178
179 break;
180
181 case FT_TOKEN_RIGHT_PAREN:
182 info->quot = 0;
183
184 if (cur_node->up_node != NULL) {
185 cur_node = cur_node->up_node;
186
187 if (cur_node->go_up) {
188 ut_a(cur_node->up_node
189 && !(cur_node->up_node->go_up));
190 cur_node = cur_node->up_node;
191 }
192 }
193
194 state->cur_node = cur_node;
195
196 if (state->depth > 0) {
197 state->depth--;
198 } else {
199 /* Parentheses mismatch */
200 return(1);
201 }
202
203 break;
204
205 case FT_TOKEN_EOF:
206 default:
207 break;
208 }
209
210 return(0);
211}
212
213/******************************************************************//**
214FTS plugin parser 'myql_parser' callback function for query parse.
215Refer to 'st_mysql_ftparser_param' for more detail.
216@return 0 if parse successfully */
217static
218int
219fts_parse_query_internal(
220/*=====================*/
221 MYSQL_FTPARSER_PARAM* param, /*!< in: parser param */
222 const char* query, /*!< in: query string */
223 int len) /*!< in: query length */
224{
225 MYSQL_FTPARSER_BOOLEAN_INFO info;
226 const CHARSET_INFO* cs = param->cs;
227 uchar** start = (uchar**)(&query);
228 uchar* end = (uchar*)(query + len);
229 FT_WORD w = {NULL, 0, 0};
230
231 info.prev = ' ';
232 info.quot = 0;
233 memset(&w, 0, sizeof(w));
234 /* Note: We don't handle simple parser mode here,
235 but user supplied plugin parser should handler it. */
236 while (fts_get_word(cs, start, end, &w, &info)) {
237 int ret = param->mysql_add_word(
238 param,
239 reinterpret_cast<char*>(w.pos),
240 int(w.len), &info);
241 if (ret) {
242 return(ret);
243 }
244 }
245
246 return(0);
247}
248
249/******************************************************************//**
250fts parse query by plugin parser.
251@return 0 if parse successfully, or return non-zero. */
252int
253fts_parse_by_parser(
254/*================*/
255 ibool mode, /*!< in: parse boolean mode */
256 uchar* query_str, /*!< in: query string */
257 ulint query_len, /*!< in: query string length */
258 st_mysql_ftparser* parser, /*!< in: fts plugin parser */
259 fts_ast_state_t* state) /*!< in/out: parser state */
260{
261 MYSQL_FTPARSER_PARAM param;
262 int ret;
263
264 ut_ad(parser);
265
266 /* Initial parser param */
267 param.mysql_parse = fts_parse_query_internal;
268 param.mysql_add_word = fts_query_add_word_for_parser;
269 param.mysql_ftparam = static_cast<void*>(state);
270 param.cs = state->charset;
271 param.doc = reinterpret_cast<char*>(query_str);
272 param.length = static_cast<int>(query_len);
273 param.flags = 0;
274 param.mode = mode ?
275 MYSQL_FTPARSER_FULL_BOOLEAN_INFO :
276 MYSQL_FTPARSER_SIMPLE_MODE;
277
278 PARSER_INIT(parser, &param);
279 ret = parser->parse(&param);
280 PARSER_DEINIT(parser, &param);
281
282 return(ret | state->depth);
283}
284