1 | /* -*- c-basic-offset: 2 -*- */ |
2 | /* |
3 | Copyright(C) 2017 Kouhei Sutou <kou@clear-code.com> |
4 | |
5 | This library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | This library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with this library; if not, write to the Free Software |
17 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
18 | */ |
19 | |
20 | #include "mrn_query_parser.hpp" |
21 | |
22 | #include <mrn_variables.hpp> |
23 | |
24 | extern "C" { |
25 | /* Groonga's internal functions */ |
26 | int grn_atoi(const char *nptr, const char *end, const char **rest); |
27 | uint grn_atoui(const char *nptr, const char *end, const char **rest); |
28 | } |
29 | |
30 | #define MRN_CLASS_NAME "mrn::QueryParser" |
31 | |
32 | namespace mrn { |
33 | QueryParser::QueryParser(grn_ctx *ctx, |
34 | THD *thd, |
35 | grn_obj *expression, |
36 | grn_obj *default_column, |
37 | uint n_sections, |
38 | grn_obj *match_columns) |
39 | : ctx_(ctx), |
40 | thd_(thd), |
41 | expression_(expression), |
42 | default_column_(default_column), |
43 | n_sections_(n_sections), |
44 | match_columns_(match_columns) { |
45 | } |
46 | |
47 | QueryParser::~QueryParser() { |
48 | } |
49 | |
50 | grn_rc QueryParser::parse(const char *query, size_t query_length) { |
51 | MRN_DBUG_ENTER_METHOD(); |
52 | |
53 | const char *raw_query = NULL; |
54 | size_t raw_query_length = 0; |
55 | grn_operator default_operator = GRN_OP_OR; |
56 | grn_expr_flags expression_flags = 0; |
57 | parse_pragma(query, |
58 | query_length, |
59 | &raw_query, |
60 | &raw_query_length, |
61 | &default_operator, |
62 | &expression_flags); |
63 | |
64 | grn_obj *default_column = default_column_; |
65 | if (match_columns_) { |
66 | default_column = match_columns_; |
67 | } |
68 | grn_rc rc = grn_expr_parse(ctx_, |
69 | expression_, |
70 | raw_query, |
71 | raw_query_length, |
72 | default_column, |
73 | GRN_OP_MATCH, |
74 | default_operator, |
75 | expression_flags); |
76 | if (rc != GRN_SUCCESS) { |
77 | char error_message[MRN_MESSAGE_BUFFER_SIZE]; |
78 | snprintf(error_message, MRN_MESSAGE_BUFFER_SIZE, |
79 | "failed to parse fulltext search keyword: <%.*s>: <%s>" , |
80 | static_cast<int>(query_length), |
81 | query, |
82 | ctx_->errbuf); |
83 | variables::ActionOnError action = |
84 | variables::get_action_on_fulltext_query_error(thd_); |
85 | switch (action) { |
86 | case variables::ACTION_ON_ERROR_ERROR: |
87 | my_message(ER_PARSE_ERROR, error_message, MYF(0)); |
88 | break; |
89 | case variables::ACTION_ON_ERROR_ERROR_AND_LOG: |
90 | my_message(ER_PARSE_ERROR, error_message, MYF(0)); |
91 | GRN_LOG(ctx_, GRN_LOG_ERROR, "%s" , error_message); |
92 | break; |
93 | case variables::ACTION_ON_ERROR_IGNORE: |
94 | break; |
95 | case variables::ACTION_ON_ERROR_IGNORE_AND_LOG: |
96 | GRN_LOG(ctx_, GRN_LOG_ERROR, "%s" , error_message); |
97 | break; |
98 | } |
99 | } |
100 | |
101 | DBUG_RETURN(rc); |
102 | } |
103 | |
104 | void QueryParser::parse_pragma(const char *query, |
105 | size_t query_length, |
106 | const char **raw_query, |
107 | size_t *raw_query_length, |
108 | grn_operator *default_operator, |
109 | grn_expr_flags *flags) { |
110 | MRN_DBUG_ENTER_METHOD(); |
111 | |
112 | const char *current_query = query; |
113 | size_t current_query_length = query_length; |
114 | |
115 | *default_operator = GRN_OP_OR; |
116 | |
117 | if (current_query_length >= 4 && memcmp(current_query, "*SS " , 4) == 0) { |
118 | *raw_query = current_query + 4; |
119 | *raw_query_length = current_query_length - 4; |
120 | *flags = GRN_EXPR_SYNTAX_SCRIPT; |
121 | DBUG_VOID_RETURN; |
122 | } |
123 | |
124 | bool weight_specified = false; |
125 | *raw_query = query; |
126 | *raw_query_length = query_length; |
127 | *flags = default_expression_flags(); |
128 | if (current_query_length >= 2 && current_query[0] == '*') { |
129 | bool parsed = false; |
130 | bool done = false; |
131 | current_query++; |
132 | current_query_length--; |
133 | while (!done) { |
134 | size_t consumed_query_length = 0; |
135 | switch (current_query[0]) { |
136 | case 'D': |
137 | if (parse_pragma_d(current_query + 1, |
138 | current_query_length - 1, |
139 | default_operator, |
140 | &consumed_query_length)) { |
141 | parsed = true; |
142 | consumed_query_length += 1; |
143 | current_query += consumed_query_length; |
144 | current_query_length -= consumed_query_length; |
145 | } else { |
146 | done = true; |
147 | } |
148 | break; |
149 | case 'W': |
150 | if (parse_pragma_w(current_query + 1, |
151 | current_query_length - 1, |
152 | &consumed_query_length)) { |
153 | parsed = true; |
154 | weight_specified = true; |
155 | consumed_query_length += 1; |
156 | current_query += consumed_query_length; |
157 | current_query_length -= consumed_query_length; |
158 | } else { |
159 | done = true; |
160 | } |
161 | break; |
162 | default: |
163 | done = true; |
164 | break; |
165 | } |
166 | } |
167 | if (parsed) { |
168 | *raw_query = current_query; |
169 | *raw_query_length = current_query_length; |
170 | } |
171 | } |
172 | |
173 | // WORKAROUND: ignore the first '+' to support "+apple macintosh" pattern. |
174 | while (*raw_query_length > 0 && (*raw_query)[0] == ' ') { |
175 | (*raw_query)++; |
176 | (*raw_query_length)--; |
177 | } |
178 | if (*raw_query_length > 0 && (*raw_query)[0] == '+') { |
179 | (*raw_query)++; |
180 | (*raw_query_length)--; |
181 | } |
182 | if (!weight_specified && match_columns_) { |
183 | grn_expr_append_obj(ctx_, match_columns_, default_column_, GRN_OP_PUSH, 1); |
184 | } |
185 | |
186 | DBUG_VOID_RETURN; |
187 | } |
188 | |
189 | bool QueryParser::parse_pragma_w(const char *query, |
190 | size_t query_length, |
191 | size_t *consumed_query_length) { |
192 | MRN_DBUG_ENTER_METHOD(); |
193 | |
194 | *consumed_query_length = 0; |
195 | |
196 | grn_obj section_value_buffer; |
197 | GRN_UINT32_INIT(§ion_value_buffer, 0); |
198 | |
199 | MRN_ALLOCATE_VARIABLE_LENGTH_ARRAYS(bool, specified_sections, n_sections_); |
200 | for (uint i = 0; i < n_sections_; ++i) { |
201 | specified_sections[i] = false; |
202 | } |
203 | |
204 | uint n_weights = 0; |
205 | while (query_length >= 1) { |
206 | if (n_weights >= 1) { |
207 | if (query[0] != ',') { |
208 | break; |
209 | } |
210 | size_t n_used_query_length = 1; |
211 | *consumed_query_length += n_used_query_length; |
212 | query_length -= n_used_query_length; |
213 | query += n_used_query_length; |
214 | if (query_length == 0) { |
215 | break; |
216 | } |
217 | } |
218 | |
219 | uint section = 0; |
220 | if ('1' <= query[0] && query[0] <= '9') { |
221 | const char *section_start = query; |
222 | const char *query_end = query + query_length; |
223 | const char *query_rest; |
224 | section = grn_atoui(section_start, query_end, &query_rest); |
225 | if (section_start == query_rest) { |
226 | break; |
227 | } |
228 | if (!(0 < section && section <= n_sections_)) { |
229 | break; |
230 | } |
231 | section -= 1; |
232 | specified_sections[section] = true; |
233 | size_t n_used_query_length = query_rest - query; |
234 | *consumed_query_length += n_used_query_length; |
235 | query_length -= n_used_query_length; |
236 | query += n_used_query_length; |
237 | } else { |
238 | break; |
239 | } |
240 | |
241 | int weight = 1; |
242 | if (query_length >= 2 && query[0] == ':') { |
243 | const char *weight_start = query + 1; |
244 | const char *query_end = query + query_length; |
245 | const char *query_rest; |
246 | weight = grn_atoi(weight_start, query_end, &query_rest); |
247 | if (weight_start == query_rest) { |
248 | break; |
249 | } |
250 | size_t n_used_query_length = query_rest - query; |
251 | *consumed_query_length += n_used_query_length; |
252 | query_length -= n_used_query_length; |
253 | query += n_used_query_length; |
254 | } |
255 | |
256 | n_weights++; |
257 | |
258 | append_section(section, |
259 | §ion_value_buffer, |
260 | weight, |
261 | n_weights); |
262 | } |
263 | |
264 | for (uint section = 0; section < n_sections_; ++section) { |
265 | if (specified_sections[section]) { |
266 | continue; |
267 | } |
268 | |
269 | ++n_weights; |
270 | |
271 | int default_weight = 1; |
272 | append_section(section, |
273 | §ion_value_buffer, |
274 | default_weight, |
275 | n_weights); |
276 | } |
277 | MRN_FREE_VARIABLE_LENGTH_ARRAYS(specified_sections); |
278 | |
279 | GRN_OBJ_FIN(ctx_, §ion_value_buffer); |
280 | |
281 | DBUG_RETURN(n_weights > 0); |
282 | } |
283 | |
284 | void QueryParser::append_section(uint section, |
285 | grn_obj *section_value_buffer, |
286 | int weight, |
287 | uint n_weights) { |
288 | MRN_DBUG_ENTER_METHOD(); |
289 | |
290 | if (!match_columns_) { |
291 | DBUG_VOID_RETURN; |
292 | } |
293 | |
294 | grn_expr_append_obj(ctx_, match_columns_, default_column_, GRN_OP_PUSH, 1); |
295 | GRN_UINT32_SET(ctx_, section_value_buffer, section); |
296 | grn_expr_append_const(ctx_, match_columns_, section_value_buffer, |
297 | GRN_OP_PUSH, 1); |
298 | grn_expr_append_op(ctx_, match_columns_, GRN_OP_GET_MEMBER, 2); |
299 | |
300 | if (weight != 1) { |
301 | grn_expr_append_const_int(ctx_, match_columns_, weight, GRN_OP_PUSH, 1); |
302 | grn_expr_append_op(ctx_, match_columns_, GRN_OP_STAR, 2); |
303 | } |
304 | |
305 | if (n_weights >= 2) { |
306 | grn_expr_append_op(ctx_, match_columns_, GRN_OP_OR, 2); |
307 | } |
308 | |
309 | DBUG_VOID_RETURN; |
310 | } |
311 | |
312 | bool QueryParser::parse_pragma_d(const char *query, |
313 | size_t query_length, |
314 | grn_operator *default_operator, |
315 | size_t *consumed_query_length) { |
316 | MRN_DBUG_ENTER_METHOD(); |
317 | |
318 | bool succeeded = true; |
319 | if (query_length >= 1 && query[0] == '+') { |
320 | *default_operator = GRN_OP_AND; |
321 | *consumed_query_length = 1; |
322 | } else if (query_length >= 1 && query[0] == '-') { |
323 | *default_operator = GRN_OP_AND_NOT; |
324 | *consumed_query_length = 1; |
325 | } else if (query_length >= 2 && memcmp(query, "OR" , 2) == 0) { |
326 | *default_operator = GRN_OP_OR; |
327 | *consumed_query_length = 2; |
328 | } else { |
329 | succeeded = false; |
330 | } |
331 | |
332 | DBUG_RETURN(succeeded); |
333 | } |
334 | |
335 | grn_expr_flags QueryParser::default_expression_flags() { |
336 | MRN_DBUG_ENTER_METHOD(); |
337 | |
338 | ulonglong syntax_flags = variables::get_boolean_mode_syntax_flags(thd_); |
339 | grn_expr_flags expression_flags = 0; |
340 | if (syntax_flags == variables::BOOLEAN_MODE_SYNTAX_FLAG_DEFAULT) { |
341 | expression_flags = GRN_EXPR_SYNTAX_QUERY | GRN_EXPR_ALLOW_LEADING_NOT; |
342 | } else { |
343 | if (syntax_flags & variables::BOOLEAN_MODE_SYNTAX_FLAG_SYNTAX_SCRIPT) { |
344 | expression_flags |= GRN_EXPR_SYNTAX_SCRIPT; |
345 | } else { |
346 | expression_flags |= GRN_EXPR_SYNTAX_QUERY; |
347 | } |
348 | if (syntax_flags & variables::BOOLEAN_MODE_SYNTAX_FLAG_ALLOW_COLUMN) { |
349 | expression_flags |= GRN_EXPR_ALLOW_COLUMN; |
350 | } |
351 | if (syntax_flags & variables::BOOLEAN_MODE_SYNTAX_FLAG_ALLOW_UPDATE) { |
352 | expression_flags |= GRN_EXPR_ALLOW_UPDATE; |
353 | } |
354 | if (syntax_flags & variables::BOOLEAN_MODE_SYNTAX_FLAG_ALLOW_LEADING_NOT) { |
355 | expression_flags |= GRN_EXPR_ALLOW_LEADING_NOT; |
356 | } |
357 | } |
358 | |
359 | DBUG_RETURN(expression_flags); |
360 | } |
361 | } |
362 | |