1/* -*- c-basic-offset: 2 -*- */
2/*
3 Copyright(C) 2017 Kouhei Sutou <kou@clear-code.com>
4
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 This library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with this library; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18*/
19
20#include "mrn_query_parser.hpp"
21
22#include <mrn_variables.hpp>
23
24extern "C" {
25 /* Groonga's internal functions */
26 int grn_atoi(const char *nptr, const char *end, const char **rest);
27 uint grn_atoui(const char *nptr, const char *end, const char **rest);
28}
29
30#define MRN_CLASS_NAME "mrn::QueryParser"
31
32namespace mrn {
33 QueryParser::QueryParser(grn_ctx *ctx,
34 THD *thd,
35 grn_obj *expression,
36 grn_obj *default_column,
37 uint n_sections,
38 grn_obj *match_columns)
39 : ctx_(ctx),
40 thd_(thd),
41 expression_(expression),
42 default_column_(default_column),
43 n_sections_(n_sections),
44 match_columns_(match_columns) {
45 }
46
47 QueryParser::~QueryParser() {
48 }
49
50 grn_rc QueryParser::parse(const char *query, size_t query_length) {
51 MRN_DBUG_ENTER_METHOD();
52
53 const char *raw_query = NULL;
54 size_t raw_query_length = 0;
55 grn_operator default_operator = GRN_OP_OR;
56 grn_expr_flags expression_flags = 0;
57 parse_pragma(query,
58 query_length,
59 &raw_query,
60 &raw_query_length,
61 &default_operator,
62 &expression_flags);
63
64 grn_obj *default_column = default_column_;
65 if (match_columns_) {
66 default_column = match_columns_;
67 }
68 grn_rc rc = grn_expr_parse(ctx_,
69 expression_,
70 raw_query,
71 raw_query_length,
72 default_column,
73 GRN_OP_MATCH,
74 default_operator,
75 expression_flags);
76 if (rc != GRN_SUCCESS) {
77 char error_message[MRN_MESSAGE_BUFFER_SIZE];
78 snprintf(error_message, MRN_MESSAGE_BUFFER_SIZE,
79 "failed to parse fulltext search keyword: <%.*s>: <%s>",
80 static_cast<int>(query_length),
81 query,
82 ctx_->errbuf);
83 variables::ActionOnError action =
84 variables::get_action_on_fulltext_query_error(thd_);
85 switch (action) {
86 case variables::ACTION_ON_ERROR_ERROR:
87 my_message(ER_PARSE_ERROR, error_message, MYF(0));
88 break;
89 case variables::ACTION_ON_ERROR_ERROR_AND_LOG:
90 my_message(ER_PARSE_ERROR, error_message, MYF(0));
91 GRN_LOG(ctx_, GRN_LOG_ERROR, "%s", error_message);
92 break;
93 case variables::ACTION_ON_ERROR_IGNORE:
94 break;
95 case variables::ACTION_ON_ERROR_IGNORE_AND_LOG:
96 GRN_LOG(ctx_, GRN_LOG_ERROR, "%s", error_message);
97 break;
98 }
99 }
100
101 DBUG_RETURN(rc);
102 }
103
104 void QueryParser::parse_pragma(const char *query,
105 size_t query_length,
106 const char **raw_query,
107 size_t *raw_query_length,
108 grn_operator *default_operator,
109 grn_expr_flags *flags) {
110 MRN_DBUG_ENTER_METHOD();
111
112 const char *current_query = query;
113 size_t current_query_length = query_length;
114
115 *default_operator = GRN_OP_OR;
116
117 if (current_query_length >= 4 && memcmp(current_query, "*SS ", 4) == 0) {
118 *raw_query = current_query + 4;
119 *raw_query_length = current_query_length - 4;
120 *flags = GRN_EXPR_SYNTAX_SCRIPT;
121 DBUG_VOID_RETURN;
122 }
123
124 bool weight_specified = false;
125 *raw_query = query;
126 *raw_query_length = query_length;
127 *flags = default_expression_flags();
128 if (current_query_length >= 2 && current_query[0] == '*') {
129 bool parsed = false;
130 bool done = false;
131 current_query++;
132 current_query_length--;
133 while (!done) {
134 size_t consumed_query_length = 0;
135 switch (current_query[0]) {
136 case 'D':
137 if (parse_pragma_d(current_query + 1,
138 current_query_length - 1,
139 default_operator,
140 &consumed_query_length)) {
141 parsed = true;
142 consumed_query_length += 1;
143 current_query += consumed_query_length;
144 current_query_length -= consumed_query_length;
145 } else {
146 done = true;
147 }
148 break;
149 case 'W':
150 if (parse_pragma_w(current_query + 1,
151 current_query_length - 1,
152 &consumed_query_length)) {
153 parsed = true;
154 weight_specified = true;
155 consumed_query_length += 1;
156 current_query += consumed_query_length;
157 current_query_length -= consumed_query_length;
158 } else {
159 done = true;
160 }
161 break;
162 default:
163 done = true;
164 break;
165 }
166 }
167 if (parsed) {
168 *raw_query = current_query;
169 *raw_query_length = current_query_length;
170 }
171 }
172
173 // WORKAROUND: ignore the first '+' to support "+apple macintosh" pattern.
174 while (*raw_query_length > 0 && (*raw_query)[0] == ' ') {
175 (*raw_query)++;
176 (*raw_query_length)--;
177 }
178 if (*raw_query_length > 0 && (*raw_query)[0] == '+') {
179 (*raw_query)++;
180 (*raw_query_length)--;
181 }
182 if (!weight_specified && match_columns_) {
183 grn_expr_append_obj(ctx_, match_columns_, default_column_, GRN_OP_PUSH, 1);
184 }
185
186 DBUG_VOID_RETURN;
187 }
188
189 bool QueryParser::parse_pragma_w(const char *query,
190 size_t query_length,
191 size_t *consumed_query_length) {
192 MRN_DBUG_ENTER_METHOD();
193
194 *consumed_query_length = 0;
195
196 grn_obj section_value_buffer;
197 GRN_UINT32_INIT(&section_value_buffer, 0);
198
199 MRN_ALLOCATE_VARIABLE_LENGTH_ARRAYS(bool, specified_sections, n_sections_);
200 for (uint i = 0; i < n_sections_; ++i) {
201 specified_sections[i] = false;
202 }
203
204 uint n_weights = 0;
205 while (query_length >= 1) {
206 if (n_weights >= 1) {
207 if (query[0] != ',') {
208 break;
209 }
210 size_t n_used_query_length = 1;
211 *consumed_query_length += n_used_query_length;
212 query_length -= n_used_query_length;
213 query += n_used_query_length;
214 if (query_length == 0) {
215 break;
216 }
217 }
218
219 uint section = 0;
220 if ('1' <= query[0] && query[0] <= '9') {
221 const char *section_start = query;
222 const char *query_end = query + query_length;
223 const char *query_rest;
224 section = grn_atoui(section_start, query_end, &query_rest);
225 if (section_start == query_rest) {
226 break;
227 }
228 if (!(0 < section && section <= n_sections_)) {
229 break;
230 }
231 section -= 1;
232 specified_sections[section] = true;
233 size_t n_used_query_length = query_rest - query;
234 *consumed_query_length += n_used_query_length;
235 query_length -= n_used_query_length;
236 query += n_used_query_length;
237 } else {
238 break;
239 }
240
241 int weight = 1;
242 if (query_length >= 2 && query[0] == ':') {
243 const char *weight_start = query + 1;
244 const char *query_end = query + query_length;
245 const char *query_rest;
246 weight = grn_atoi(weight_start, query_end, &query_rest);
247 if (weight_start == query_rest) {
248 break;
249 }
250 size_t n_used_query_length = query_rest - query;
251 *consumed_query_length += n_used_query_length;
252 query_length -= n_used_query_length;
253 query += n_used_query_length;
254 }
255
256 n_weights++;
257
258 append_section(section,
259 &section_value_buffer,
260 weight,
261 n_weights);
262 }
263
264 for (uint section = 0; section < n_sections_; ++section) {
265 if (specified_sections[section]) {
266 continue;
267 }
268
269 ++n_weights;
270
271 int default_weight = 1;
272 append_section(section,
273 &section_value_buffer,
274 default_weight,
275 n_weights);
276 }
277 MRN_FREE_VARIABLE_LENGTH_ARRAYS(specified_sections);
278
279 GRN_OBJ_FIN(ctx_, &section_value_buffer);
280
281 DBUG_RETURN(n_weights > 0);
282 }
283
284 void QueryParser::append_section(uint section,
285 grn_obj *section_value_buffer,
286 int weight,
287 uint n_weights) {
288 MRN_DBUG_ENTER_METHOD();
289
290 if (!match_columns_) {
291 DBUG_VOID_RETURN;
292 }
293
294 grn_expr_append_obj(ctx_, match_columns_, default_column_, GRN_OP_PUSH, 1);
295 GRN_UINT32_SET(ctx_, section_value_buffer, section);
296 grn_expr_append_const(ctx_, match_columns_, section_value_buffer,
297 GRN_OP_PUSH, 1);
298 grn_expr_append_op(ctx_, match_columns_, GRN_OP_GET_MEMBER, 2);
299
300 if (weight != 1) {
301 grn_expr_append_const_int(ctx_, match_columns_, weight, GRN_OP_PUSH, 1);
302 grn_expr_append_op(ctx_, match_columns_, GRN_OP_STAR, 2);
303 }
304
305 if (n_weights >= 2) {
306 grn_expr_append_op(ctx_, match_columns_, GRN_OP_OR, 2);
307 }
308
309 DBUG_VOID_RETURN;
310 }
311
312 bool QueryParser::parse_pragma_d(const char *query,
313 size_t query_length,
314 grn_operator *default_operator,
315 size_t *consumed_query_length) {
316 MRN_DBUG_ENTER_METHOD();
317
318 bool succeeded = true;
319 if (query_length >= 1 && query[0] == '+') {
320 *default_operator = GRN_OP_AND;
321 *consumed_query_length = 1;
322 } else if (query_length >= 1 && query[0] == '-') {
323 *default_operator = GRN_OP_AND_NOT;
324 *consumed_query_length = 1;
325 } else if (query_length >= 2 && memcmp(query, "OR", 2) == 0) {
326 *default_operator = GRN_OP_OR;
327 *consumed_query_length = 2;
328 } else {
329 succeeded = false;
330 }
331
332 DBUG_RETURN(succeeded);
333 }
334
335 grn_expr_flags QueryParser::default_expression_flags() {
336 MRN_DBUG_ENTER_METHOD();
337
338 ulonglong syntax_flags = variables::get_boolean_mode_syntax_flags(thd_);
339 grn_expr_flags expression_flags = 0;
340 if (syntax_flags == variables::BOOLEAN_MODE_SYNTAX_FLAG_DEFAULT) {
341 expression_flags = GRN_EXPR_SYNTAX_QUERY | GRN_EXPR_ALLOW_LEADING_NOT;
342 } else {
343 if (syntax_flags & variables::BOOLEAN_MODE_SYNTAX_FLAG_SYNTAX_SCRIPT) {
344 expression_flags |= GRN_EXPR_SYNTAX_SCRIPT;
345 } else {
346 expression_flags |= GRN_EXPR_SYNTAX_QUERY;
347 }
348 if (syntax_flags & variables::BOOLEAN_MODE_SYNTAX_FLAG_ALLOW_COLUMN) {
349 expression_flags |= GRN_EXPR_ALLOW_COLUMN;
350 }
351 if (syntax_flags & variables::BOOLEAN_MODE_SYNTAX_FLAG_ALLOW_UPDATE) {
352 expression_flags |= GRN_EXPR_ALLOW_UPDATE;
353 }
354 if (syntax_flags & variables::BOOLEAN_MODE_SYNTAX_FLAG_ALLOW_LEADING_NOT) {
355 expression_flags |= GRN_EXPR_ALLOW_LEADING_NOT;
356 }
357 }
358
359 DBUG_RETURN(expression_flags);
360 }
361}
362