1 | /* -*- c-basic-offset: 2 -*- */ |
2 | /* |
3 | Copyright(C) 2015-2016 Brazil |
4 | |
5 | This library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License version 2.1 as published by the Free Software Foundation. |
8 | |
9 | This library is distributed in the hope that it will be useful, |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | Lesser General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU Lesser General Public |
15 | License along with this library; if not, write to the Free Software |
16 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
17 | */ |
18 | |
19 | #include "ts_expr_parser.h" |
20 | |
21 | #include <stdlib.h> |
22 | #include <string.h> |
23 | |
24 | #include "../grn_ctx.h" |
25 | |
26 | #include "ts_log.h" |
27 | #include "ts_str.h" |
28 | #include "ts_util.h" |
29 | |
30 | /*------------------------------------------------------------- |
31 | * grn_ts_expr_token. |
32 | */ |
33 | |
34 | #define GRN_TS_EXPR_TOKEN_INIT(TYPE)\ |
35 | memset(token, 0, sizeof(*token));\ |
36 | token->type = GRN_TS_EXPR_ ## TYPE ## _TOKEN;\ |
37 | token->src = src; |
38 | /* grn_ts_expr_dummy_token_init() initializes a token. */ |
39 | static void |
40 | grn_ts_expr_dummy_token_init(grn_ctx *ctx, grn_ts_expr_dummy_token *token, |
41 | grn_ts_str src) |
42 | { |
43 | GRN_TS_EXPR_TOKEN_INIT(DUMMY) |
44 | } |
45 | |
46 | /* grn_ts_expr_start_token_init() initializes a token. */ |
47 | static void |
48 | grn_ts_expr_start_token_init(grn_ctx *ctx, grn_ts_expr_start_token *token, |
49 | grn_ts_str src) |
50 | { |
51 | GRN_TS_EXPR_TOKEN_INIT(START) |
52 | } |
53 | |
54 | /* grn_ts_expr_end_token_init() initializes a token. */ |
55 | static void |
56 | grn_ts_expr_end_token_init(grn_ctx *ctx, grn_ts_expr_end_token *token, |
57 | grn_ts_str src) |
58 | { |
59 | GRN_TS_EXPR_TOKEN_INIT(END) |
60 | } |
61 | |
62 | /* grn_ts_expr_const_token_init() initializes a token. */ |
63 | static void |
64 | grn_ts_expr_const_token_init(grn_ctx *ctx, grn_ts_expr_const_token *token, |
65 | grn_ts_str src) |
66 | { |
67 | GRN_TS_EXPR_TOKEN_INIT(CONST); |
68 | grn_ts_buf_init(ctx, &token->buf); |
69 | } |
70 | |
71 | /* grn_ts_expr_name_token_init() initializes a token. */ |
72 | static void |
73 | grn_ts_expr_name_token_init(grn_ctx *ctx, grn_ts_expr_name_token *token, |
74 | grn_ts_str src) |
75 | { |
76 | GRN_TS_EXPR_TOKEN_INIT(NAME); |
77 | } |
78 | |
79 | /* grn_ts_expr_op_token_init() initializes a token. */ |
80 | static void |
81 | grn_ts_expr_op_token_init(grn_ctx *ctx, grn_ts_expr_op_token *token, |
82 | grn_ts_str src) |
83 | { |
84 | GRN_TS_EXPR_TOKEN_INIT(OP); |
85 | } |
86 | |
87 | /* grn_ts_expr_bridge_token_init() initializes a token. */ |
88 | static void |
89 | grn_ts_expr_bridge_token_init(grn_ctx *ctx, grn_ts_expr_bridge_token *token, |
90 | grn_ts_str src) |
91 | { |
92 | GRN_TS_EXPR_TOKEN_INIT(BRIDGE) |
93 | } |
94 | |
95 | /* grn_ts_expr_bracket_token_init() initializes a token. */ |
96 | static void |
97 | grn_ts_expr_bracket_token_init(grn_ctx *ctx, grn_ts_expr_bracket_token *token, |
98 | grn_ts_str src) |
99 | { |
100 | GRN_TS_EXPR_TOKEN_INIT(BRACKET) |
101 | } |
102 | #undef GRN_TS_EXPR_TOKEN_INIT |
103 | |
104 | /* grn_ts_expr_dummy_token_fin() finalizes a token. */ |
105 | static void |
106 | grn_ts_expr_dummy_token_fin(grn_ctx *ctx, grn_ts_expr_dummy_token *token) |
107 | { |
108 | /* Nothing to do. */ |
109 | } |
110 | |
111 | /* grn_ts_expr_start_token_fin() finalizes a token. */ |
112 | static void |
113 | grn_ts_expr_start_token_fin(grn_ctx *ctx, grn_ts_expr_start_token *token) |
114 | { |
115 | /* Nothing to do. */ |
116 | } |
117 | |
118 | /* grn_ts_expr_end_token_fin() finalizes a token. */ |
119 | static void |
120 | grn_ts_expr_end_token_fin(grn_ctx *ctx, grn_ts_expr_end_token *token) |
121 | { |
122 | /* Nothing to do. */ |
123 | } |
124 | |
125 | /* grn_ts_expr_const_token_fin() finalizes a token. */ |
126 | static void |
127 | grn_ts_expr_const_token_fin(grn_ctx *ctx, grn_ts_expr_const_token *token) |
128 | { |
129 | grn_ts_buf_fin(ctx, &token->buf); |
130 | } |
131 | |
132 | /* grn_ts_expr_name_token_fin() finalizes a token. */ |
133 | static void |
134 | grn_ts_expr_name_token_fin(grn_ctx *ctx, grn_ts_expr_name_token *token) |
135 | { |
136 | /* Nothing to do. */ |
137 | } |
138 | |
139 | /* grn_ts_expr_op_token_fin() finalizes a token. */ |
140 | static void |
141 | grn_ts_expr_op_token_fin(grn_ctx *ctx, grn_ts_expr_op_token *token) |
142 | { |
143 | /* Nothing to do. */ |
144 | } |
145 | |
146 | /* grn_ts_expr_bridge_token_fin() finalizes a token. */ |
147 | static void |
148 | grn_ts_expr_bridge_token_fin(grn_ctx *ctx, grn_ts_expr_bridge_token *token) |
149 | { |
150 | /* Nothing to do. */ |
151 | } |
152 | |
153 | /* grn_ts_expr_bracket_token_fin() finalizes a token. */ |
154 | static void |
155 | grn_ts_expr_bracket_token_fin(grn_ctx *ctx, grn_ts_expr_bracket_token *token) |
156 | { |
157 | /* Nothing to do. */ |
158 | } |
159 | |
160 | #define GRN_TS_EXPR_TOKEN_OPEN(TYPE, type)\ |
161 | grn_ts_expr_ ## type ## _token *new_token;\ |
162 | new_token = GRN_MALLOCN(grn_ts_expr_ ## type ## _token, 1);\ |
163 | if (!new_token) {\ |
164 | GRN_TS_ERR_RETURN(GRN_NO_MEMORY_AVAILABLE,\ |
165 | "GRN_MALLOCN failed: %" GRN_FMT_SIZE " x 1",\ |
166 | sizeof(grn_ts_expr_ ## type ## _token));\ |
167 | }\ |
168 | grn_ts_expr_ ## type ## _token_init(ctx, new_token, src);\ |
169 | *token = new_token; |
170 | /* grn_ts_expr_dummy_token_open() creates a token. */ |
171 | /* |
172 | static grn_rc |
173 | grn_ts_expr_dummy_token_open(grn_ctx *ctx, grn_ts_str src, |
174 | grn_ts_expr_dummy_token **token) |
175 | { |
176 | GRN_TS_EXPR_TOKEN_OPEN(DUMMY, dummy) |
177 | return GRN_SUCCESS; |
178 | } |
179 | */ |
180 | |
181 | /* grn_ts_expr_start_token_open() creates a token. */ |
182 | static grn_rc |
183 | grn_ts_expr_start_token_open(grn_ctx *ctx, grn_ts_str src, |
184 | grn_ts_expr_start_token **token) |
185 | { |
186 | GRN_TS_EXPR_TOKEN_OPEN(START, start) |
187 | return GRN_SUCCESS; |
188 | } |
189 | |
190 | /* grn_ts_expr_end_token_open() creates a token. */ |
191 | static grn_rc |
192 | grn_ts_expr_end_token_open(grn_ctx *ctx, grn_ts_str src, |
193 | grn_ts_expr_end_token **token) |
194 | { |
195 | GRN_TS_EXPR_TOKEN_OPEN(END, end) |
196 | return GRN_SUCCESS; |
197 | } |
198 | |
199 | /* grn_ts_expr_const_token_open() creates a token. */ |
200 | static grn_rc |
201 | grn_ts_expr_const_token_open(grn_ctx *ctx, grn_ts_str src, |
202 | grn_ts_expr_const_token **token) |
203 | { |
204 | GRN_TS_EXPR_TOKEN_OPEN(CONST, const) |
205 | return GRN_SUCCESS; |
206 | } |
207 | |
208 | /* grn_ts_expr_name_token_open() creates a token. */ |
209 | static grn_rc |
210 | grn_ts_expr_name_token_open(grn_ctx *ctx, grn_ts_str src, |
211 | grn_ts_expr_name_token **token) |
212 | { |
213 | GRN_TS_EXPR_TOKEN_OPEN(NAME, name) |
214 | return GRN_SUCCESS; |
215 | } |
216 | |
217 | /* grn_ts_expr_op_token_open() creates a token. */ |
218 | static grn_rc |
219 | grn_ts_expr_op_token_open(grn_ctx *ctx, grn_ts_str src, grn_ts_op_type op_type, |
220 | grn_ts_expr_op_token **token) |
221 | { |
222 | GRN_TS_EXPR_TOKEN_OPEN(OP, op) |
223 | new_token->op_type = op_type; |
224 | return GRN_SUCCESS; |
225 | } |
226 | |
227 | /* grn_ts_expr_bridge_token_open() creates a token. */ |
228 | static grn_rc |
229 | grn_ts_expr_bridge_token_open(grn_ctx *ctx, grn_ts_str src, |
230 | grn_ts_expr_bridge_token **token) |
231 | { |
232 | GRN_TS_EXPR_TOKEN_OPEN(BRIDGE, bridge) |
233 | return GRN_SUCCESS; |
234 | } |
235 | |
236 | /* grn_ts_expr_bracket_token_open() creates a token. */ |
237 | static grn_rc |
238 | grn_ts_expr_bracket_token_open(grn_ctx *ctx, grn_ts_str src, |
239 | grn_ts_expr_bracket_token **token) |
240 | { |
241 | GRN_TS_EXPR_TOKEN_OPEN(BRACKET, bracket) |
242 | return GRN_SUCCESS; |
243 | } |
244 | #undef GRN_TS_EXPR_TOKEN_OPEN |
245 | |
246 | #define GRN_TS_EXPR_TOKEN_CLOSE_CASE(TYPE, type)\ |
247 | case GRN_TS_EXPR_ ## TYPE ## _TOKEN: {\ |
248 | grn_ts_expr_ ## type ## _token *type ## _token;\ |
249 | type ## _token = (grn_ts_expr_ ## type ## _token *)token;\ |
250 | grn_ts_expr_ ## type ## _token_fin(ctx, type ## _token);\ |
251 | break;\ |
252 | } |
253 | /* grn_ts_expr_token_close() destroys a token. */ |
254 | static void |
255 | grn_ts_expr_token_close(grn_ctx *ctx, grn_ts_expr_token *token) |
256 | { |
257 | switch (token->type) { |
258 | GRN_TS_EXPR_TOKEN_CLOSE_CASE(DUMMY, dummy) |
259 | GRN_TS_EXPR_TOKEN_CLOSE_CASE(START, start) |
260 | GRN_TS_EXPR_TOKEN_CLOSE_CASE(END, end) |
261 | GRN_TS_EXPR_TOKEN_CLOSE_CASE(CONST, const) |
262 | GRN_TS_EXPR_TOKEN_CLOSE_CASE(NAME, name) |
263 | GRN_TS_EXPR_TOKEN_CLOSE_CASE(OP, op) |
264 | GRN_TS_EXPR_TOKEN_CLOSE_CASE(BRACKET, bracket) |
265 | GRN_TS_EXPR_TOKEN_CLOSE_CASE(BRIDGE, bridge) |
266 | } |
267 | GRN_FREE(token); |
268 | } |
269 | #undef GRN_TS_EXPR_TOKEN_CLOSE_CASE |
270 | |
271 | /*------------------------------------------------------------- |
272 | * grn_ts_expr_parser. |
273 | */ |
274 | |
275 | /* grn_ts_expr_parser_init() initializes a parser. */ |
276 | static void |
277 | grn_ts_expr_parser_init(grn_ctx *ctx, grn_ts_expr_parser *parser) |
278 | { |
279 | memset(parser, 0, sizeof(*parser)); |
280 | parser->builder = NULL; |
281 | grn_ts_buf_init(ctx, &parser->str_buf); |
282 | parser->tokens = NULL; |
283 | parser->dummy_tokens = NULL; |
284 | parser->stack = NULL; |
285 | } |
286 | |
287 | /* grn_ts_expr_parser_fin() finalizes a parser. */ |
288 | static void |
289 | grn_ts_expr_parser_fin(grn_ctx *ctx, grn_ts_expr_parser *parser) |
290 | { |
291 | if (parser->stack) { |
292 | GRN_FREE(parser->stack); |
293 | } |
294 | if (parser->dummy_tokens) { |
295 | size_t i; |
296 | for (i = 0; i < parser->n_dummy_tokens; i++) { |
297 | grn_ts_expr_dummy_token_fin(ctx, &parser->dummy_tokens[i]); |
298 | } |
299 | GRN_FREE(parser->dummy_tokens); |
300 | } |
301 | if (parser->tokens) { |
302 | size_t i; |
303 | for (i = 0; i < parser->n_tokens; i++) { |
304 | grn_ts_expr_token_close(ctx, parser->tokens[i]); |
305 | } |
306 | GRN_FREE(parser->tokens); |
307 | } |
308 | grn_ts_buf_fin(ctx, &parser->str_buf); |
309 | if (parser->builder) { |
310 | grn_ts_expr_builder_close(ctx, parser->builder); |
311 | } |
312 | } |
313 | |
314 | grn_rc |
315 | grn_ts_expr_parser_open(grn_ctx *ctx, grn_obj *table, |
316 | grn_ts_expr_parser **parser) |
317 | { |
318 | grn_rc rc; |
319 | grn_ts_expr_parser *new_parser; |
320 | if (!ctx) { |
321 | return GRN_INVALID_ARGUMENT; |
322 | } |
323 | if (!table || !grn_ts_obj_is_table(ctx, table) || !parser) { |
324 | GRN_TS_ERR_RETURN(GRN_INVALID_ARGUMENT, "invalid argument" ); |
325 | } |
326 | new_parser = GRN_MALLOCN(grn_ts_expr_parser, 1); |
327 | if (!new_parser) { |
328 | GRN_TS_ERR_RETURN(GRN_NO_MEMORY_AVAILABLE, |
329 | "GRN_MALLOCN failed: %" GRN_FMT_SIZE " x 1" , |
330 | sizeof(grn_ts_expr_parser)); |
331 | } |
332 | grn_ts_expr_parser_init(ctx, new_parser); |
333 | rc = grn_ts_expr_builder_open(ctx, table, &new_parser->builder); |
334 | if (rc != GRN_SUCCESS) { |
335 | grn_ts_expr_parser_fin(ctx, new_parser); |
336 | GRN_FREE(new_parser); |
337 | return rc; |
338 | } |
339 | *parser = new_parser; |
340 | return GRN_SUCCESS; |
341 | } |
342 | |
343 | grn_rc |
344 | grn_ts_expr_parser_close(grn_ctx *ctx, grn_ts_expr_parser *parser) |
345 | { |
346 | if (!ctx) { |
347 | return GRN_INVALID_ARGUMENT; |
348 | } |
349 | if (!parser) { |
350 | GRN_TS_ERR_RETURN(GRN_INVALID_ARGUMENT, "invalid argument" ); |
351 | } |
352 | grn_ts_expr_parser_fin(ctx, parser); |
353 | GRN_FREE(parser); |
354 | return GRN_SUCCESS; |
355 | } |
356 | |
357 | /* grn_ts_expr_parser_tokenize_start() creates the start token. */ |
358 | static grn_rc |
359 | grn_ts_expr_parser_tokenize_start(grn_ctx *ctx, grn_ts_expr_parser *parser, |
360 | grn_ts_str str, grn_ts_expr_token **token) |
361 | { |
362 | grn_ts_str token_str = { str.ptr, 0 }; |
363 | grn_ts_expr_start_token *new_token; |
364 | grn_rc rc = grn_ts_expr_start_token_open(ctx, token_str, &new_token); |
365 | if (rc != GRN_SUCCESS) { |
366 | return rc; |
367 | } |
368 | *token = (grn_ts_expr_token *)new_token; |
369 | return GRN_SUCCESS; |
370 | } |
371 | |
372 | /* grn_ts_expr_parser_tokenize_end() creates the end token. */ |
373 | static grn_rc |
374 | grn_ts_expr_parser_tokenize_end(grn_ctx *ctx, grn_ts_expr_parser *parser, |
375 | grn_ts_str str, grn_ts_expr_token **token) |
376 | { |
377 | grn_ts_str token_str = { str.ptr, 0 }; |
378 | grn_ts_expr_end_token *new_token; |
379 | grn_rc rc = grn_ts_expr_end_token_open(ctx, token_str, &new_token); |
380 | if (rc != GRN_SUCCESS) { |
381 | return rc; |
382 | } |
383 | *token = (grn_ts_expr_token *)new_token; |
384 | return GRN_SUCCESS; |
385 | } |
386 | |
387 | /* grn_ts_expr_parser_tokenize_number() tokenizes an Int or Float literal. */ |
388 | static grn_rc |
389 | grn_ts_expr_parser_tokenize_number(grn_ctx *ctx, grn_ts_expr_parser *parser, |
390 | grn_ts_str str, grn_ts_expr_token **token) |
391 | { |
392 | char *end; |
393 | grn_rc rc; |
394 | grn_ts_int int_value; |
395 | grn_ts_str token_str; |
396 | grn_ts_expr_const_token *new_token; |
397 | |
398 | int_value = strtol(str.ptr, &end, 0); |
399 | if ((end != str.ptr) && (*end != '.') && (*end != 'e')) { |
400 | if (grn_ts_byte_is_name_char(*end)) { |
401 | GRN_TS_ERR_RETURN(GRN_INVALID_FORMAT, |
402 | "unterminated Int literal: \"%.*s\"" , |
403 | (int)str.size, str.ptr); |
404 | } |
405 | token_str.ptr = str.ptr; |
406 | token_str.size = end - str.ptr; |
407 | rc = grn_ts_expr_const_token_open(ctx, token_str, &new_token); |
408 | if (rc != GRN_SUCCESS) { |
409 | return rc; |
410 | } |
411 | new_token->data_kind = GRN_TS_INT; |
412 | new_token->content.as_int = int_value; |
413 | } else { |
414 | grn_ts_float float_value = strtod(str.ptr, &end); |
415 | if (end == str.ptr) { |
416 | GRN_TS_ERR_RETURN(GRN_INVALID_FORMAT, "invalid number literal: \"%.*s\"" , |
417 | (int)str.size, str.ptr); |
418 | } |
419 | if (grn_ts_byte_is_name_char(*end)) { |
420 | GRN_TS_ERR_RETURN(GRN_INVALID_FORMAT, |
421 | "unterminated Float literal: \"%.*s\"" , |
422 | (int)str.size, str.ptr); |
423 | } |
424 | token_str.ptr = str.ptr; |
425 | token_str.size = end - str.ptr; |
426 | rc = grn_ts_expr_const_token_open(ctx, token_str, &new_token); |
427 | if (rc != GRN_SUCCESS) { |
428 | return rc; |
429 | } |
430 | new_token->data_kind = GRN_TS_FLOAT; |
431 | new_token->content.as_float = float_value; |
432 | } |
433 | *token = (grn_ts_expr_token *)new_token; |
434 | return GRN_SUCCESS; |
435 | } |
436 | |
437 | /* grn_ts_expr_parser_tokenize_text() tokenizes a Text literal. */ |
438 | static grn_rc |
439 | grn_ts_expr_parser_tokenize_text(grn_ctx *ctx, grn_ts_expr_parser *parser, |
440 | grn_ts_str str, grn_ts_expr_token **token) |
441 | { |
442 | size_t i, n_escapes = 0; |
443 | grn_rc rc; |
444 | grn_ts_str token_str; |
445 | grn_ts_expr_const_token *new_token; |
446 | for (i = 1; i < str.size; i++) { |
447 | if (str.ptr[i] == '\\') { |
448 | i++; |
449 | n_escapes++; |
450 | } else if (str.ptr[i] == '"') { |
451 | break; |
452 | } |
453 | } |
454 | if (i >= str.size) { |
455 | GRN_TS_ERR_RETURN(GRN_INVALID_FORMAT, "no closing double quote: \"%.*s\"" , |
456 | (int)str.size, str.ptr); |
457 | } |
458 | token_str.ptr = str.ptr; |
459 | token_str.size = i + 1; |
460 | rc = grn_ts_expr_const_token_open(ctx, token_str, &new_token); |
461 | if (rc != GRN_SUCCESS) { |
462 | return rc; |
463 | } |
464 | new_token->data_kind = GRN_TS_TEXT; |
465 | if (n_escapes) { |
466 | char *buf_ptr; |
467 | const char *str_ptr = str.ptr + 1; |
468 | size_t size = token_str.size - 2 - n_escapes; |
469 | rc = grn_ts_buf_resize(ctx, &new_token->buf, size); |
470 | if (rc != GRN_SUCCESS) { |
471 | grn_ts_expr_token_close(ctx, (grn_ts_expr_token *)new_token); |
472 | return rc; |
473 | } |
474 | buf_ptr = (char *)new_token->buf.ptr; |
475 | for (i = 0; i < size; i++) { |
476 | if (str_ptr[i] == '\\') { |
477 | str_ptr++; |
478 | } |
479 | buf_ptr[i] = str_ptr[i]; |
480 | } |
481 | new_token->content.as_text.ptr = buf_ptr; |
482 | new_token->content.as_text.size = size; |
483 | } else { |
484 | new_token->content.as_text.ptr = token_str.ptr + 1; |
485 | new_token->content.as_text.size = token_str.size - 2; |
486 | } |
487 | *token = (grn_ts_expr_token *)new_token; |
488 | return GRN_SUCCESS; |
489 | } |
490 | |
491 | /* grn_ts_expr_parser_tokenize_name() tokenizes a Bool literal or a name. */ |
492 | static grn_rc |
493 | grn_ts_expr_parser_tokenize_name(grn_ctx *ctx, grn_ts_expr_parser *parser, |
494 | grn_ts_str str, grn_ts_expr_token **token) |
495 | { |
496 | size_t i; |
497 | grn_ts_str token_str; |
498 | for (i = 1; i < str.size; i++) { |
499 | if (!grn_ts_byte_is_name_char(str.ptr[i])) { |
500 | break; |
501 | } |
502 | } |
503 | token_str.ptr = str.ptr; |
504 | token_str.size = i; |
505 | |
506 | if (grn_ts_str_is_bool(token_str)) { |
507 | grn_ts_expr_const_token *new_token; |
508 | grn_rc rc = grn_ts_expr_const_token_open(ctx, token_str, &new_token); |
509 | if (rc != GRN_SUCCESS) { |
510 | return rc; |
511 | } |
512 | new_token->data_kind = GRN_TS_BOOL; |
513 | if (token_str.ptr[0] == 't') { |
514 | new_token->content.as_bool = GRN_TRUE; |
515 | } else { |
516 | new_token->content.as_bool = GRN_FALSE; |
517 | } |
518 | *token = (grn_ts_expr_token *)new_token; |
519 | return GRN_SUCCESS; |
520 | } |
521 | return grn_ts_expr_name_token_open(ctx, token_str, token); |
522 | } |
523 | |
524 | /* grn_ts_expr_parser_tokenize_bridge() tokenizes a bridge. */ |
525 | static grn_rc |
526 | grn_ts_expr_parser_tokenize_bridge(grn_ctx *ctx, grn_ts_expr_parser *parser, |
527 | grn_ts_str str, grn_ts_expr_token **token) |
528 | { |
529 | grn_ts_str token_str = { str.ptr, 1 }; |
530 | grn_ts_expr_bridge_token *new_token; |
531 | grn_rc rc = grn_ts_expr_bridge_token_open(ctx, token_str, &new_token); |
532 | if (rc != GRN_SUCCESS) { |
533 | return rc; |
534 | } |
535 | *token = (grn_ts_expr_token *)new_token; |
536 | return GRN_SUCCESS; |
537 | } |
538 | |
539 | /* grn_ts_expr_parser_tokenize_bracket() tokenizes a bracket. */ |
540 | static grn_rc |
541 | grn_ts_expr_parser_tokenize_bracket(grn_ctx *ctx, grn_ts_expr_parser *parser, |
542 | grn_ts_str str, |
543 | grn_ts_expr_token **token) |
544 | { |
545 | grn_ts_str token_str = { str.ptr, 1 }; |
546 | grn_ts_expr_bracket_token *new_token; |
547 | grn_rc rc = grn_ts_expr_bracket_token_open(ctx, token_str, &new_token); |
548 | if (rc != GRN_SUCCESS) { |
549 | return rc; |
550 | } |
551 | *token = (grn_ts_expr_token *)new_token; |
552 | return GRN_SUCCESS; |
553 | } |
554 | |
555 | /* |
556 | * grn_ts_expr_parsre_tokenize_sign() tokenizes an operator '+' or '-'. |
557 | * Note that '+' and '-' have two roles each. |
558 | * '+' is GRN_TS_OP_POSITIVE or GRN_TS_OP_PLUS. |
559 | * '-' is GRN_TS_OP_NEGATIVE or GRN_TS_OP_MINUS. |
560 | */ |
561 | static grn_rc |
562 | grn_ts_expr_parser_tokenize_sign(grn_ctx *ctx, grn_ts_expr_parser *parser, |
563 | grn_ts_str str, grn_ts_expr_token **token) |
564 | { |
565 | size_t n_args; |
566 | grn_rc rc; |
567 | grn_ts_op_type op_type; |
568 | grn_ts_str token_str = { str.ptr, 1 }; |
569 | grn_ts_expr_token *prev_token = parser->tokens[parser->n_tokens - 1]; |
570 | grn_ts_expr_op_token *new_token; |
571 | switch (prev_token->type) { |
572 | case GRN_TS_EXPR_START_TOKEN: |
573 | case GRN_TS_EXPR_OP_TOKEN: { |
574 | n_args = 1; |
575 | break; |
576 | } |
577 | case GRN_TS_EXPR_CONST_TOKEN: |
578 | case GRN_TS_EXPR_NAME_TOKEN: { |
579 | n_args = 2; |
580 | break; |
581 | } |
582 | case GRN_TS_EXPR_BRACKET_TOKEN: { |
583 | grn_ts_str bracket; |
584 | const grn_ts_expr_bracket_token *bracket_token; |
585 | bracket_token = (const grn_ts_expr_bracket_token *)prev_token; |
586 | bracket = bracket_token->src; |
587 | switch (bracket.ptr[0]) { |
588 | case '(': case '[': { |
589 | n_args = 1; |
590 | break; |
591 | } |
592 | case ')': case ']': { |
593 | n_args = 2; |
594 | break; |
595 | } |
596 | default: { |
597 | GRN_TS_ERR_RETURN(GRN_OBJECT_CORRUPT, "undefined bracket: \"%.*s\"" , |
598 | (int)bracket.size, bracket.ptr); |
599 | } |
600 | } |
601 | break; |
602 | } |
603 | default: { |
604 | GRN_TS_ERR_RETURN(GRN_INVALID_FORMAT, "invalid token sequence: %d" , |
605 | prev_token->type); |
606 | } |
607 | } |
608 | if (token_str.ptr[0] == '+') { |
609 | op_type = (n_args == 1) ? GRN_TS_OP_POSITIVE : GRN_TS_OP_PLUS; |
610 | } else { |
611 | op_type = (n_args == 1) ? GRN_TS_OP_NEGATIVE : GRN_TS_OP_MINUS; |
612 | } |
613 | rc = grn_ts_expr_op_token_open(ctx, token_str, op_type, &new_token); |
614 | if (rc != GRN_SUCCESS) { |
615 | return rc; |
616 | } |
617 | *token = (grn_ts_expr_token *)new_token; |
618 | return GRN_SUCCESS; |
619 | } |
620 | |
621 | /* grn_ts_expr_parser_tokenize_op() tokenizes an operator. */ |
622 | static grn_rc |
623 | grn_ts_expr_parser_tokenize_op(grn_ctx *ctx, grn_ts_expr_parser *parser, |
624 | grn_ts_str str, grn_ts_expr_token **token) |
625 | { |
626 | grn_rc rc = GRN_SUCCESS; |
627 | grn_ts_str token_str = str; |
628 | grn_ts_op_type op_type; |
629 | grn_ts_expr_op_token *new_token; |
630 | switch (str.ptr[0]) { |
631 | case '+': case '-': { |
632 | return grn_ts_expr_parser_tokenize_sign(ctx, parser, str, token); |
633 | } |
634 | case '!': { |
635 | if ((str.size >= 2) && (str.ptr[1] == '=')) { |
636 | token_str.size = 2; |
637 | op_type = GRN_TS_OP_NOT_EQUAL; |
638 | } else { |
639 | token_str.size = 1; |
640 | op_type = GRN_TS_OP_LOGICAL_NOT; |
641 | } |
642 | rc = grn_ts_expr_op_token_open(ctx, token_str, op_type, &new_token); |
643 | break; |
644 | } |
645 | #define GRN_TS_EXPR_PARSER_TOKENIZE_OP_CASE(label, TYPE_1, TYPE_2, TYPE_3,\ |
646 | TYPE_EQUAL)\ |
647 | case label: {\ |
648 | if ((str.size >= 2) && (str.ptr[1] == '=')) {\ |
649 | token_str.size = 2;\ |
650 | op_type = GRN_TS_OP_ ## TYPE_EQUAL;\ |
651 | } else if ((str.size >= 2) && (str.ptr[1] == label)) {\ |
652 | if ((str.size >= 3) && (str.ptr[2] == label)) {\ |
653 | token_str.size = 3;\ |
654 | op_type = GRN_TS_OP_ ## TYPE_3;\ |
655 | } else {\ |
656 | token_str.size = 2;\ |
657 | op_type = GRN_TS_OP_ ## TYPE_2;\ |
658 | }\ |
659 | } else {\ |
660 | token_str.size = 1;\ |
661 | op_type = GRN_TS_OP_ ## TYPE_1;\ |
662 | }\ |
663 | rc = grn_ts_expr_op_token_open(ctx, token_str, op_type, &new_token);\ |
664 | break;\ |
665 | } |
666 | GRN_TS_EXPR_PARSER_TOKENIZE_OP_CASE('<', LESS, SHIFT_ARITHMETIC_LEFT, |
667 | SHIFT_LOGICAL_LEFT, LESS_EQUAL) |
668 | GRN_TS_EXPR_PARSER_TOKENIZE_OP_CASE('>', GREATER, SHIFT_ARITHMETIC_RIGHT, |
669 | SHIFT_LOGICAL_RIGHT, GREATER_EQUAL) |
670 | #undef GRN_TS_EXPR_PARSER_TOKENIZE_OP_CASE |
671 | case '&': { |
672 | if ((str.size >= 2) && (str.ptr[1] == '&')) { |
673 | token_str.size = 2; |
674 | op_type = GRN_TS_OP_LOGICAL_AND; |
675 | } else if ((str.size >= 2) && (str.ptr[1] == '&')) { |
676 | token_str.size = 2; |
677 | op_type = GRN_TS_OP_LOGICAL_SUB; |
678 | } else { |
679 | token_str.size = 1; |
680 | op_type = GRN_TS_OP_BITWISE_AND; |
681 | } |
682 | rc = grn_ts_expr_op_token_open(ctx, token_str, op_type, &new_token); |
683 | break; |
684 | } |
685 | case '|': { |
686 | if ((str.size >= 2) && (str.ptr[1] == '|')) { |
687 | token_str.size = 2; |
688 | op_type = GRN_TS_OP_LOGICAL_OR; |
689 | } else { |
690 | token_str.size = 1; |
691 | op_type = GRN_TS_OP_BITWISE_OR; |
692 | } |
693 | rc = grn_ts_expr_op_token_open(ctx, token_str, op_type, &new_token); |
694 | break; |
695 | } |
696 | case '=': { |
697 | if ((str.size < 2) || (str.ptr[1] != '=')) { |
698 | GRN_TS_ERR_RETURN(GRN_INVALID_FORMAT, |
699 | "single equal not available: =\"%.*s\"" , |
700 | (int)str.size, str.ptr); |
701 | } |
702 | token_str.size = 2; |
703 | rc = grn_ts_expr_op_token_open(ctx, token_str, GRN_TS_OP_EQUAL, |
704 | &new_token); |
705 | break; |
706 | } |
707 | #define GRN_TS_EXPR_PARSER_TOKENIZE_OP_CASE(label, TYPE)\ |
708 | case label: {\ |
709 | token_str.size = 1;\ |
710 | rc = grn_ts_expr_op_token_open(ctx, token_str, GRN_TS_OP_ ## TYPE,\ |
711 | &new_token);\ |
712 | break;\ |
713 | } |
714 | GRN_TS_EXPR_PARSER_TOKENIZE_OP_CASE('~', BITWISE_NOT) |
715 | GRN_TS_EXPR_PARSER_TOKENIZE_OP_CASE('^', BITWISE_XOR) |
716 | GRN_TS_EXPR_PARSER_TOKENIZE_OP_CASE('*', MULTIPLICATION) |
717 | GRN_TS_EXPR_PARSER_TOKENIZE_OP_CASE('/', DIVISION) |
718 | GRN_TS_EXPR_PARSER_TOKENIZE_OP_CASE('%', MODULUS) |
719 | #undef GRN_TS_EXPR_PARSER_TOKENIZE_OP_CASE |
720 | case '@': { |
721 | if ((str.size >= 2) && (str.ptr[1] == '^')) { |
722 | token_str.size = 2; |
723 | op_type = GRN_TS_OP_PREFIX_MATCH; |
724 | } else if ((str.size >= 2) && (str.ptr[1] == '$')) { |
725 | token_str.size = 2; |
726 | op_type = GRN_TS_OP_SUFFIX_MATCH; |
727 | } else { |
728 | token_str.size = 1; |
729 | op_type = GRN_TS_OP_MATCH; |
730 | } |
731 | rc = grn_ts_expr_op_token_open(ctx, token_str, op_type, &new_token); |
732 | break; |
733 | } |
734 | default: { |
735 | GRN_TS_ERR_RETURN(GRN_INVALID_FORMAT, "invalid character: \"%.*s\"" , |
736 | (int)str.size, str.ptr); |
737 | } |
738 | } |
739 | if (rc != GRN_SUCCESS) { |
740 | return rc; |
741 | } |
742 | *token = (grn_ts_expr_token *)new_token; |
743 | return GRN_SUCCESS; |
744 | } |
745 | |
746 | /* grn_ts_expr_parser_tokenize_next() extracts the next token. */ |
747 | static grn_rc |
748 | grn_ts_expr_parser_tokenize_next(grn_ctx *ctx, grn_ts_expr_parser *parser, |
749 | grn_ts_str str, grn_ts_expr_token **token) |
750 | { |
751 | grn_ts_str rest; |
752 | if (!parser->n_tokens) { |
753 | return grn_ts_expr_parser_tokenize_start(ctx, parser, str, token); |
754 | } |
755 | rest = grn_ts_str_trim_left(str); |
756 | if (!rest.size) { |
757 | return grn_ts_expr_parser_tokenize_end(ctx, parser, rest, token); |
758 | } |
759 | if (grn_ts_str_has_number_prefix(rest)) { |
760 | grn_ts_expr_token *prev_token; |
761 | if ((rest.ptr[0] != '+') && (rest.ptr[0] != '-')) { |
762 | return grn_ts_expr_parser_tokenize_number(ctx, parser, rest, token); |
763 | } |
764 | prev_token = parser->tokens[parser->n_tokens - 1]; |
765 | switch (prev_token->type) { |
766 | case GRN_TS_EXPR_START_TOKEN: |
767 | case GRN_TS_EXPR_OP_TOKEN: { |
768 | return grn_ts_expr_parser_tokenize_number(ctx, parser, rest, token); |
769 | } |
770 | case GRN_TS_EXPR_BRACKET_TOKEN: { |
771 | if ((prev_token->src.ptr[0] == '(') || |
772 | (prev_token->src.ptr[0] == '[')) { |
773 | return grn_ts_expr_parser_tokenize_number(ctx, parser, rest, token); |
774 | } |
775 | break; |
776 | } |
777 | default: { |
778 | break; |
779 | } |
780 | } |
781 | } |
782 | if (rest.ptr[0] == '"') { |
783 | return grn_ts_expr_parser_tokenize_text(ctx, parser, rest, token); |
784 | } |
785 | if (grn_ts_byte_is_name_char(rest.ptr[0])) { |
786 | return grn_ts_expr_parser_tokenize_name(ctx, parser, rest, token); |
787 | } |
788 | switch (rest.ptr[0]) { |
789 | case '(': case ')': case '[': case ']': { |
790 | return grn_ts_expr_parser_tokenize_bracket(ctx, parser, rest, token); |
791 | } |
792 | case '.': { |
793 | return grn_ts_expr_parser_tokenize_bridge(ctx, parser, rest, token); |
794 | } |
795 | default: { |
796 | return grn_ts_expr_parser_tokenize_op(ctx, parser, rest, token); |
797 | } |
798 | } |
799 | } |
800 | |
801 | /* |
802 | * grn_ts_expr_parser_reserve_tokens() extends a token buffer for a new token. |
803 | */ |
804 | static grn_rc |
805 | grn_ts_expr_parser_reserve_tokens(grn_ctx *ctx, grn_ts_expr_parser *parser) |
806 | { |
807 | size_t i, n_bytes, new_max_n_tokens; |
808 | grn_ts_expr_token **new_tokens; |
809 | if (parser->n_tokens < parser->max_n_tokens) { |
810 | return GRN_SUCCESS; |
811 | } |
812 | new_max_n_tokens = parser->n_tokens * 2; |
813 | if (!new_max_n_tokens) { |
814 | new_max_n_tokens = 1; |
815 | } |
816 | n_bytes = sizeof(grn_ts_expr_token *) * new_max_n_tokens; |
817 | new_tokens = (grn_ts_expr_token **)GRN_REALLOC(parser->tokens, n_bytes); |
818 | if (!new_tokens) { |
819 | GRN_TS_ERR_RETURN(GRN_NO_MEMORY_AVAILABLE, |
820 | "GRN_REALLOC failed: %" GRN_FMT_SIZE, |
821 | n_bytes); |
822 | } |
823 | for (i = parser->n_tokens; i < new_max_n_tokens; i++) { |
824 | new_tokens[i] = NULL; |
825 | } |
826 | parser->tokens = new_tokens; |
827 | parser->max_n_tokens = new_max_n_tokens; |
828 | return GRN_SUCCESS; |
829 | } |
830 | |
831 | /* grn_ts_expr_parser_tokenize() tokenizes a string. */ |
832 | static grn_rc |
833 | grn_ts_expr_parser_tokenize(grn_ctx *ctx, grn_ts_expr_parser *parser, |
834 | grn_ts_str str) |
835 | { |
836 | grn_ts_str rest = str; |
837 | const char *end = str.ptr + str.size; |
838 | grn_ts_expr_token *token = NULL; |
839 | GRN_TS_DEBUG("str = \"%.*s\"" , (int)str.size, str.ptr); |
840 | do { |
841 | grn_rc rc = grn_ts_expr_parser_reserve_tokens(ctx, parser); |
842 | if (rc != GRN_SUCCESS) { |
843 | return rc; |
844 | } |
845 | rc = grn_ts_expr_parser_tokenize_next(ctx, parser, rest, &token); |
846 | if (rc != GRN_SUCCESS) { |
847 | return rc; |
848 | } |
849 | if ((token->type != GRN_TS_EXPR_START_TOKEN) && |
850 | (token->type != GRN_TS_EXPR_END_TOKEN)) { |
851 | GRN_TS_DEBUG("token = \"%.*s\"" , (int)token->src.size, token->src.ptr); |
852 | } |
853 | parser->tokens[parser->n_tokens++] = token; |
854 | rest.ptr = token->src.ptr + token->src.size; |
855 | rest.size = end - rest.ptr; |
856 | } while (token->type != GRN_TS_EXPR_END_TOKEN); |
857 | return GRN_SUCCESS; |
858 | } |
859 | |
860 | /* grn_ts_expr_parser_push_const() pushes a token to an expression. */ |
861 | static grn_rc |
862 | grn_ts_expr_parser_push_const(grn_ctx *ctx, grn_ts_expr_parser *parser, |
863 | grn_ts_expr_const_token *token) |
864 | { |
865 | return grn_ts_expr_builder_push_const(ctx, parser->builder, token->data_kind, |
866 | GRN_DB_VOID, token->content); |
867 | } |
868 | |
869 | /* grn_ts_expr_parser_push_name() pushes a token to an expression. */ |
870 | static grn_rc |
871 | grn_ts_expr_parser_push_name(grn_ctx *ctx, grn_ts_expr_parser *parser, |
872 | grn_ts_expr_name_token *token) |
873 | { |
874 | return grn_ts_expr_builder_push_name(ctx, parser->builder, token->src); |
875 | } |
876 | |
877 | /* grn_ts_expr_parser_push_op() pushes a token to an expression. */ |
878 | static grn_rc |
879 | grn_ts_expr_parser_push_op(grn_ctx *ctx, grn_ts_expr_parser *parser, |
880 | grn_ts_expr_op_token *token) |
881 | { |
882 | return grn_ts_expr_builder_push_op(ctx, parser->builder, token->op_type); |
883 | } |
884 | |
885 | /* |
886 | * grn_ts_expr_parser_apply_one() applies a bridge or prior operator. |
887 | * If there is no target, this function returns GRN_END_OF_DATA. |
888 | */ |
889 | // FIXME: Support a ternary operator. |
890 | static grn_rc |
891 | grn_ts_expr_parser_apply_one(grn_ctx *ctx, grn_ts_expr_parser *parser, |
892 | grn_ts_op_precedence precedence_threshold) |
893 | { |
894 | grn_rc rc; |
895 | grn_ts_str src; |
896 | grn_ts_expr_token **stack = parser->stack; |
897 | grn_ts_expr_dummy_token *dummy_token; |
898 | size_t n_args, depth = parser->stack_depth; |
899 | if (depth < 2) { |
900 | return GRN_END_OF_DATA; |
901 | } |
902 | if (stack[depth - 1]->type != GRN_TS_EXPR_DUMMY_TOKEN) { |
903 | GRN_TS_ERR_RETURN(GRN_INVALID_FORMAT, "argument must be dummy token" ); |
904 | } |
905 | |
906 | /* Check the number of arguments. */ |
907 | switch (stack[depth - 2]->type) { |
908 | case GRN_TS_EXPR_BRIDGE_TOKEN: { |
909 | rc = grn_ts_expr_builder_end_subexpr(ctx, parser->builder); |
910 | if (rc != GRN_SUCCESS) { |
911 | return rc; |
912 | } |
913 | n_args = 2; |
914 | break; |
915 | } |
916 | case GRN_TS_EXPR_OP_TOKEN: { |
917 | grn_ts_expr_op_token *op_token; |
918 | grn_ts_op_precedence precedence; |
919 | op_token = (grn_ts_expr_op_token *)stack[depth - 2]; |
920 | precedence = grn_ts_op_get_precedence(op_token->op_type); |
921 | if (precedence < precedence_threshold) { |
922 | return GRN_END_OF_DATA; |
923 | } |
924 | rc = grn_ts_expr_parser_push_op(ctx, parser, op_token); |
925 | if (rc != GRN_SUCCESS) { |
926 | return rc; |
927 | } |
928 | n_args = grn_ts_op_get_n_args(op_token->op_type); |
929 | break; |
930 | } |
931 | default: { |
932 | return GRN_END_OF_DATA; |
933 | } |
934 | } |
935 | |
936 | /* Concatenate the source strings. */ |
937 | switch (n_args) { |
938 | case 1: { |
939 | grn_ts_expr_token *arg = stack[depth - 1]; |
940 | src.ptr = stack[depth - 2]->src.ptr; |
941 | src.size = (arg->src.ptr + arg->src.size) - src.ptr; |
942 | break; |
943 | } |
944 | case 2: { |
945 | grn_ts_expr_token *args[2] = { stack[depth - 3], stack[depth - 1] }; |
946 | src.ptr = args[0]->src.ptr; |
947 | src.size = (args[1]->src.ptr + args[1]->src.size) - src.ptr; |
948 | break; |
949 | } |
950 | default: { |
951 | GRN_TS_ERR_RETURN(GRN_OPERATION_NOT_SUPPORTED, |
952 | "invalid #arguments: %" GRN_FMT_SIZE, |
953 | n_args); |
954 | } |
955 | } |
956 | |
957 | /* Replace the operator and argument tokens with a dummy token. */ |
958 | dummy_token = &parser->dummy_tokens[parser->n_dummy_tokens++]; |
959 | GRN_TS_DEBUG("dummy token: \"%.*s\"" , (int)src.size, src.ptr); |
960 | grn_ts_expr_dummy_token_init(ctx, dummy_token, src); |
961 | depth -= n_args + 1; |
962 | stack[depth++] = dummy_token; |
963 | parser->stack_depth = depth; |
964 | return GRN_SUCCESS; |
965 | } |
966 | |
967 | /* grn_ts_expr_parser_apply() applies bridges and prior operators. */ |
968 | static grn_rc |
969 | grn_ts_expr_parser_apply(grn_ctx *ctx, grn_ts_expr_parser *parser, |
970 | grn_ts_op_precedence precedence_threshold) |
971 | { |
972 | for ( ; ; ) { |
973 | grn_rc rc = grn_ts_expr_parser_apply_one(ctx, parser, |
974 | precedence_threshold); |
975 | if (rc == GRN_END_OF_DATA) { |
976 | return GRN_SUCCESS; |
977 | } else if (rc != GRN_SUCCESS) { |
978 | return rc; |
979 | } |
980 | } |
981 | } |
982 | |
983 | /* grn_ts_expr_parser_analyze_op() analyzes a token. */ |
984 | static grn_rc |
985 | grn_ts_expr_parser_analyze_op(grn_ctx *ctx, grn_ts_expr_parser *parser, |
986 | grn_ts_expr_op_token *token) |
987 | { |
988 | size_t n_args = grn_ts_op_get_n_args(token->op_type); |
989 | grn_ts_expr_token *ex_token = parser->stack[parser->stack_depth - 1]; |
990 | if (n_args == 1) { |
991 | if (ex_token->type == GRN_TS_EXPR_DUMMY_TOKEN) { |
992 | GRN_TS_ERR_RETURN(GRN_INVALID_FORMAT, "invalid token sequence" ); |
993 | } |
994 | } else if (n_args == 2) { |
995 | grn_ts_op_precedence precedence = grn_ts_op_get_precedence(token->op_type); |
996 | grn_rc rc = grn_ts_expr_parser_apply(ctx, parser, precedence); |
997 | if (rc != GRN_SUCCESS) { |
998 | return rc; |
999 | } |
1000 | } |
1001 | parser->stack[parser->stack_depth++] = (grn_ts_expr_token *)token; |
1002 | return GRN_SUCCESS; |
1003 | } |
1004 | |
1005 | /* grn_ts_expr_parser_analyze_bridge() analyzes a token. */ |
1006 | static grn_rc |
1007 | grn_ts_expr_parser_analyze_bridge(grn_ctx *ctx, grn_ts_expr_parser *parser, |
1008 | grn_ts_expr_bridge_token *token) |
1009 | { |
1010 | grn_rc rc = grn_ts_expr_builder_begin_subexpr(ctx, parser->builder); |
1011 | if (rc != GRN_SUCCESS) { |
1012 | return rc; |
1013 | } |
1014 | parser->stack[parser->stack_depth++] = (grn_ts_expr_token *)token; |
1015 | return GRN_SUCCESS; |
1016 | } |
1017 | |
1018 | /* grn_ts_expr_parser_analyze_bracket() analyzes a token. */ |
1019 | static grn_rc |
1020 | grn_ts_expr_parser_analyze_bracket(grn_ctx *ctx, grn_ts_expr_parser *parser, |
1021 | grn_ts_expr_bracket_token *token) |
1022 | { |
1023 | grn_ts_expr_token *ex_token = parser->stack[parser->stack_depth - 1]; |
1024 | switch (token->src.ptr[0]) { |
1025 | case '(': { |
1026 | if (ex_token->type == GRN_TS_EXPR_DUMMY_TOKEN) { |
1027 | GRN_TS_ERR_RETURN(GRN_INVALID_FORMAT, "invalid token sequence" ); |
1028 | } |
1029 | parser->stack[parser->stack_depth++] = (grn_ts_expr_token *)token; |
1030 | return GRN_SUCCESS; |
1031 | } |
1032 | case '[': { |
1033 | if (ex_token->type != GRN_TS_EXPR_DUMMY_TOKEN) { |
1034 | GRN_TS_ERR_RETURN(GRN_INVALID_FORMAT, "invalid token sequence" ); |
1035 | } |
1036 | parser->stack[parser->stack_depth++] = (grn_ts_expr_token *)token; |
1037 | return GRN_SUCCESS; |
1038 | } |
1039 | case ')': case ']': { |
1040 | grn_ts_expr_token *ex_ex_token; |
1041 | grn_rc rc = grn_ts_expr_parser_apply(ctx, parser, 0); |
1042 | if (rc != GRN_SUCCESS) { |
1043 | return rc; |
1044 | } |
1045 | if (parser->stack_depth < 2) { |
1046 | GRN_TS_ERR_RETURN(GRN_INVALID_FORMAT, "invalid token sequence" ); |
1047 | } |
1048 | ex_ex_token = parser->stack[parser->stack_depth - 2]; |
1049 | if (ex_ex_token->type != GRN_TS_EXPR_BRACKET_TOKEN) { |
1050 | GRN_TS_ERR_RETURN(GRN_INVALID_FORMAT, "invalid token sequence" ); |
1051 | } |
1052 | if (token->src.ptr[0] == ')') { |
1053 | size_t depth = parser->stack_depth; |
1054 | grn_ts_str src; |
1055 | grn_ts_expr_dummy_token *dummy_token; |
1056 | if (ex_ex_token->src.ptr[0] != '(') { |
1057 | GRN_TS_ERR_RETURN(GRN_INVALID_FORMAT, "invalid token sequence" ); |
1058 | } |
1059 | src.ptr = ex_ex_token->src.ptr; |
1060 | src.size = (token->src.ptr + token->src.size) - src.ptr; |
1061 | dummy_token = &parser->dummy_tokens[parser->n_dummy_tokens++]; |
1062 | GRN_TS_DEBUG("dummy token: \"%.*s\"" , (int)src.size, src.ptr); |
1063 | grn_ts_expr_dummy_token_init(ctx, dummy_token, src); |
1064 | parser->stack[depth - 2] = dummy_token; |
1065 | parser->stack_depth--; |
1066 | // TODO: Apply a function. |
1067 | } else if (token->src.ptr[0] == ']') { |
1068 | size_t depth = parser->stack_depth; |
1069 | if (ex_ex_token->src.ptr[0] != '[') { |
1070 | GRN_TS_ERR_RETURN(GRN_INVALID_FORMAT, "invalid token sequence" ); |
1071 | } |
1072 | parser->stack[depth - 2] = parser->stack[depth - 1]; |
1073 | parser->stack_depth--; |
1074 | // TODO: Push a subscript operator. |
1075 | } |
1076 | return GRN_SUCCESS; |
1077 | } |
1078 | default: { |
1079 | GRN_TS_ERR_RETURN(GRN_INVALID_FORMAT, "undefined bracket: \"%.*s\"" , |
1080 | (int)token->src.size, token->src.ptr); |
1081 | } |
1082 | } |
1083 | } |
1084 | |
1085 | /* grn_ts_expr_parser_analyze_token() analyzes a token. */ |
1086 | static grn_rc |
1087 | grn_ts_expr_parser_analyze_token(grn_ctx *ctx, grn_ts_expr_parser *parser, |
1088 | grn_ts_expr_token *token) |
1089 | { |
1090 | switch (token->type) { |
1091 | case GRN_TS_EXPR_START_TOKEN: { |
1092 | parser->stack[parser->stack_depth++] = token; |
1093 | return GRN_SUCCESS; |
1094 | } |
1095 | case GRN_TS_EXPR_END_TOKEN: { |
1096 | return grn_ts_expr_parser_apply(ctx, parser, 0); |
1097 | } |
1098 | case GRN_TS_EXPR_CONST_TOKEN: { |
1099 | grn_ts_expr_const_token *const_token = (grn_ts_expr_const_token *)token; |
1100 | grn_ts_expr_dummy_token *dummy_token; |
1101 | grn_rc rc = grn_ts_expr_parser_push_const(ctx, parser, const_token); |
1102 | if (rc != GRN_SUCCESS) { |
1103 | return rc; |
1104 | } |
1105 | dummy_token = &parser->dummy_tokens[parser->n_dummy_tokens++]; |
1106 | grn_ts_expr_dummy_token_init(ctx, dummy_token, token->src); |
1107 | parser->stack[parser->stack_depth++] = dummy_token; |
1108 | return GRN_SUCCESS; |
1109 | } |
1110 | case GRN_TS_EXPR_NAME_TOKEN: { |
1111 | grn_ts_expr_name_token *name_token = (grn_ts_expr_name_token *)token; |
1112 | grn_ts_expr_dummy_token *dummy_token; |
1113 | grn_rc rc = grn_ts_expr_parser_push_name(ctx, parser, name_token); |
1114 | if (rc != GRN_SUCCESS) { |
1115 | return rc; |
1116 | } |
1117 | dummy_token = &parser->dummy_tokens[parser->n_dummy_tokens++]; |
1118 | grn_ts_expr_dummy_token_init(ctx, dummy_token, token->src); |
1119 | parser->stack[parser->stack_depth++] = dummy_token; |
1120 | return GRN_SUCCESS; |
1121 | } |
1122 | case GRN_TS_EXPR_OP_TOKEN: { |
1123 | grn_ts_expr_op_token *op_token = (grn_ts_expr_op_token *)token; |
1124 | return grn_ts_expr_parser_analyze_op(ctx, parser, op_token); |
1125 | } |
1126 | case GRN_TS_EXPR_BRIDGE_TOKEN: { |
1127 | grn_ts_expr_bridge_token *bridge_token; |
1128 | bridge_token = (grn_ts_expr_bridge_token *)token; |
1129 | return grn_ts_expr_parser_analyze_bridge(ctx, parser, bridge_token); |
1130 | } |
1131 | case GRN_TS_EXPR_BRACKET_TOKEN: { |
1132 | grn_ts_expr_bracket_token *bracket_token; |
1133 | bracket_token = (grn_ts_expr_bracket_token *)token; |
1134 | return grn_ts_expr_parser_analyze_bracket(ctx, parser, bracket_token); |
1135 | } |
1136 | default: { |
1137 | GRN_TS_ERR_RETURN(GRN_OBJECT_CORRUPT, "invalid token type: %d" , |
1138 | token->type); |
1139 | } |
1140 | } |
1141 | } |
1142 | |
1143 | /* grn_ts_expr_parser_analyze() analyzes tokens. */ |
1144 | static grn_rc |
1145 | grn_ts_expr_parser_analyze(grn_ctx *ctx, grn_ts_expr_parser *parser) |
1146 | { |
1147 | size_t i; |
1148 | |
1149 | /* Reserve temporary work spaces. */ |
1150 | if (parser->n_tokens > parser->max_n_dummy_tokens) { |
1151 | size_t n_bytes = sizeof(grn_ts_expr_dummy_token) * parser->n_tokens; |
1152 | grn_ts_expr_dummy_token *dummy_tokens = parser->dummy_tokens; |
1153 | grn_ts_expr_dummy_token *new_dummy_tokens; |
1154 | new_dummy_tokens = (grn_ts_expr_dummy_token *)GRN_REALLOC(dummy_tokens, |
1155 | n_bytes); |
1156 | if (!new_dummy_tokens) { |
1157 | GRN_TS_ERR_RETURN(GRN_NO_MEMORY_AVAILABLE, |
1158 | "GRN_REALLOC failed: %" GRN_FMT_SIZE, n_bytes); |
1159 | } |
1160 | parser->dummy_tokens = new_dummy_tokens; |
1161 | parser->max_n_dummy_tokens = parser->n_tokens; |
1162 | } |
1163 | if (parser->n_tokens > parser->stack_size) { |
1164 | size_t n_bytes = sizeof(grn_ts_expr_token *) * parser->n_tokens; |
1165 | grn_ts_expr_token **new_stack; |
1166 | new_stack = (grn_ts_expr_token **)GRN_REALLOC(parser->stack, n_bytes); |
1167 | if (!new_stack) { |
1168 | GRN_TS_ERR_RETURN(GRN_NO_MEMORY_AVAILABLE, |
1169 | "GRN_REALLOC failed: %" GRN_FMT_SIZE, n_bytes); |
1170 | } |
1171 | parser->stack = new_stack; |
1172 | parser->stack_size = parser->n_tokens; |
1173 | } |
1174 | |
1175 | /* Analyze tokens. */ |
1176 | for (i = 0; i < parser->n_tokens; i++) { |
1177 | grn_rc rc; |
1178 | rc = grn_ts_expr_parser_analyze_token(ctx, parser, parser->tokens[i]); |
1179 | if (rc != GRN_SUCCESS) { |
1180 | return rc; |
1181 | } |
1182 | } |
1183 | if (parser->stack_depth != 2) { |
1184 | GRN_TS_ERR_RETURN(GRN_INVALID_FORMAT, |
1185 | "tokens left in stack: %" GRN_FMT_SIZE, |
1186 | parser->stack_depth); |
1187 | } |
1188 | return GRN_SUCCESS; |
1189 | } |
1190 | |
1191 | /* |
1192 | * grn_ts_expr_parser_clear() clears the internal states for parsing the next |
1193 | * string. |
1194 | */ |
1195 | static void |
1196 | grn_ts_expr_parser_clear(grn_ctx *ctx, grn_ts_expr_parser *parser) |
1197 | { |
1198 | parser->stack_depth = 0; |
1199 | if (parser->dummy_tokens) { |
1200 | size_t i; |
1201 | for (i = 0; i < parser->n_dummy_tokens; i++) { |
1202 | grn_ts_expr_dummy_token_fin(ctx, &parser->dummy_tokens[i]); |
1203 | } |
1204 | parser->n_dummy_tokens = 0; |
1205 | } |
1206 | if (parser->tokens) { |
1207 | size_t i; |
1208 | for (i = 0; i < parser->n_tokens; i++) { |
1209 | grn_ts_expr_token_close(ctx, parser->tokens[i]); |
1210 | } |
1211 | parser->n_tokens = 0; |
1212 | } |
1213 | grn_ts_expr_builder_clear(ctx, parser->builder); |
1214 | } |
1215 | |
1216 | grn_rc |
1217 | grn_ts_expr_parser_parse(grn_ctx *ctx, grn_ts_expr_parser *parser, |
1218 | grn_ts_str str, grn_ts_expr **expr) |
1219 | { |
1220 | grn_rc rc; |
1221 | if (!ctx) { |
1222 | return GRN_INVALID_ARGUMENT; |
1223 | } |
1224 | if (!parser || (!str.ptr && str.size)) { |
1225 | GRN_TS_ERR_RETURN(GRN_INVALID_ARGUMENT, "invalid argument" ); |
1226 | } |
1227 | grn_ts_expr_parser_clear(ctx, parser); |
1228 | rc = grn_ts_buf_reserve(ctx, &parser->str_buf, str.size + 1); |
1229 | if (rc != GRN_SUCCESS) { |
1230 | return rc; |
1231 | } |
1232 | grn_memcpy(parser->str_buf.ptr, str.ptr, str.size); |
1233 | ((char *)parser->str_buf.ptr)[str.size] = '\0'; |
1234 | str.ptr = (const char *)parser->str_buf.ptr; |
1235 | rc = grn_ts_expr_parser_tokenize(ctx, parser, str); |
1236 | if (rc != GRN_SUCCESS) { |
1237 | return rc; |
1238 | } |
1239 | rc = grn_ts_expr_parser_analyze(ctx, parser); |
1240 | if (rc != GRN_SUCCESS) { |
1241 | return rc; |
1242 | } |
1243 | return grn_ts_expr_builder_complete(ctx, parser->builder, expr); |
1244 | } |
1245 | |
1246 | grn_rc |
1247 | grn_ts_expr_parser_split(grn_ctx *ctx, grn_ts_expr_parser *parser, |
1248 | grn_ts_str str, grn_ts_str *first, grn_ts_str *rest) |
1249 | { |
1250 | size_t i; |
1251 | char stack_top; |
1252 | grn_rc rc = GRN_SUCCESS; |
1253 | grn_ts_buf stack; |
1254 | |
1255 | // FIXME: `stack` should be a member of `parser`. |
1256 | grn_ts_buf_init(ctx, &stack); |
1257 | for ( ; ; ) { |
1258 | str = grn_ts_str_trim_left(str); |
1259 | if (!str.size) { |
1260 | rc = GRN_END_OF_DATA; |
1261 | break; |
1262 | } |
1263 | for (i = 0; i < str.size; i++) { |
1264 | if (stack.pos) { |
1265 | if (str.ptr[i] == stack_top) { |
1266 | if (--stack.pos) { |
1267 | stack_top = ((char *)stack.ptr)[stack.pos - 1]; |
1268 | } |
1269 | continue; |
1270 | } |
1271 | if (stack_top == '"') { |
1272 | /* Skip the next byte of an escape character. */ |
1273 | if ((str.ptr[i] == '\\') && (i < (str.size - 1))) { |
1274 | i++; |
1275 | } |
1276 | continue; |
1277 | } |
1278 | } else if (str.ptr[i] == ',') { |
1279 | /* An expression delimiter. */ |
1280 | break; |
1281 | } |
1282 | switch (str.ptr[i]) { |
1283 | case '(': { |
1284 | stack_top = ')'; |
1285 | rc = grn_ts_buf_write(ctx, &stack, &stack_top, 1); |
1286 | break; |
1287 | } |
1288 | case '[': { |
1289 | stack_top = ']'; |
1290 | rc = grn_ts_buf_write(ctx, &stack, &stack_top, 1); |
1291 | break; |
1292 | } |
1293 | case '{': { |
1294 | stack_top = '}'; |
1295 | rc = grn_ts_buf_write(ctx, &stack, &stack_top, 1); |
1296 | break; |
1297 | } |
1298 | case '"': { |
1299 | stack_top = '"'; |
1300 | rc = grn_ts_buf_write(ctx, &stack, &stack_top, 1); |
1301 | break; |
1302 | } |
1303 | } |
1304 | if (rc != GRN_SUCCESS) { |
1305 | break; |
1306 | } |
1307 | } |
1308 | if (rc != GRN_SUCCESS) { |
1309 | break; |
1310 | } |
1311 | if (i) { |
1312 | /* Set the result. */ |
1313 | first->ptr = str.ptr; |
1314 | first->size = i; |
1315 | if (first->size == str.size) { |
1316 | rest->ptr = str.ptr + str.size; |
1317 | rest->size = 0; |
1318 | } else { |
1319 | rest->ptr = str.ptr + first->size + 1; |
1320 | rest->size = str.size - first->size - 1; |
1321 | } |
1322 | break; |
1323 | } |
1324 | str.ptr++; |
1325 | str.size--; |
1326 | } |
1327 | grn_ts_buf_fin(ctx, &stack); |
1328 | return rc; |
1329 | } |
1330 | |