1/* -*- c-basic-offset: 2 -*- */
2/*
3 Copyright(C) 2014-2017 Brazil
4
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License version 2.1 as published by the Free Software Foundation.
8
9 This library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with this library; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17*/
18
19#include "grn.h"
20#include "grn_db.h"
21#include "grn_str.h"
22#include "grn_normalizer.h"
23
24#include <string.h>
25
26#ifdef GRN_WITH_ONIGMO
27# define GRN_SUPPORT_REGEXP
28#endif
29
30#ifdef GRN_SUPPORT_REGEXP
31# include <onigmo.h>
32#endif
33
/*
 * Printable operator names.  grn_operator_to_string() indexes this table
 * directly with the operator value, so the order of the entries is
 * significant.  The leading number on each row is the index of the first
 * name on that row.
 */
static const char *operator_names[] = {
  /*  0 */ "push", "pop", "nop", "call", "intern",
  /*  5 */ "get_ref", "get_value",
  /*  7 */ "and", "and_not", "or",
  /* 10 */ "assign", "star_assign", "slash_assign", "mod_assign",
  /* 14 */ "plus_assign", "minus_assign",
  /* 16 */ "shiftl_assign", "shiftr_assign", "shiftrr_assign",
  /* 19 */ "and_assign", "xor_assign", "or_assign",
  /* 22 */ "jump", "cjump", "comma",
  /* 25 */ "bitwise_or", "bitwise_xor", "bitwise_and", "bitwise_not",
  /* 29 */ "equal", "not_equal",
  /* 31 */ "less", "greater", "less_equal", "greater_equal",
  /* 35 */ "in", "match", "near", "near2", "similar", "term_extract",
  /* 41 */ "shiftl", "shiftr", "shiftrr",
  /* 44 */ "plus", "minus", "star", "slash", "mod",
  /* 49 */ "delete",
  /* 50 */ "incr", "decr", "incr_post", "decr_post",
  /* 54 */ "not", "adjust", "exact", "lcp", "partial", "unsplit",
  /* 60 */ "prefix", "suffix",
  /* 62 */ "geo_distance1", "geo_distance2", "geo_distance3", "geo_distance4",
  /* 66 */ "geo_withinp5", "geo_withinp6", "geo_withinp8",
  /* 69 */ "obj_search", "expr_get_var",
  /* 71 */ "table_create", "table_select", "table_sort", "table_group",
  /* 75 */ "json_put", "get_member",
  /* 77 */ "regexp", "fuzzy"
};
115
116#define GRN_OP_LAST GRN_OP_FUZZY
117
118const char *
119grn_operator_to_string(grn_operator op)
120{
121 if (op <= GRN_OP_LAST) {
122 return operator_names[op];
123 } else {
124 return "unknown";
125 }
126}
127
128grn_operator_exec_func *
129grn_operator_to_exec_func(grn_operator op)
130{
131 grn_operator_exec_func *func = NULL;
132
133 switch (op) {
134 case GRN_OP_EQUAL :
135 func = grn_operator_exec_equal;
136 break;
137 case GRN_OP_NOT_EQUAL :
138 func = grn_operator_exec_not_equal;
139 break;
140 case GRN_OP_LESS :
141 func = grn_operator_exec_less;
142 break;
143 case GRN_OP_GREATER :
144 func = grn_operator_exec_greater;
145 break;
146 case GRN_OP_LESS_EQUAL :
147 func = grn_operator_exec_less_equal;
148 break;
149 case GRN_OP_GREATER_EQUAL :
150 func = grn_operator_exec_greater_equal;
151 break;
152 case GRN_OP_MATCH :
153 func = grn_operator_exec_match;
154 break;
155 case GRN_OP_PREFIX :
156 func = grn_operator_exec_prefix;
157 break;
158 case GRN_OP_REGEXP :
159 func = grn_operator_exec_regexp;
160 break;
161 default :
162 break;
163 }
164
165 return func;
166}
167
/*
 * DO_EQ_SUB: compare an already extracted numeric value with the bulk `y`.
 *
 * This macro takes no arguments; it uses the following identifiers from the
 * place where it is expanded, under exactly these names:
 *   x_ : the left-hand value (an arithmetic C value)
 *   y  : pointer to the right-hand grn_obj
 *   r  : lvalue that receives the result
 *
 * Dispatch is on y->header.domain:
 *   - integer domains : plain `==` against the value read from y
 *   - GRN_DB_TIME     : x_ is converted with GRN_TIME_PACK(x_, 0) first
 *   - GRN_DB_FLOAT    : equality is written as (x_ <= v) && (x_ >= v)
 *   - text domains    : y is parsed with grn_atoi() into an int and compared
 *   - anything else   : r = GRN_FALSE
 */
168#define DO_EQ_SUB do {\
169 switch (y->header.domain) {\
170 case GRN_DB_INT8 :\
171 r = (x_ == GRN_INT8_VALUE(y));\
172 break;\
173 case GRN_DB_UINT8 :\
174 r = (x_ == GRN_UINT8_VALUE(y));\
175 break;\
176 case GRN_DB_INT16 :\
177 r = (x_ == GRN_INT16_VALUE(y));\
178 break;\
179 case GRN_DB_UINT16 :\
180 r = (x_ == GRN_UINT16_VALUE(y));\
181 break;\
182 case GRN_DB_INT32 :\
183 r = (x_ == GRN_INT32_VALUE(y));\
184 break;\
185 case GRN_DB_UINT32 :\
186 r = (x_ == GRN_UINT32_VALUE(y));\
187 break;\
188 case GRN_DB_INT64 :\
189 r = (x_ == GRN_INT64_VALUE(y));\
190 break;\
191 case GRN_DB_TIME :\
192 r = (GRN_TIME_PACK(x_,0) == GRN_INT64_VALUE(y));\
193 break;\
194 case GRN_DB_UINT64 :\
195 r = (x_ == GRN_UINT64_VALUE(y));\
196 break;\
197 case GRN_DB_FLOAT :\
198 r = ((x_ <= GRN_FLOAT_VALUE(y)) && (x_ >= GRN_FLOAT_VALUE(y)));\
199 break;\
200 case GRN_DB_SHORT_TEXT :\
201 case GRN_DB_TEXT :\
202 case GRN_DB_LONG_TEXT :\
203 {\
204 const char *p_ = GRN_TEXT_VALUE(y);\
205 int i_ = grn_atoi(p_, p_ + GRN_TEXT_LEN(y), NULL);\
206 r = (x_ == i_);\
207 }\
208 break;\
209 default :\
210 r = GRN_FALSE;\
211 break;\
212 }\
213} while (0)
214
/*
 * DO_EQ(x, y, r): store in `r` whether the bulks `x` and `y` are equal.
 *
 * Although x, y and r are macro parameters, the nested DO_EQ_SUB refers to
 * `y` and `r` by their literal names, and the cast paths use a variable
 * named `ctx`.  Callers must therefore pass variables that are literally
 * called y and r and must have `ctx` in scope.
 *
 * Dispatch is on x->header.domain:
 *   - GRN_DB_VOID      : always GRN_FALSE
 *   - integer domains  : x is loaded into x_ with the matching C type and
 *                        compared through DO_EQ_SUB
 *   - GRN_DB_TIME      : Int32/UInt32/Float y are converted with
 *                        GRN_TIME_PACK(..., 0); Int64/Time/UInt64 y are
 *                        compared as is; text y is cast to Time with
 *                        grn_obj_cast() (cast failure yields GRN_FALSE)
 *   - GRN_DB_FLOAT     : compared as (x_ <= v) && (x_ >= v); text y is parsed
 *                        with grn_atoi()
 *   - text domains     : text y is compared by length and memcmp();
 *                        otherwise x is parsed with grn_atoi() and compared
 *                        through DO_EQ_SUB
 *   - any other domain : same domain -> size and memcmp() on the raw bulk
 *                        content; different domains -> the operand with the
 *                        smaller domain id is cast to the other operand's
 *                        domain and the raw contents are compared (cast
 *                        failure yields GRN_FALSE)
 */
215#define DO_EQ(x,y,r) do {\
216 switch (x->header.domain) {\
217 case GRN_DB_VOID :\
218 r = GRN_FALSE;\
219 break;\
220 case GRN_DB_INT8 :\
221 {\
222 int8_t x_ = GRN_INT8_VALUE(x);\
223 DO_EQ_SUB;\
224 }\
225 break;\
226 case GRN_DB_UINT8 :\
227 {\
228 uint8_t x_ = GRN_UINT8_VALUE(x);\
229 DO_EQ_SUB;\
230 }\
231 break;\
232 case GRN_DB_INT16 :\
233 {\
234 int16_t x_ = GRN_INT16_VALUE(x);\
235 DO_EQ_SUB;\
236 }\
237 break;\
238 case GRN_DB_UINT16 :\
239 {\
240 uint16_t x_ = GRN_UINT16_VALUE(x);\
241 DO_EQ_SUB;\
242 }\
243 break;\
244 case GRN_DB_INT32 :\
245 {\
246 int32_t x_ = GRN_INT32_VALUE(x);\
247 DO_EQ_SUB;\
248 }\
249 break;\
250 case GRN_DB_UINT32 :\
251 {\
252 uint32_t x_ = GRN_UINT32_VALUE(x);\
253 DO_EQ_SUB;\
254 }\
255 break;\
256 case GRN_DB_INT64 :\
257 {\
258 int64_t x_ = GRN_INT64_VALUE(x);\
259 DO_EQ_SUB;\
260 }\
261 break;\
262 case GRN_DB_TIME :\
263 {\
264 int64_t x_ = GRN_INT64_VALUE(x);\
265 switch (y->header.domain) {\
266 case GRN_DB_INT32 :\
267 r = (x_ == GRN_TIME_PACK(GRN_INT32_VALUE(y), 0));\
268 break;\
269 case GRN_DB_UINT32 :\
270 r = (x_ == GRN_TIME_PACK(GRN_UINT32_VALUE(y), 0));\
271 break;\
272 case GRN_DB_INT64 :\
273 case GRN_DB_TIME :\
274 r = (x_ == GRN_INT64_VALUE(y));\
275 break;\
276 case GRN_DB_UINT64 :\
277 r = (x_ == GRN_UINT64_VALUE(y));\
278 break;\
279 case GRN_DB_FLOAT :\
280 r = (x_ == GRN_TIME_PACK(GRN_FLOAT_VALUE(y), 0));\
281 break;\
282 case GRN_DB_SHORT_TEXT :\
283 case GRN_DB_TEXT :\
284 case GRN_DB_LONG_TEXT :\
285 {\
286 grn_obj time_value_;\
287 GRN_TIME_INIT(&time_value_, 0);\
288 if (grn_obj_cast(ctx, y, &time_value_, GRN_FALSE) == GRN_SUCCESS) {\
289 r = (x_ == GRN_TIME_VALUE(&time_value_));\
290 } else {\
291 r = GRN_FALSE;\
292 }\
293 GRN_OBJ_FIN(ctx, &time_value_);\
294 }\
295 break;\
296 default :\
297 r = GRN_FALSE;\
298 break;\
299 }\
300 }\
301 break;\
302 case GRN_DB_UINT64 :\
303 {\
304 uint64_t x_ = GRN_UINT64_VALUE(x);\
305 DO_EQ_SUB;\
306 }\
307 break;\
308 case GRN_DB_FLOAT :\
309 {\
310 double x_ = GRN_FLOAT_VALUE(x);\
311 switch (y->header.domain) {\
312 case GRN_DB_INT32 :\
313 r = ((x_ <= GRN_INT32_VALUE(y)) && (x_ >= GRN_INT32_VALUE(y)));\
314 break;\
315 case GRN_DB_UINT32 :\
316 r = ((x_ <= GRN_UINT32_VALUE(y)) && (x_ >= GRN_UINT32_VALUE(y)));\
317 break;\
318 case GRN_DB_INT64 :\
319 case GRN_DB_TIME :\
320 r = ((x_ <= GRN_INT64_VALUE(y)) && (x_ >= GRN_INT64_VALUE(y)));\
321 break;\
322 case GRN_DB_UINT64 :\
323 r = ((x_ <= GRN_UINT64_VALUE(y)) && (x_ >= GRN_UINT64_VALUE(y)));\
324 break;\
325 case GRN_DB_FLOAT :\
326 r = ((x_ <= GRN_FLOAT_VALUE(y)) && (x_ >= GRN_FLOAT_VALUE(y)));\
327 break;\
328 case GRN_DB_SHORT_TEXT :\
329 case GRN_DB_TEXT :\
330 case GRN_DB_LONG_TEXT :\
331 {\
332 const char *p_ = GRN_TEXT_VALUE(y);\
333 int i_ = grn_atoi(p_, p_ + GRN_TEXT_LEN(y), NULL);\
334 r = (x_ <= i_ && x_ >= i_);\
335 }\
336 break;\
337 default :\
338 r = GRN_FALSE;\
339 break;\
340 }\
341 }\
342 break;\
343 case GRN_DB_SHORT_TEXT :\
344 case GRN_DB_TEXT :\
345 case GRN_DB_LONG_TEXT :\
346 if (GRN_DB_SHORT_TEXT <= y->header.domain && y->header.domain <= GRN_DB_LONG_TEXT) {\
347 uint32_t la = GRN_TEXT_LEN(x), lb = GRN_TEXT_LEN(y);\
348 r = (la == lb && !memcmp(GRN_TEXT_VALUE(x), GRN_TEXT_VALUE(y), lb));\
349 } else {\
350 const char *q_ = GRN_TEXT_VALUE(x);\
351 int x_ = grn_atoi(q_, q_ + GRN_TEXT_LEN(x), NULL);\
352 DO_EQ_SUB;\
353 }\
354 break;\
355 default :\
356 if ((x->header.domain == y->header.domain)) {\
357 r = (GRN_BULK_VSIZE(x) == GRN_BULK_VSIZE(y) &&\
358 !(memcmp(GRN_BULK_HEAD(x), GRN_BULK_HEAD(y), GRN_BULK_VSIZE(x))));\
359 } else {\
360 grn_obj dest;\
361 if (x->header.domain < y->header.domain) {\
362 GRN_OBJ_INIT(&dest, GRN_BULK, 0, y->header.domain);\
363 if (!grn_obj_cast(ctx, x, &dest, GRN_FALSE)) {\
364 r = (GRN_BULK_VSIZE(&dest) == GRN_BULK_VSIZE(y) &&\
365 !memcmp(GRN_BULK_HEAD(&dest), GRN_BULK_HEAD(y), GRN_BULK_VSIZE(y))); \
366 } else {\
367 r = GRN_FALSE;\
368 }\
369 } else {\
370 GRN_OBJ_INIT(&dest, GRN_BULK, 0, x->header.domain);\
371 if (!grn_obj_cast(ctx, y, &dest, GRN_FALSE)) {\
372 r = (GRN_BULK_VSIZE(&dest) == GRN_BULK_VSIZE(x) &&\
373 !memcmp(GRN_BULK_HEAD(&dest), GRN_BULK_HEAD(x), GRN_BULK_VSIZE(x))); \
374 } else {\
375 r = GRN_FALSE;\
376 }\
377 }\
378 GRN_OBJ_FIN(ctx, &dest);\
379 }\
380 break;\
381 }\
382} while (0)
383
/*
 * Returns whether `x` equals `y` as decided by the DO_EQ macro.
 *
 * The parameter names `x`/`y` and the local `r` are not free to change:
 * the DO_EQ_SUB macro that DO_EQ expands to refers to `y` and `r` by these
 * literal names, and the cast paths of DO_EQ use `ctx`.
 * `r` starts as GRN_FALSE, so that is the result if DO_EQ assigns nothing.
 */
384grn_bool
385grn_operator_exec_equal(grn_ctx *ctx, grn_obj *x, grn_obj *y)
386{
387 grn_bool r = GRN_FALSE;
388 GRN_API_ENTER;
389 DO_EQ(x, y, r);
390 GRN_API_RETURN(r);
391}
392
/*
 * Returns the logical negation of the DO_EQ result for `x` and `y`.
 *
 * Because the result is a plain negation, every case in which DO_EQ yields
 * GRN_FALSE (including failed casts and unsupported domain combinations)
 * is reported here as "not equal".
 * The names `x`, `y`, `r` and `ctx` must stay as they are: the macros that
 * DO_EQ expands to refer to them literally.
 */
393grn_bool
394grn_operator_exec_not_equal(grn_ctx *ctx, grn_obj *x, grn_obj *y)
395{
396 grn_bool r = GRN_FALSE;
397 GRN_API_ENTER;
398 DO_EQ(x, y, r);
399 GRN_API_RETURN(!r);
400}
401
/*
 * DO_COMPARE_SCALAR_SUB_NUMERIC(y, op): r = (x_ op <value of y>).
 *
 * `x_` (left-hand C value) and `r` (result lvalue) are taken from the
 * expansion site by name; only `y` and the operator token are parameters.
 *
 * Dispatch is on (y)->header.domain:
 *   - GRN_DB_BOOL    : y is mapped to 1 or 0 (as uint8_t) before comparing
 *   - integer/float  : x_ is compared with the value read from y
 *   - GRN_DB_TIME    : x_ is converted with GRN_TIME_PACK(x_, 0) first
 *   - anything else  : r = GRN_FALSE
 */
402#define DO_COMPARE_SCALAR_SUB_NUMERIC(y,op) do {\
403 switch ((y)->header.domain) {\
404 case GRN_DB_BOOL :\
405 r = (x_ op (uint8_t)(GRN_BOOL_VALUE(y) ? 1 : 0));\
406 break;\
407 case GRN_DB_INT8 :\
408 r = (x_ op GRN_INT8_VALUE(y));\
409 break;\
410 case GRN_DB_UINT8 :\
411 r = (x_ op GRN_UINT8_VALUE(y));\
412 break;\
413 case GRN_DB_INT16 :\
414 r = (x_ op GRN_INT16_VALUE(y));\
415 break;\
416 case GRN_DB_UINT16 :\
417 r = (x_ op GRN_UINT16_VALUE(y));\
418 break;\
419 case GRN_DB_INT32 :\
420 r = (x_ op GRN_INT32_VALUE(y));\
421 break;\
422 case GRN_DB_UINT32 :\
423 r = (x_ op GRN_UINT32_VALUE(y));\
424 break;\
425 case GRN_DB_INT64 :\
426 r = (x_ op GRN_INT64_VALUE(y));\
427 break;\
428 case GRN_DB_TIME :\
429 r = (GRN_TIME_PACK(x_,0) op GRN_INT64_VALUE(y));\
430 break;\
431 case GRN_DB_UINT64 :\
432 r = (x_ op GRN_UINT64_VALUE(y));\
433 break;\
434 case GRN_DB_FLOAT :\
435 r = (x_ op GRN_FLOAT_VALUE(y));\
436 break;\
437 default :\
438 r = GRN_FALSE;\
439 break;\
440 }\
441} while (0)
442
/*
 * DO_COMPARE_SCALAR_SUB_BUILTIN(op): compare x_ with `y`, where y's domain
 * is expected to be a built-in type.
 *
 * Uses `x`, `y`, `x_`, `r` and `ctx` from the expansion site by name.
 *
 *   - text y : y is cast with grn_obj_cast() into a temporary bulk whose
 *              domain is x->header.domain; a failed cast yields GRN_FALSE,
 *              otherwise the temporary is compared numerically.  The
 *              temporary is always finalized.
 *   - other  : y is compared directly by DO_COMPARE_SCALAR_SUB_NUMERIC.
 */
443#define DO_COMPARE_SCALAR_SUB_BUILTIN(op) do {\
444 switch (y->header.domain) {\
445 case GRN_DB_SHORT_TEXT :\
446 case GRN_DB_TEXT :\
447 case GRN_DB_LONG_TEXT :\
448 {\
449 grn_obj y_;\
450 GRN_OBJ_INIT(&y_, GRN_BULK, 0, x->header.domain);\
451 if (grn_obj_cast(ctx, y, &y_, GRN_FALSE)) {\
452 r = GRN_FALSE;\
453 } else {\
454 DO_COMPARE_SCALAR_SUB_NUMERIC(&y_, op);\
455 }\
456 GRN_OBJ_FIN(ctx, &y_);\
457 }\
458 break;\
459 default :\
460 DO_COMPARE_SCALAR_SUB_NUMERIC(y,op);\
461 break;\
462 }\
463} while (0)
464
/*
 * DO_COMPARE_SCALAR_SUB(op): compare x_ with `y`, resolving `y` to its table
 * key first when y's domain is not a reserved (built-in) type.
 *
 * Uses `y`, `x_`, `r` and `ctx` from the expansion site by name (plus `x`
 * through DO_COMPARE_SCALAR_SUB_BUILTIN).  `y` must be an assignable pointer
 * variable: while the key is being compared it is temporarily redirected to
 * the key bulk and restored afterwards.
 *
 *   - domain >= GRN_N_RESERVED_TYPES:
 *       the domain object is looked up with grn_ctx_at().  If it cannot be
 *       resolved (NULL), the result is GRN_FALSE instead of dereferencing
 *       the NULL pointer.  For hash/patricia/double-array tables the key of
 *       the record is fetched and compared; a key length <= 0 and any other
 *       object type yield GRN_FALSE.  The looked-up object is unlinked.
 *   - otherwise: y is compared directly.
 */
#define DO_COMPARE_SCALAR_SUB(op) do {\
  if (y->header.domain >= GRN_N_RESERVED_TYPES) {\
    grn_obj *y_table;\
    y_table = grn_ctx_at(ctx, y->header.domain);\
    if (!y_table) {\
      r = GRN_FALSE;\
    } else {\
      switch (y_table->header.type) {\
      case GRN_TABLE_HASH_KEY :\
      case GRN_TABLE_PAT_KEY :\
      case GRN_TABLE_DAT_KEY :\
        {\
          grn_obj y_key;\
          int length;\
          GRN_OBJ_INIT(&y_key, GRN_BULK, 0, y_table->header.domain);\
          length = grn_table_get_key2(ctx, y_table, GRN_RECORD_VALUE(y), &y_key);\
          if (length > 0) {\
            grn_obj *y_original = y;\
            y = &y_key;\
            DO_COMPARE_SCALAR_SUB_BUILTIN(op);\
            y = y_original;\
          } else {\
            r = GRN_FALSE;\
          }\
          GRN_OBJ_FIN(ctx, &y_key);\
        }\
        break;\
      default :\
        r = GRN_FALSE;\
        break;\
      }\
      grn_obj_unlink(ctx, y_table);\
    }\
  } else {\
    DO_COMPARE_SCALAR_SUB_BUILTIN(op);\
  }\
} while (0)
498
/*
 * DO_COMPARE_SCALAR_BUILTIN(x, y, r, op): r = (x op y) for a scalar `x`
 * whose domain is a built-in type.
 *
 * The nested DO_COMPARE_SCALAR_SUB* macros refer to `x`, `y`, `r` and `ctx`
 * by their literal names, so apart from the `x` argument (which may be an
 * expression, see DO_COMPARE_SCALAR) the arguments must be variables with
 * exactly these names.
 *
 * Dispatch is on x->header.domain:
 *   - Bool / integer / Float : x is loaded into x_ with the matching C type
 *                              (Bool as 1 or 0) and compared through
 *                              DO_COMPARE_SCALAR_SUB
 *   - GRN_DB_TIME            : handled inline; Int32/UInt32/Float y are
 *                              converted with GRN_TIME_PACK(..., 0), text y
 *                              is cast to Time with grn_obj_cast() (failure
 *                              yields GRN_FALSE), other y domains yield
 *                              GRN_FALSE
 *   - text x, text y         : memcmp() over the shorter length; when the
 *                              common part is equal the shorter string
 *                              orders first (r_ is 1, 0 or -1), then
 *                              r = (r_ op 0)
 *   - text x, other y        : x is parsed with grn_atoi() into an int and
 *                              compared through DO_COMPARE_SCALAR_SUB
 *   - anything else          : r = GRN_FALSE
 */
499#define DO_COMPARE_SCALAR_BUILTIN(x,y,r,op) do {\
500 switch (x->header.domain) {\
501 case GRN_DB_BOOL :\
502 {\
503 uint8_t x_ = GRN_BOOL_VALUE(x) ? 1 : 0;\
504 DO_COMPARE_SCALAR_SUB(op);\
505 }\
506 break;\
507 case GRN_DB_INT8 :\
508 {\
509 int8_t x_ = GRN_INT8_VALUE(x);\
510 DO_COMPARE_SCALAR_SUB(op);\
511 }\
512 break;\
513 case GRN_DB_UINT8 :\
514 {\
515 uint8_t x_ = GRN_UINT8_VALUE(x);\
516 DO_COMPARE_SCALAR_SUB(op);\
517 }\
518 break;\
519 case GRN_DB_INT16 :\
520 {\
521 int16_t x_ = GRN_INT16_VALUE(x);\
522 DO_COMPARE_SCALAR_SUB(op);\
523 }\
524 break;\
525 case GRN_DB_UINT16 :\
526 {\
527 uint16_t x_ = GRN_UINT16_VALUE(x);\
528 DO_COMPARE_SCALAR_SUB(op);\
529 }\
530 break;\
531 case GRN_DB_INT32 :\
532 {\
533 int32_t x_ = GRN_INT32_VALUE(x);\
534 DO_COMPARE_SCALAR_SUB(op);\
535 }\
536 break;\
537 case GRN_DB_UINT32 :\
538 {\
539 uint32_t x_ = GRN_UINT32_VALUE(x);\
540 DO_COMPARE_SCALAR_SUB(op);\
541 }\
542 break;\
543 case GRN_DB_TIME :\
544 {\
545 int64_t x_ = GRN_INT64_VALUE(x);\
546 switch (y->header.domain) {\
547 case GRN_DB_INT32 :\
548 r = (x_ op GRN_TIME_PACK(GRN_INT32_VALUE(y), 0));\
549 break;\
550 case GRN_DB_UINT32 :\
551 r = (x_ op GRN_TIME_PACK(GRN_UINT32_VALUE(y), 0));\
552 break;\
553 case GRN_DB_INT64 :\
554 case GRN_DB_TIME :\
555 r = (x_ op GRN_INT64_VALUE(y));\
556 break;\
557 case GRN_DB_UINT64 :\
558 r = (x_ op GRN_UINT64_VALUE(y));\
559 break;\
560 case GRN_DB_FLOAT :\
561 r = (x_ op GRN_TIME_PACK(GRN_FLOAT_VALUE(y), 0));\
562 break;\
563 case GRN_DB_SHORT_TEXT :\
564 case GRN_DB_TEXT :\
565 case GRN_DB_LONG_TEXT :\
566 {\
567 grn_obj time_value_;\
568 GRN_TIME_INIT(&time_value_, 0);\
569 if (grn_obj_cast(ctx, y, &time_value_, GRN_FALSE) == GRN_SUCCESS) {\
570 r = (x_ op GRN_TIME_VALUE(&time_value_));\
571 } else {\
572 r = GRN_FALSE;\
573 }\
574 GRN_OBJ_FIN(ctx, &time_value_);\
575 }\
576 break;\
577 default :\
578 r = GRN_FALSE;\
579 break;\
580 }\
581 }\
582 break;\
583 case GRN_DB_INT64 :\
584 {\
585 int64_t x_ = GRN_INT64_VALUE(x);\
586 DO_COMPARE_SCALAR_SUB(op);\
587 }\
588 break;\
589 case GRN_DB_UINT64 :\
590 {\
591 uint64_t x_ = GRN_UINT64_VALUE(x);\
592 DO_COMPARE_SCALAR_SUB(op);\
593 }\
594 break;\
595 case GRN_DB_FLOAT :\
596 {\
597 double x_ = GRN_FLOAT_VALUE(x);\
598 DO_COMPARE_SCALAR_SUB(op);\
599 }\
600 break;\
601 case GRN_DB_SHORT_TEXT :\
602 case GRN_DB_TEXT :\
603 case GRN_DB_LONG_TEXT :\
604 if (GRN_DB_SHORT_TEXT <= y->header.domain && y->header.domain <= GRN_DB_LONG_TEXT) {\
605 int r_;\
606 uint32_t la = GRN_TEXT_LEN(x), lb = GRN_TEXT_LEN(y);\
607 if (la > lb) {\
608 if (!(r_ = memcmp(GRN_TEXT_VALUE(x), GRN_TEXT_VALUE(y), lb))) {\
609 r_ = 1;\
610 }\
611 } else {\
612 if (!(r_ = memcmp(GRN_TEXT_VALUE(x), GRN_TEXT_VALUE(y), la))) {\
613 r_ = la == lb ? 0 : -1;\
614 }\
615 }\
616 r = (r_ op 0);\
617 } else {\
618 const char *q_ = GRN_TEXT_VALUE(x);\
619 int x_ = grn_atoi(q_, q_ + GRN_TEXT_LEN(x), NULL);\
620 DO_COMPARE_SCALAR_SUB(op);\
621 }\
622 break;\
623 default :\
624 r = GRN_FALSE;\
625 break;\
626 }\
627} while (0)
628
/*
 * DO_COMPARE_SCALAR(x, y, r, op): r = (x op y) for a scalar `x`, resolving
 * `x` to its table key first when x's domain is not a reserved (built-in)
 * type.
 *
 * `x` must be an assignable pointer variable: while the key is being
 * compared it is temporarily redirected to the key bulk and restored
 * afterwards.  The nested macros additionally use `y`, `r` and `ctx` by
 * their literal names.
 *
 *   - domain >= GRN_N_RESERVED_TYPES:
 *       the domain object is looked up with grn_ctx_at().  If it cannot be
 *       resolved (NULL), the result is GRN_FALSE instead of dereferencing
 *       the NULL pointer.  For hash/patricia/double-array tables the key of
 *       the record is fetched and compared; a key length <= 0 and any other
 *       object type yield GRN_FALSE.  The looked-up object is unlinked.
 *   - otherwise: x is compared directly by DO_COMPARE_SCALAR_BUILTIN.
 */
#define DO_COMPARE_SCALAR(x, y, r, op) do {\
  if (x->header.domain >= GRN_N_RESERVED_TYPES) {\
    grn_obj *x_table;\
    x_table = grn_ctx_at(ctx, x->header.domain);\
    if (!x_table) {\
      r = GRN_FALSE;\
    } else {\
      switch (x_table->header.type) {\
      case GRN_TABLE_HASH_KEY :\
      case GRN_TABLE_PAT_KEY :\
      case GRN_TABLE_DAT_KEY :\
        {\
          grn_obj x_key;\
          int length;\
          GRN_OBJ_INIT(&x_key, GRN_BULK, 0, x_table->header.domain);\
          length = grn_table_get_key2(ctx, x_table, GRN_RECORD_VALUE(x), &x_key);\
          if (length > 0) {\
            grn_obj *x_original = x;\
            x = &x_key;\
            DO_COMPARE_SCALAR_BUILTIN((&x_key), y, r, op);\
            x = x_original;\
          } else {\
            r = GRN_FALSE;\
          }\
          GRN_OBJ_FIN(ctx, &x_key);\
        }\
        break;\
      default :\
        r = GRN_FALSE;\
        break;\
      }\
      grn_obj_unlink(ctx, x_table);\
    }\
  } else {\
    DO_COMPARE_SCALAR_BUILTIN(x, y, r, op);\
  }\
} while (0)
662
/*
 * DO_COMPARE(x, y, r, op): r = (x op y), where `x` may be a uvector.
 *
 * The nested comparison macros refer to `x`, `y`, `r` and `ctx` by their
 * literal names, so the arguments must be assignable variables with exactly
 * these names.
 *
 *   - x is a GRN_UVECTOR: every element is copied into a temporary bulk and
 *     compared in turn; the loop stops at the first element for which the
 *     comparison holds.  `r` is left untouched when the uvector is empty.
 *
 *     While an element is being compared, `x` itself is pointed at the
 *     temporary element and restored right after.  This matters because the
 *     nested macros read `x->header.domain` by name (for example to choose
 *     the type a text `y` is cast to); without the redirection they would
 *     see the uvector (whose domain is the referenced table) instead of the
 *     element or its resolved key.
 *
 *   - otherwise: an empty x or an empty y yields GRN_FALSE; anything else is
 *     compared as a scalar.
 */
#define DO_COMPARE(x, y, r, op) do {\
  if (x->header.type == GRN_UVECTOR) {\
    grn_obj *uvector_ = x;\
    grn_obj element_buffer;\
    unsigned int i, n;\
    unsigned int element_size;\
    GRN_VALUE_FIX_SIZE_INIT(&element_buffer, 0, uvector_->header.domain);\
    n = grn_uvector_size(ctx, uvector_);\
    element_size = grn_uvector_element_size(ctx, uvector_);\
    for (i = 0; i < n; i++) {\
      GRN_BULK_REWIND(&element_buffer);\
      grn_bulk_write(ctx, &element_buffer,\
                     ((uint8_t *)GRN_BULK_HEAD(uvector_)) + (element_size * i),\
                     element_size);\
      x = &element_buffer;\
      DO_COMPARE_SCALAR(x, y, r, op);\
      x = uvector_;\
      if (r) {\
        break;\
      }\
    }\
    GRN_OBJ_FIN(ctx, &element_buffer);\
  } else {\
    if (GRN_BULK_VSIZE(x) == 0 || GRN_BULK_VSIZE(y) == 0) {\
      r = GRN_FALSE;\
    } else {\
      DO_COMPARE_SCALAR(x, y, r, op);\
    }\
  }\
} while (0)
691
/*
 * Returns whether `x < y` as decided by the DO_COMPARE macro.
 *
 * When `x` is a uvector the result is true as soon as one element satisfies
 * the comparison.  Unsupported type combinations and empty operands yield
 * GRN_FALSE.
 * The names `x`, `y`, `r` and `ctx` must stay as they are: the macros that
 * DO_COMPARE expands to refer to them literally and temporarily reassign
 * `x` and `y`.
 */
692grn_bool
693grn_operator_exec_less(grn_ctx *ctx, grn_obj *x, grn_obj *y)
694{
695 grn_bool r = GRN_FALSE;
696 GRN_API_ENTER;
697 DO_COMPARE(x, y, r, <);
698 GRN_API_RETURN(r);
699}
700
/*
 * Returns whether `x > y` as decided by the DO_COMPARE macro.
 *
 * When `x` is a uvector the result is true as soon as one element satisfies
 * the comparison.  Unsupported type combinations and empty operands yield
 * GRN_FALSE.
 * The names `x`, `y`, `r` and `ctx` must stay as they are: the macros that
 * DO_COMPARE expands to refer to them literally and temporarily reassign
 * `x` and `y`.
 */
701grn_bool
702grn_operator_exec_greater(grn_ctx *ctx, grn_obj *x, grn_obj *y)
703{
704 grn_bool r = GRN_FALSE;
705 GRN_API_ENTER;
706 DO_COMPARE(x, y, r, >);
707 GRN_API_RETURN(r);
708}
709
/*
 * Returns whether `x <= y` as decided by the DO_COMPARE macro.
 *
 * When `x` is a uvector the result is true as soon as one element satisfies
 * the comparison.  Unsupported type combinations and empty operands yield
 * GRN_FALSE (so this is not simply the negation of "greater").
 * The names `x`, `y`, `r` and `ctx` must stay as they are: the macros that
 * DO_COMPARE expands to refer to them literally and temporarily reassign
 * `x` and `y`.
 */
710grn_bool
711grn_operator_exec_less_equal(grn_ctx *ctx, grn_obj *x, grn_obj *y)
712{
713 grn_bool r = GRN_FALSE;
714 GRN_API_ENTER;
715 DO_COMPARE(x, y, r, <=);
716 GRN_API_RETURN(r);
717}
718
/*
 * Returns whether `x >= y` as decided by the DO_COMPARE macro.
 *
 * When `x` is a uvector the result is true as soon as one element satisfies
 * the comparison.  Unsupported type combinations and empty operands yield
 * GRN_FALSE (so this is not simply the negation of "less").
 * The names `x`, `y`, `r` and `ctx` must stay as they are: the macros that
 * DO_COMPARE expands to refer to them literally and temporarily reassign
 * `x` and `y`.
 */
719grn_bool
720grn_operator_exec_greater_equal(grn_ctx *ctx, grn_obj *x, grn_obj *y)
721{
722 grn_bool r = GRN_FALSE;
723 GRN_API_ENTER;
724 DO_COMPARE(x, y, r, >=);
725 GRN_API_RETURN(r);
726}
727
728static grn_bool
729exec_match_uvector_bulk(grn_ctx *ctx, grn_obj *uvector, grn_obj *query)
730{
731 grn_bool matched = GRN_FALSE;
732 unsigned int i, size;
733 grn_obj element;
734 unsigned int element_size;
735
736 size = grn_uvector_size(ctx, uvector);
737 element_size = grn_uvector_element_size(ctx, uvector);
738 GRN_VALUE_FIX_SIZE_INIT(&element, 0, uvector->header.domain);
739 for (i = 0; i < size; i++) {
740 GRN_BULK_REWIND(&element);
741 grn_bulk_write(ctx, &element,
742 GRN_BULK_HEAD(uvector) + (element_size * i),
743 element_size);
744 if (grn_operator_exec_equal(ctx, &element, query)) {
745 matched = GRN_TRUE;
746 break;
747 }
748 }
749 GRN_OBJ_FIN(ctx, &element);
750
751 return matched;
752}
753
754static grn_bool
755exec_match_vector_bulk(grn_ctx *ctx, grn_obj *vector, grn_obj *query)
756{
757 grn_bool matched = GRN_FALSE;
758 unsigned int i, size;
759 grn_obj element;
760
761 size = grn_vector_size(ctx, vector);
762 GRN_VOID_INIT(&element);
763 for (i = 0; i < size; i++) {
764 const char *content;
765 unsigned int content_size;
766 grn_id domain_id;
767
768 content_size = grn_vector_get_element(ctx, vector, i,
769 &content, NULL, &domain_id);
770 grn_obj_reinit(ctx, &element, domain_id, 0);
771 grn_bulk_write(ctx, &element, content, content_size);
772 if (grn_operator_exec_equal(ctx, &element, query)) {
773 matched = GRN_TRUE;
774 break;
775 }
776 }
777 GRN_OBJ_FIN(ctx, &element);
778
779 return matched;
780}
781
782#ifdef GRN_SUPPORT_REGEXP
783static OnigRegex
784regexp_compile(grn_ctx *ctx,
785 const char *pattern,
786 unsigned int pattern_len,
787 const OnigSyntaxType *syntax)
788{
789 OnigRegex regex;
790 OnigEncoding onig_encoding;
791 int onig_result;
792 OnigErrorInfo onig_error_info;
793
794 if (ctx->encoding == GRN_ENC_NONE) {
795 return NULL;
796 }
797
798 switch (ctx->encoding) {
799 case GRN_ENC_EUC_JP :
800 onig_encoding = ONIG_ENCODING_EUC_JP;
801 break;
802 case GRN_ENC_UTF8 :
803 onig_encoding = ONIG_ENCODING_UTF8;
804 break;
805 case GRN_ENC_SJIS :
806 onig_encoding = ONIG_ENCODING_CP932;
807 break;
808 case GRN_ENC_LATIN1 :
809 onig_encoding = ONIG_ENCODING_ISO_8859_1;
810 break;
811 case GRN_ENC_KOI8R :
812 onig_encoding = ONIG_ENCODING_KOI8_R;
813 break;
814 default :
815 return NULL;
816 }
817
818 onig_result = onig_new(&regex,
819 pattern,
820 pattern + pattern_len,
821 ONIG_OPTION_ASCII_RANGE |
822 ONIG_OPTION_MULTILINE,
823 onig_encoding,
824 syntax,
825 &onig_error_info);
826 if (onig_result != ONIG_NORMAL) {
827 char message[ONIG_MAX_ERROR_MESSAGE_LEN];
828 onig_error_code_to_str(message, onig_result, onig_error_info);
829 ERR(GRN_INVALID_ARGUMENT,
830 "[operator][regexp] "
831 "failed to create regular expression object: <%.*s>: %s",
832 pattern_len, pattern,
833 message);
834 return NULL;
835 }
836
837 return regex;
838}
839
840static grn_bool
841regexp_is_match(grn_ctx *ctx, OnigRegex regex,
842 const char *target, unsigned int target_len)
843{
844 OnigPosition position;
845
846 position = onig_search(regex,
847 target,
848 target + target_len,
849 target,
850 target + target_len,
851 NULL,
852 ONIG_OPTION_NONE);
853 return position != ONIG_MISMATCH;
854}
855#endif /* GRN_SUPPORT_REGEXP */
856
857static grn_bool
858string_have_sub_text(grn_ctx *ctx,
859 const char *text, unsigned int text_len,
860 const char *sub_text, unsigned int sub_text_len)
861{
862 if (sub_text_len == 0) {
863 return GRN_FALSE;
864 }
865
866 if (sub_text_len > text_len) {
867 return GRN_FALSE;
868 }
869
870#ifdef GRN_SUPPORT_REGEXP
871 {
872 OnigRegex regex;
873 grn_bool matched;
874
875 regex = regexp_compile(ctx, sub_text, sub_text_len, ONIG_SYNTAX_ASIS);
876 if (!regex) {
877 return GRN_FALSE;
878 }
879
880 matched = regexp_is_match(ctx, regex, text, text_len);
881 onig_free(regex);
882 return matched;
883 }
884#else /* GRN_SUPPORT_REGEXP */
885 {
886 const char *text_current = text;
887 const char *text_end = text + text_len;
888 const char *sub_text_current = sub_text;
889 const char *sub_text_end = sub_text + sub_text_len;
890 int sub_text_start_char_len;
891 int sub_text_char_len;
892
893 sub_text_start_char_len = grn_charlen(ctx, sub_text, sub_text_end);
894 if (sub_text_start_char_len == 0) {
895 return GRN_FALSE;
896 }
897 sub_text_char_len = sub_text_start_char_len;
898
899 while (text_current < text_end) {
900 int text_char_len;
901
902 text_char_len = grn_charlen(ctx, text_current, text_end);
903 if (text_char_len == 0) {
904 return GRN_FALSE;
905 }
906
907 if (text_char_len == sub_text_char_len &&
908 memcmp(text_current, sub_text_current, text_char_len) == 0) {
909 sub_text_current += sub_text_char_len;
910 if (sub_text_current == sub_text_end) {
911 return GRN_TRUE;
912 }
913
914 sub_text_char_len = grn_charlen(ctx, sub_text_current, sub_text_end);
915 if (sub_text_char_len == 0) {
916 return GRN_FALSE;
917 }
918 } else {
919 if (sub_text_current != sub_text) {
920 sub_text_current = sub_text;
921 sub_text_char_len = sub_text_start_char_len;
922 continue;
923 }
924 }
925
926 text_current += text_char_len;
927 }
928
929 return GRN_FALSE;
930 }
931#endif /* GRN_SUPPORT_REGEXP */
932}
933
934static grn_bool
935string_have_prefix(grn_ctx *ctx,
936 const char *target, unsigned int target_len,
937 const char *prefix, unsigned int prefix_len)
938{
939 return (target_len >= prefix_len &&
940 strncmp(target, prefix, prefix_len) == 0);
941}
942
943static grn_bool
944string_match_regexp(grn_ctx *ctx,
945 const char *target, unsigned int target_len,
946 const char *pattern, unsigned int pattern_len)
947{
948#ifdef GRN_SUPPORT_REGEXP
949 OnigRegex regex;
950 grn_bool matched;
951
952 regex = regexp_compile(ctx, pattern, pattern_len, ONIG_SYNTAX_RUBY);
953 if (!regex) {
954 return GRN_FALSE;
955 }
956
957 matched = regexp_is_match(ctx, regex, target, target_len);
958 onig_free(regex);
959 return matched;
960#else /* GRN_SUPPORT_REGEXP */
961 return GRN_FALSE;
962#endif /* GRN_SUPPORT_REGEXP */
963}
964
965static grn_bool
966exec_text_operator(grn_ctx *ctx,
967 grn_operator op,
968 const char *target,
969 unsigned int target_len,
970 const char *query,
971 unsigned int query_len)
972{
973 grn_bool matched = GRN_FALSE;
974
975 if (target_len == 0 || query_len == 0) {
976 return GRN_FALSE;
977 }
978
979 switch (op) {
980 case GRN_OP_MATCH :
981 matched = string_have_sub_text(ctx, target, target_len, query, query_len);
982 break;
983 case GRN_OP_PREFIX :
984 matched = string_have_prefix(ctx, target, target_len, query, query_len);
985 break;
986 case GRN_OP_REGEXP :
987 matched = string_match_regexp(ctx, target, target_len, query, query_len);
988 break;
989 default :
990 matched = GRN_FALSE;
991 break;
992 }
993
994 return matched;
995}
996
997static grn_bool
998exec_text_operator_raw_text_raw_text(grn_ctx *ctx,
999 grn_operator op,
1000 const char *target,
1001 unsigned int target_len,
1002 const char *query,
1003 unsigned int query_len)
1004{
1005 grn_obj *normalizer;
1006 grn_obj *norm_target;
1007 grn_obj *norm_query;
1008 const char *norm_target_raw;
1009 const char *norm_query_raw;
1010 unsigned int norm_target_raw_length_in_bytes;
1011 unsigned int norm_query_raw_length_in_bytes;
1012 grn_bool matched = GRN_FALSE;
1013
1014 if (target_len == 0 || query_len == 0) {
1015 return GRN_FALSE;
1016 }
1017
1018 normalizer = grn_ctx_get(ctx, GRN_NORMALIZER_AUTO_NAME, -1);
1019 norm_target = grn_string_open(ctx, target, target_len, normalizer, 0);
1020 grn_string_get_normalized(ctx, norm_target,
1021 &norm_target_raw,
1022 &norm_target_raw_length_in_bytes,
1023 NULL);
1024
1025 if (op == GRN_OP_REGEXP) {
1026 norm_query = NULL;
1027 norm_query_raw = query;
1028 norm_query_raw_length_in_bytes = query_len;
1029 } else {
1030 norm_query = grn_string_open(ctx, query, query_len, normalizer, 0);
1031 grn_string_get_normalized(ctx, norm_query,
1032 &norm_query_raw,
1033 &norm_query_raw_length_in_bytes,
1034 NULL);
1035 }
1036
1037 matched = exec_text_operator(ctx, op,
1038 norm_target_raw,
1039 norm_target_raw_length_in_bytes,
1040 norm_query_raw,
1041 norm_query_raw_length_in_bytes);
1042
1043 grn_obj_close(ctx, norm_target);
1044 if (norm_query) {
1045 grn_obj_close(ctx, norm_query);
1046 }
1047 grn_obj_unlink(ctx, normalizer);
1048
1049 return matched;
1050}
1051
1052static grn_bool
1053exec_text_operator_record_text(grn_ctx *ctx,
1054 grn_operator op,
1055 grn_obj *record, grn_obj *table,
1056 grn_obj *query)
1057{
1058 grn_obj *normalizer;
1059 char record_key[GRN_TABLE_MAX_KEY_SIZE];
1060 int record_key_len;
1061 grn_bool matched = GRN_FALSE;
1062
1063 if (table->header.domain != GRN_DB_SHORT_TEXT) {
1064 return GRN_FALSE;
1065 }
1066
1067 if (GRN_TEXT_LEN(query) == 0) {
1068 return GRN_FALSE;
1069 }
1070
1071 record_key_len = grn_table_get_key(ctx, table, GRN_RECORD_VALUE(record),
1072 record_key, GRN_TABLE_MAX_KEY_SIZE);
1073 grn_table_get_info(ctx, table, NULL, NULL, NULL, &normalizer, NULL);
1074 if (normalizer) {
1075 grn_obj *norm_query;
1076 const char *norm_query_raw;
1077 unsigned int norm_query_raw_length_in_bytes;
1078
1079 if (op == GRN_OP_REGEXP) {
1080 norm_query = NULL;
1081 norm_query_raw = GRN_TEXT_VALUE(query);
1082 norm_query_raw_length_in_bytes = GRN_TEXT_LEN(query);
1083 } else {
1084 norm_query = grn_string_open(ctx,
1085 GRN_TEXT_VALUE(query),
1086 GRN_TEXT_LEN(query),
1087 normalizer,
1088 0);
1089 grn_string_get_normalized(ctx, norm_query,
1090 &norm_query_raw,
1091 &norm_query_raw_length_in_bytes,
1092 NULL);
1093 }
1094 matched = exec_text_operator(ctx,
1095 op,
1096 record_key,
1097 record_key_len,
1098 norm_query_raw,
1099 norm_query_raw_length_in_bytes);
1100 if (norm_query) {
1101 grn_obj_close(ctx, norm_query);
1102 }
1103 } else {
1104 matched = exec_text_operator_raw_text_raw_text(ctx,
1105 op,
1106 record_key,
1107 record_key_len,
1108 GRN_TEXT_VALUE(query),
1109 GRN_TEXT_LEN(query));
1110 }
1111
1112 return matched;
1113}
1114
1115static grn_bool
1116exec_text_operator_text_text(grn_ctx *ctx,
1117 grn_operator op,
1118 grn_obj *target,
1119 grn_obj *query)
1120{
1121 return exec_text_operator_raw_text_raw_text(ctx,
1122 op,
1123 GRN_TEXT_VALUE(target),
1124 GRN_TEXT_LEN(target),
1125 GRN_TEXT_VALUE(query),
1126 GRN_TEXT_LEN(query));
1127}
1128
1129static grn_bool
1130exec_text_operator_bulk_bulk(grn_ctx *ctx,
1131 grn_operator op,
1132 grn_obj *target,
1133 grn_obj *query)
1134{
1135 switch (target->header.domain) {
1136 case GRN_DB_SHORT_TEXT :
1137 case GRN_DB_TEXT :
1138 case GRN_DB_LONG_TEXT :
1139 switch (query->header.domain) {
1140 case GRN_DB_SHORT_TEXT :
1141 case GRN_DB_TEXT :
1142 case GRN_DB_LONG_TEXT :
1143 return exec_text_operator_text_text(ctx, op, target, query);
1144 default :
1145 break;
1146 }
1147 return GRN_FALSE;
1148 default:
1149 {
1150 grn_obj *domain;
1151 domain = grn_ctx_at(ctx, target->header.domain);
1152 if (GRN_OBJ_TABLEP(domain)) {
1153 switch (query->header.domain) {
1154 case GRN_DB_SHORT_TEXT :
1155 case GRN_DB_TEXT :
1156 case GRN_DB_LONG_TEXT :
1157 return exec_text_operator_record_text(ctx, op, target, domain, query);
1158 default :
1159 break;
1160 }
1161 }
1162 }
1163 return GRN_FALSE;
1164 }
1165}
1166
1167grn_bool
1168grn_operator_exec_match(grn_ctx *ctx, grn_obj *target, grn_obj *sub_text)
1169{
1170 grn_bool matched;
1171 GRN_API_ENTER;
1172 switch (target->header.type) {
1173 case GRN_UVECTOR :
1174 matched = exec_match_uvector_bulk(ctx, target, sub_text);
1175 break;
1176 case GRN_VECTOR :
1177 matched = exec_match_vector_bulk(ctx, target, sub_text);
1178 break;
1179 default :
1180 matched = exec_text_operator_bulk_bulk(ctx, GRN_OP_MATCH, target, sub_text);
1181 break;
1182 }
1183 GRN_API_RETURN(matched);
1184}
1185
1186grn_bool
1187grn_operator_exec_prefix(grn_ctx *ctx, grn_obj *target, grn_obj *prefix)
1188{
1189 grn_bool matched;
1190 GRN_API_ENTER;
1191 matched = exec_text_operator_bulk_bulk(ctx, GRN_OP_PREFIX, target, prefix);
1192 GRN_API_RETURN(matched);
1193}
1194
1195static grn_bool
1196exec_regexp_uvector_bulk(grn_ctx *ctx, grn_obj *uvector, grn_obj *pattern)
1197{
1198#ifdef GRN_SUPPORT_REGEXP
1199 grn_bool matched = GRN_FALSE;
1200 unsigned int i, size;
1201 OnigRegex regex;
1202 grn_obj *domain;
1203 grn_obj *normalizer;
1204 grn_obj *normalizer_auto = NULL;
1205
1206 size = grn_uvector_size(ctx, uvector);
1207 if (size == 0) {
1208 return GRN_FALSE;
1209 }
1210
1211 regex = regexp_compile(ctx,
1212 GRN_TEXT_VALUE(pattern),
1213 GRN_TEXT_LEN(pattern),
1214 ONIG_SYNTAX_RUBY);
1215 if (!regex) {
1216 return GRN_FALSE;
1217 }
1218
1219 domain = grn_ctx_at(ctx, uvector->header.domain);
1220 if (!domain) {
1221 onig_free(regex);
1222 return GRN_FALSE;
1223 }
1224
1225 grn_table_get_info(ctx, domain, NULL, NULL, NULL, &normalizer, NULL);
1226 if (!normalizer) {
1227 normalizer_auto = grn_ctx_get(ctx, GRN_NORMALIZER_AUTO_NAME, -1);
1228 }
1229
1230 for (i = 0; i < size; i++) {
1231 grn_id record_id;
1232 char key[GRN_TABLE_MAX_KEY_SIZE];
1233 int key_size;
1234
1235 record_id = grn_uvector_get_element(ctx, uvector, i, NULL);
1236 key_size = grn_table_get_key(ctx, domain, record_id,
1237 key, GRN_TABLE_MAX_KEY_SIZE);
1238 if (key_size == 0) {
1239 continue;
1240 }
1241
1242 if (normalizer) {
1243 matched = regexp_is_match(ctx, regex, key, key_size);
1244 } else {
1245 grn_obj *norm_key;
1246 const char *norm_key_raw;
1247 unsigned int norm_key_raw_length_in_bytes;
1248
1249 norm_key = grn_string_open(ctx, key, key_size, normalizer_auto, 0);
1250 grn_string_get_normalized(ctx, norm_key,
1251 &norm_key_raw,
1252 &norm_key_raw_length_in_bytes,
1253 NULL);
1254 matched = regexp_is_match(ctx, regex,
1255 norm_key_raw,
1256 norm_key_raw_length_in_bytes);
1257 grn_obj_unlink(ctx, norm_key);
1258 }
1259
1260 if (matched) {
1261 break;
1262 }
1263 }
1264
1265 if (normalizer_auto) {
1266 grn_obj_unlink(ctx, normalizer_auto);
1267 }
1268
1269 grn_obj_unlink(ctx, domain);
1270
1271 onig_free(regex);
1272
1273 return matched;
1274#else /* GRN_SUPPORT_REGEXP */
1275 return GRN_FALSE;
1276#endif /* GRN_SUPPORT_REGEXP */
1277}
1278
1279static grn_bool
1280exec_regexp_vector_bulk(grn_ctx *ctx, grn_obj *vector, grn_obj *pattern)
1281{
1282#ifdef GRN_SUPPORT_REGEXP
1283 grn_obj *normalizer = NULL;
1284 grn_bool matched = GRN_FALSE;
1285 unsigned int i, size;
1286 OnigRegex regex;
1287
1288 size = grn_vector_size(ctx, vector);
1289 if (size == 0) {
1290 return GRN_FALSE;
1291 }
1292
1293 regex = regexp_compile(ctx,
1294 GRN_TEXT_VALUE(pattern),
1295 GRN_TEXT_LEN(pattern),
1296 ONIG_SYNTAX_RUBY);
1297 if (!regex) {
1298 return GRN_FALSE;
1299 }
1300
1301 normalizer = grn_ctx_get(ctx, GRN_NORMALIZER_AUTO_NAME, -1);
1302 for (i = 0; i < size; i++) {
1303 const char *content;
1304 unsigned int content_size;
1305 grn_id domain_id;
1306 grn_obj *norm_content;
1307 const char *norm_content_raw;
1308 unsigned int norm_content_raw_length_in_bytes;
1309
1310 content_size = grn_vector_get_element(ctx, vector, i,
1311 &content, NULL, &domain_id);
1312 if (content_size == 0) {
1313 continue;
1314 }
1315
1316 norm_content = grn_string_open(ctx, content, content_size, normalizer, 0);
1317 grn_string_get_normalized(ctx, norm_content,
1318 &norm_content_raw,
1319 &norm_content_raw_length_in_bytes,
1320 NULL);
1321
1322 matched = regexp_is_match(ctx, regex,
1323 norm_content_raw,
1324 norm_content_raw_length_in_bytes);
1325
1326 grn_obj_unlink(ctx, norm_content);
1327
1328 if (matched) {
1329 break;
1330 }
1331 }
1332 grn_obj_unlink(ctx, normalizer);
1333
1334 onig_free(regex);
1335
1336 return matched;
1337#else /* GRN_SUPPORT_REGEXP */
1338 return GRN_FALSE;
1339#endif /* GRN_SUPPORT_REGEXP */
1340}
1341
1342grn_bool
1343grn_operator_exec_regexp(grn_ctx *ctx, grn_obj *target, grn_obj *pattern)
1344{
1345 grn_bool matched = GRN_FALSE;
1346 GRN_API_ENTER;
1347 switch (target->header.type) {
1348 case GRN_UVECTOR :
1349 matched = exec_regexp_uvector_bulk(ctx, target, pattern);
1350 break;
1351 case GRN_VECTOR :
1352 matched = exec_regexp_vector_bulk(ctx, target, pattern);
1353 break;
1354 case GRN_BULK :
1355 matched = exec_text_operator_bulk_bulk(ctx, GRN_OP_REGEXP, target, pattern);
1356 break;
1357 default :
1358 matched = GRN_FALSE;
1359 break;
1360 }
1361 GRN_API_RETURN(matched);
1362}
1363