1 | /* -*- c-basic-offset: 2 -*- */ |
2 | /* |
3 | Copyright(C) 2009-2016 Brazil |
4 | |
5 | This library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License version 2.1 as published by the Free Software Foundation. |
8 | |
9 | This library is distributed in the hope that it will be useful, |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | Lesser General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU Lesser General Public |
15 | License along with this library; if not, write to the Free Software |
16 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
17 | */ |
18 | |
19 | #pragma once |
20 | |
21 | #include "grn_ctx.h" |
22 | #include "grn_db.h" |
23 | |
24 | #include <groonga/tokenizer.h> |
25 | |
26 | #ifdef __cplusplus |
27 | extern "C" { |
28 | #endif |
29 | |
/*
 * Marker byte sequences and their lengths in bytes.
 *
 * "\xEF\xBF\xAF" is the 3-byte UTF-8 encoding of U+FFEF and
 * "\xEF\xBF\xB0" is the 3-byte UTF-8 encoding of U+FFF0.
 * The *_LEN values exclude the string literal's terminating NUL.
 *
 * NOTE(review): going by the names, these presumably mark the beginning and
 * the end of tokenized text; the code that uses them is outside this header
 * -- confirm there.
 */
#define GRN_TOKENIZER_BEGIN_MARK_UTF8 "\xEF\xBF\xAF"
#define GRN_TOKENIZER_BEGIN_MARK_UTF8_LEN 3
#define GRN_TOKENIZER_END_MARK_UTF8 "\xEF\xBF\xB0"
#define GRN_TOKENIZER_END_MARK_UTF8_LEN 3
34 | |
/*
 * Status values stored in the `status` member of grn_token_cursor.
 *
 * The numeric values are spelled out so that they stay stable if an
 * enumerator is ever inserted or reordered.
 *
 * NOTE(review): the meaning of each state is defined by the token cursor
 * implementation, which is not part of this header; the names suggest
 * "in progress", "finished", "finished, skip this token" and "token not
 * found in the table" -- confirm against the implementation.
 */
typedef enum {
  GRN_TOKEN_CURSOR_DOING     = 0,
  GRN_TOKEN_CURSOR_DONE      = 1,
  GRN_TOKEN_CURSOR_DONE_SKIP = 2,
  GRN_TOKEN_CURSOR_NOT_FOUND = 3
} grn_token_cursor_status;
41 | |
/*
 * Definition of the struct tagged _grn_token: one grn_obj plus a status.
 *
 * NOTE(review): no typedef for this tag is declared in this header;
 * presumably the public headers declare `grn_token` as an opaque alias for
 * it -- confirm.
 */
struct _grn_token {
  /* Token payload, stored in a grn_obj. */
  grn_obj data;
  /* Status of this token; grn_token_status is declared outside this header. */
  grn_token_status status;
};
46 | |
/*
 * State of a token cursor, as returned by grn_token_cursor_open() and
 * consumed by grn_token_cursor_next() / grn_token_cursor_close().
 *
 * NOTE(review): the per-field descriptions below are inferred from names
 * and types only; the code that fills and reads these fields is not part of
 * this header -- confirm against the implementation before relying on them.
 */
typedef struct {
  /* Presumably the table passed to grn_token_cursor_open() -- confirm. */
  grn_obj *table;
  /* Read-only byte pointers; presumably the start of the original input
   * (orig) and the current token position (curr) -- confirm. */
  const unsigned char *orig;
  const unsigned char *curr;
  /* Presumably the byte length of the original input.
   * NOTE(review): grn_token_cursor_open() takes str_len as size_t while this
   * field is uint32_t; check how lengths above UINT32_MAX are handled. */
  uint32_t orig_blen;
  /* Presumably the byte size of the current token -- confirm. */
  uint32_t curr_size;
  /* Signed position counter; presumably the position of the current token. */
  int32_t pos;
  /* Tokenize mode; same type as the `mode` argument of
   * grn_token_cursor_open(). */
  grn_tokenize_mode mode;
  /* Cursor progress state (see grn_token_cursor_status). */
  grn_token_cursor_status status;
  /* NOTE(review): presumably requests prefix-search handling of the current
   * token -- confirm. */
  grn_bool force_prefix;
  /* Presumably a cached copy of the table's flags -- confirm. */
  grn_obj_flags table_flags;
  /* Character encoding associated with this cursor. */
  grn_encoding encoding;
  /* Tokenizer object; presumably the tokenizer bound to `table`. */
  grn_obj *tokenizer;
  /* Proc context embedded by value; presumably used when calling the
   * tokenizer -- confirm. */
  grn_proc_ctx pctx;
  /* Token filter state. */
  struct {
    /* Presumably the token filter objects attached to the table. */
    grn_obj *objects;
    /* Array of opaque pointers; presumably one user-data slot per filter. */
    void **data;
  } token_filter;
  /* NOTE(review): purpose not determinable from this header. */
  uint32_t variant;
  /* Presumably the normalized form of the input string -- confirm. */
  grn_obj *nstr;
} grn_token_cursor;
68 | |
/*
 * Bit 0 flag; the expression evaluates to 1 and has type long because of
 * the L suffix.
 *
 * NOTE(review): presumably meant for the `flags` argument (unsigned int) of
 * grn_token_cursor_open() -- confirm against the callers.
 */
#define GRN_TOKEN_CURSOR_ENABLE_TOKENIZED_DELIMITER (0x01L<<0)
70 | |
/*
 * Declares grn_token_cursor_open(); the definition lives outside this
 * header.
 *
 * Parameters (as declared):
 *   ctx     - Groonga context.
 *   table   - grn_obj pointer; presumably the lexicon table whose tokenizer
 *             is applied to the input -- confirm.
 *   str     - Input bytes; not modified through this pointer (const).
 *   str_len - Size associated with str; presumably its length in bytes.
 *   mode    - Tokenize mode.
 *   flags   - Bit flags. GRN_TOKEN_CURSOR_ENABLE_TOKENIZED_DELIMITER is
 *             defined in this header and is presumably intended here.
 *
 * Returns a pointer to a grn_token_cursor.
 * NOTE(review): the failure result (presumably NULL) and the ownership of
 * the returned cursor (presumably released with grn_token_cursor_close())
 * are not visible in this header -- confirm.
 */
GRN_API grn_token_cursor *grn_token_cursor_open(grn_ctx *ctx, grn_obj *table,
                                                const char *str, size_t str_len,
                                                grn_tokenize_mode mode,
                                                unsigned int flags);
75 | |
/*
 * Declares grn_token_cursor_next(); the definition lives outside this
 * header.
 *
 * Takes the Groonga context and a token cursor and returns a grn_id.
 * NOTE(review): presumably advances the cursor by one token and returns
 * that token's ID in the cursor's table; which value signals "no token"
 * is not visible here -- confirm.
 */
GRN_API grn_id grn_token_cursor_next(grn_ctx *ctx, grn_token_cursor *token_cursor);
/*
 * Declares grn_token_cursor_close(); the definition lives outside this
 * header.
 *
 * Takes the Groonga context and a token cursor and returns a grn_rc
 * result code.
 * NOTE(review): presumably releases a cursor obtained from
 * grn_token_cursor_open(); whether a NULL cursor is accepted is not
 * visible here -- confirm.
 */
GRN_API grn_rc grn_token_cursor_close(grn_ctx *ctx, grn_token_cursor *token_cursor);
78 | |
79 | #ifdef __cplusplus |
80 | } |
81 | #endif |
82 | |