/* -*- c-basic-offset: 2 -*- */
/*
  Copyright(C) 2009-2016 Brazil

  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License version 2.1 as published by the Free Software Foundation.

  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with this library; if not, write to the Free Software
  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
*/
| 18 | |
| 19 | #pragma once |
| 20 | |
| 21 | #include "grn_ctx.h" |
| 22 | #include "grn_db.h" |
| 23 | |
| 24 | #include <groonga/tokenizer.h> |
| 25 | |
| 26 | #ifdef __cplusplus |
| 27 | extern "C" { |
| 28 | #endif |
| 29 | |
/*
 * Marker byte sequences for UTF-8 text.
 *
 * BEGIN is the 3-byte UTF-8 encoding of U+FFEF and END is the 3-byte UTF-8
 * encoding of U+FFF0. Each *_LEN macro is the byte length of the matching
 * string literal, not counting its terminating NUL.
 *
 * NOTE(review): where these markers are inserted or matched is not visible
 * in this header -- confirm against the tokenizer implementations.
 */
#define GRN_TOKENIZER_BEGIN_MARK_UTF8      "\xEF\xBF\xAF"
#define GRN_TOKENIZER_BEGIN_MARK_UTF8_LEN  3
#define GRN_TOKENIZER_END_MARK_UTF8        "\xEF\xBF\xB0"
#define GRN_TOKENIZER_END_MARK_UTF8_LEN    3
| 34 | |
/*
 * Status code type used by the `status` field of grn_token_cursor.
 *
 * The first enumerator is pinned to 0; the others take the next consecutive
 * values in declaration order (1, 2, 3).
 *
 * NOTE(review): the exact conditions under which each status is set are
 * decided by the token cursor implementation, which is not visible in this
 * header -- the names suggest "in progress", "finished", "finished, skip
 * the current token" and "token not found"; confirm before relying on it.
 */
typedef enum {
  GRN_TOKEN_CURSOR_DOING = 0,
  GRN_TOKEN_CURSOR_DONE,
  GRN_TOKEN_CURSOR_DONE_SKIP,
  GRN_TOKEN_CURSOR_NOT_FOUND
} grn_token_cursor_status;
| 41 | |
| 42 | struct _grn_token { |
| 43 | grn_obj data; |
| 44 | grn_token_status status; |
| 45 | }; |
| 46 | |
| 47 | typedef struct { |
| 48 | grn_obj *table; |
| 49 | const unsigned char *orig; |
| 50 | const unsigned char *curr; |
| 51 | uint32_t orig_blen; |
| 52 | uint32_t curr_size; |
| 53 | int32_t pos; |
| 54 | grn_tokenize_mode mode; |
| 55 | grn_token_cursor_status status; |
| 56 | grn_bool force_prefix; |
| 57 | grn_obj_flags table_flags; |
| 58 | grn_encoding encoding; |
| 59 | grn_obj *tokenizer; |
| 60 | grn_proc_ctx pctx; |
| 61 | struct { |
| 62 | grn_obj *objects; |
| 63 | void **data; |
| 64 | } token_filter; |
| 65 | uint32_t variant; |
| 66 | grn_obj *nstr; |
| 67 | } grn_token_cursor; |
| 68 | |
/*
 * Bit-0 flag (value 1). The `L` suffix gives the expansion type `long`.
 *
 * NOTE(review): presumably meant for the `unsigned int flags` argument of
 * grn_token_cursor_open(); the consumer is not visible here -- confirm.
 */
#define GRN_TOKEN_CURSOR_ENABLE_TOKENIZED_DELIMITER (0x01L<<0)
| 70 | |
| 71 | GRN_API grn_token_cursor *grn_token_cursor_open(grn_ctx *ctx, grn_obj *table, |
| 72 | const char *str, size_t str_len, |
| 73 | grn_tokenize_mode mode, |
| 74 | unsigned int flags); |
| 75 | |
| 76 | GRN_API grn_id grn_token_cursor_next(grn_ctx *ctx, grn_token_cursor *token_cursor); |
| 77 | GRN_API grn_rc grn_token_cursor_close(grn_ctx *ctx, grn_token_cursor *token_cursor); |
| 78 | |
| 79 | #ifdef __cplusplus |
| 80 | } |
| 81 | #endif |
| 82 | |