1/* -*- c-basic-offset: 2 -*- */
2/*
3 Copyright(C) 2009-2016 Brazil
4
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License version 2.1 as published by the Free Software Foundation.
8
9 This library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with this library; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17*/
18
19#pragma once
20
21#include "grn_ctx.h"
22#include "grn_db.h"
23
24#include <groonga/tokenizer.h>
25
26#ifdef __cplusplus
27extern "C" {
28#endif
29
/*
 * Begin/end marker byte sequences and their lengths in bytes (excluding
 * the terminating NUL).
 *
 * "\xEF\xBF\xAF" is the UTF-8 encoding of U+FFEF and "\xEF\xBF\xB0" is the
 * UTF-8 encoding of U+FFF0.
 *
 * NOTE(review): presumably tokenizers insert these to flag the start and
 * end of the tokenized text -- confirm against the tokenizer sources.
 */
#define GRN_TOKENIZER_BEGIN_MARK_UTF8      "\xEF\xBF\xAF"
#define GRN_TOKENIZER_BEGIN_MARK_UTF8_LEN  3
#define GRN_TOKENIZER_END_MARK_UTF8        "\xEF\xBF\xB0"
#define GRN_TOKENIZER_END_MARK_UTF8_LEN    3
34
/*
 * Status values stored in grn_token_cursor::status.
 *
 * The numeric values are spelled out so they are visible at a glance;
 * they are the same values the compiler assigns implicitly when only the
 * first enumerator is set to 0.
 *
 * NOTE(review): the exact conditions under which each status is set live
 * in the token cursor implementation, not in this header.
 */
typedef enum {
  GRN_TOKEN_CURSOR_DOING     = 0,
  GRN_TOKEN_CURSOR_DONE      = 1,
  GRN_TOKEN_CURSOR_DONE_SKIP = 2,
  GRN_TOKEN_CURSOR_NOT_FOUND = 3
} grn_token_cursor_status;
41
42struct _grn_token {
43 grn_obj data;
44 grn_token_status status;
45};
46
47typedef struct {
48 grn_obj *table;
49 const unsigned char *orig;
50 const unsigned char *curr;
51 uint32_t orig_blen;
52 uint32_t curr_size;
53 int32_t pos;
54 grn_tokenize_mode mode;
55 grn_token_cursor_status status;
56 grn_bool force_prefix;
57 grn_obj_flags table_flags;
58 grn_encoding encoding;
59 grn_obj *tokenizer;
60 grn_proc_ctx pctx;
61 struct {
62 grn_obj *objects;
63 void **data;
64 } token_filter;
65 uint32_t variant;
66 grn_obj *nstr;
67} grn_token_cursor;
68
/*
 * Bit 0 flag constant (value 1, type long).
 *
 * NOTE(review): presumably meant for the `flags` argument of
 * grn_token_cursor_open() -- confirm against the implementation.
 */
#define GRN_TOKEN_CURSOR_ENABLE_TOKENIZED_DELIMITER (0x01L << 0)
70
71GRN_API grn_token_cursor *grn_token_cursor_open(grn_ctx *ctx, grn_obj *table,
72 const char *str, size_t str_len,
73 grn_tokenize_mode mode,
74 unsigned int flags);
75
76GRN_API grn_id grn_token_cursor_next(grn_ctx *ctx, grn_token_cursor *token_cursor);
77GRN_API grn_rc grn_token_cursor_close(grn_ctx *ctx, grn_token_cursor *token_cursor);
78
79#ifdef __cplusplus
80}
81#endif
82