1 | /* -*- c-basic-offset: 2 -*- */ |
2 | /* |
3 | Copyright(C) 2009-2016 Brazil |
4 | |
5 | This library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License version 2.1 as published by the Free Software Foundation. |
8 | |
9 | This library is distributed in the hope that it will be useful, |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | Lesser General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU Lesser General Public |
15 | License along with this library; if not, write to the Free Software |
16 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
17 | */ |
18 | |
19 | #pragma once |
20 | |
21 | /* "ii" is for inverted index */ |
22 | |
23 | #include "grn.h" |
24 | #include "grn_hash.h" |
25 | #include "grn_io.h" |
26 | #include "grn_store.h" |
27 | |
28 | #ifdef __cplusplus |
29 | extern "C" { |
30 | #endif |
31 | |
32 | struct _grn_ii { |
33 | grn_db_obj obj; |
34 | grn_io *seg; /* I/O for a variety of segments */ |
35 | grn_io *chunk; /* I/O for posting chunks */ |
36 | grn_obj *lexicon; /* Lexicon table */ |
37 | grn_table_flags lflags; |
38 | grn_encoding encoding; /* Character encoding */ |
39 | /* This member is used for matching */ |
40 | uint32_t n_elements; /* Number of elements in postings */ |
41 | /* rid, [sid], tf, [weight] and [pos] */ |
42 | struct grn_ii_header *; |
43 | }; |
44 | |
/* BGQ is buffer garbage queue? */
#define GRN_II_BGQSIZE            16
#define GRN_II_MAX_LSEG           0x10000

/*
 * The GRN_II_W_* values are only used as operands of the shift expressions
 * below.  With the current values:
 *   GRN_II_W_LEAST_CHUNK     == 8
 *   GRN_II_MAX_CHUNK         == 1 << 18
 *   GRN_II_N_CHUNK_VARIATION == 14
 */
#define GRN_II_W_TOTAL_CHUNK      40
#define GRN_II_W_CHUNK            22
#define GRN_II_W_LEAST_CHUNK      (GRN_II_W_TOTAL_CHUNK - 32)
#define GRN_II_MAX_CHUNK          (1 << (GRN_II_W_TOTAL_CHUNK - GRN_II_W_CHUNK))
#define GRN_II_N_CHUNK_VARIATION  (GRN_II_W_CHUNK - GRN_II_W_LEAST_CHUNK)

/* Reduced limit: GRN_II_MAX_CHUNK shifted right by 8 bits (1 << 10). */
#define GRN_II_MAX_CHUNK_SMALL    (1 << (GRN_II_W_TOTAL_CHUNK - GRN_II_W_CHUNK - 8))

/*
 * Reduced limit: GRN_II_MAX_CHUNK shifted right by 4 bits (1 << 14).
 *
 * GRN_II_MAX_CHUNK_MEDIUM has enough space for the following source:
 *   * Single source.
 *   * Source is a fixed size column or _key of a table.
 *   * Source column is a scalar column.
 *   * Lexicon doesn't have tokenizer.
 */
#define GRN_II_MAX_CHUNK_MEDIUM   (1 << (GRN_II_W_TOTAL_CHUNK - GRN_II_W_CHUNK - 4))

/* Value 0xffffffff; by its name, marks a physical segment as not assigned. */
#define GRN_II_PSEG_NOT_ASSIGNED  0xffffffff
64 | |
65 | struct { |
66 | uint64_t ; |
67 | uint64_t ; |
68 | uint32_t ; |
69 | uint32_t ; |
70 | uint32_t ; |
71 | uint32_t ; |
72 | uint32_t ; |
73 | uint32_t ; |
74 | uint32_t ; |
75 | uint32_t ; |
76 | uint32_t bgqbody[GRN_II_BGQSIZE]; |
77 | uint32_t [288]; |
78 | uint32_t [GRN_II_MAX_LSEG]; /* array info */ |
79 | uint32_t [GRN_II_MAX_LSEG]; /* buffer info */ |
80 | uint32_t [GRN_II_N_CHUNK_VARIATION + 1]; |
81 | uint32_t [GRN_II_N_CHUNK_VARIATION + 1]; |
82 | uint32_t [GRN_II_N_CHUNK_VARIATION + 1]; |
83 | uint8_t [GRN_II_MAX_CHUNK >> 3]; |
84 | }; |
85 | |
/*
 * Node of a singly linked list of positions: one 32-bit position value plus
 * a link to the next node.
 */
struct _grn_ii_pos {
  struct _grn_ii_pos *next;   /* following node in the list */
  uint32_t pos;               /* position value carried by this node */
};
90 | |
/*
 * Update specification handed to grn_ii_update_one() / grn_ii_delete_one().
 *
 * Carries a record id / section id pair (the two arguments of
 * grn_ii_updspec_open()), counters, and two pointers into a list of
 * struct _grn_ii_pos nodes.  `pos` and `tail` are presumably the first and
 * last node of that list -- confirm against grn_ii_updspec_add().
 */
typedef struct _grn_ii_updspec {
  uint32_t rid;
  uint32_t sid;
  int32_t weight;
  int32_t tf;                 /* number of postings successfully stored to index */
  int32_t atf;                /* actual number of postings */
  int32_t offset;
  struct _grn_ii_pos *pos;
  struct _grn_ii_pos *tail;
  /* grn_vgram_vnode *vnodes; */
} grn_ii_updspec;
104 | |
105 | void grn_ii_init_from_env(void); |
106 | |
107 | GRN_API grn_ii *grn_ii_create(grn_ctx *ctx, const char *path, grn_obj *lexicon, |
108 | uint32_t flags); |
109 | GRN_API grn_ii *grn_ii_open(grn_ctx *ctx, const char *path, grn_obj *lexicon); |
110 | GRN_API grn_rc grn_ii_close(grn_ctx *ctx, grn_ii *ii); |
111 | GRN_API grn_rc grn_ii_remove(grn_ctx *ctx, const char *path); |
112 | grn_rc grn_ii_info(grn_ctx *ctx, grn_ii *ii, uint64_t *seg_size, uint64_t *chunk_size); |
113 | grn_column_flags grn_ii_get_flags(grn_ctx *ctx, grn_ii *ii); |
114 | grn_rc grn_ii_update_one(grn_ctx *ctx, grn_ii *ii, uint32_t key, grn_ii_updspec *u, |
115 | grn_hash *h); |
116 | grn_rc grn_ii_delete_one(grn_ctx *ctx, grn_ii *ii, uint32_t key, grn_ii_updspec *u, |
117 | grn_hash *h); |
118 | grn_ii_updspec *grn_ii_updspec_open(grn_ctx *ctx, uint32_t rid, uint32_t sid); |
119 | grn_rc grn_ii_updspec_close(grn_ctx *ctx, grn_ii_updspec *u); |
120 | grn_rc grn_ii_updspec_add(grn_ctx *ctx, grn_ii_updspec *u, int pos, int32_t weight); |
121 | int grn_ii_updspec_cmp(grn_ii_updspec *a, grn_ii_updspec *b); |
122 | |
123 | void grn_ii_expire(grn_ctx *ctx, grn_ii *ii); |
124 | grn_rc grn_ii_flush(grn_ctx *ctx, grn_ii *ii); |
125 | size_t grn_ii_get_disk_usage(grn_ctx *ctx, grn_ii *ii); |
126 | |
127 | grn_ii_cursor *grn_ii_cursor_openv1(grn_ii *ii, uint32_t key); |
128 | grn_rc grn_ii_cursor_openv2(grn_ii_cursor **cursors, int ncursors); |
129 | |
130 | uint32_t grn_ii_max_section(grn_ii *ii); |
131 | |
132 | const char *grn_ii_path(grn_ii *ii); |
133 | grn_obj *grn_ii_lexicon(grn_ii *ii); |
134 | |
135 | /* |
136 | grn_rc grn_ii_upd(grn_ctx *ctx, grn_ii *ii, grn_id rid, grn_vgram *vgram, |
137 | const char *oldvalue, unsigned int oldvalue_len, |
138 | const char *newvalue, unsigned int newvalue_len); |
139 | grn_rc grn_ii_update(grn_ctx *ctx, grn_ii *ii, grn_id rid, grn_vgram *vgram, |
140 | unsigned int section, |
141 | grn_values *oldvalues, grn_values *newvalues); |
142 | */ |
143 | |
144 | typedef struct _grn_select_optarg grn_select_optarg; |
145 | |
146 | struct _grn_select_optarg { |
147 | grn_operator mode; |
148 | int similarity_threshold; |
149 | int max_interval; |
150 | int *weight_vector; |
151 | int vector_size; |
152 | int (*func)(grn_ctx *, grn_hash *, const void *, int, void *); |
153 | void *func_arg; |
154 | int max_size; |
155 | grn_obj *scorer; |
156 | grn_obj *scorer_args_expr; |
157 | unsigned int scorer_args_expr_offset; |
158 | grn_fuzzy_search_optarg fuzzy; |
159 | grn_match_info *match_info; |
160 | }; |
161 | |
162 | GRN_API grn_rc grn_ii_column_update(grn_ctx *ctx, grn_ii *ii, grn_id id, |
163 | unsigned int section, grn_obj *oldvalue, |
164 | grn_obj *newvalue, grn_obj *posting); |
165 | grn_rc (grn_ctx *ctx, grn_ii *ii, const char *string, |
166 | unsigned int string_len, grn_hash *s, |
167 | grn_operator op, grn_select_optarg *optarg); |
168 | grn_rc grn_ii_similar_search(grn_ctx *ctx, grn_ii *ii, const char *string, unsigned int string_len, |
169 | grn_hash *s, grn_operator op, grn_select_optarg *optarg); |
170 | GRN_API grn_rc grn_ii_select(grn_ctx *ctx, grn_ii *ii, const char *string, unsigned int string_len, |
171 | grn_hash *s, grn_operator op, grn_select_optarg *optarg); |
172 | grn_rc grn_ii_sel(grn_ctx *ctx, grn_ii *ii, const char *string, unsigned int string_len, |
173 | grn_hash *s, grn_operator op, grn_search_optarg *optarg); |
174 | |
175 | void grn_ii_resolve_sel_and(grn_ctx *ctx, grn_hash *s, grn_operator op); |
176 | |
177 | grn_rc grn_ii_at(grn_ctx *ctx, grn_ii *ii, grn_id id, grn_hash *s, grn_operator op); |
178 | |
179 | void grn_ii_inspect_values(grn_ctx *ctx, grn_ii *ii, grn_obj *buf); |
180 | void grn_ii_cursor_inspect(grn_ctx *ctx, grn_ii_cursor *c, grn_obj *buf); |
181 | |
182 | grn_rc grn_ii_truncate(grn_ctx *ctx, grn_ii *ii); |
183 | grn_rc grn_ii_build(grn_ctx *ctx, grn_ii *ii, uint64_t sparsity); |
184 | |
185 | typedef struct grn_ii_builder_options grn_ii_builder_options; |
186 | |
187 | grn_rc grn_ii_build2(grn_ctx *ctx, grn_ii *ii, |
188 | const grn_ii_builder_options *options); |
189 | |
190 | #ifdef __cplusplus |
191 | } |
192 | #endif |
193 | |