1/* -*- c-basic-offset: 2 -*- */
2/*
3 Copyright(C) 2017 Brazil
4
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License version 2.1 as published by the Free Software Foundation.
8
9 This library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with this library; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17*/
18
/*
 * When GRN_EMBEDDED is defined, GRN_PLUGIN_FUNCTION_TAG is set before
 * <groonga/plugin.h> is included.
 *
 * NOTE(review): the tag is "functions_time", but this file registers the
 * index_column_df_ratio*() procs. This looks like a copy/paste leftover from
 * another plugin; presumably the tag is used to build the plugin's entry
 * point names, in which case two embedded plugins sharing one tag would
 * clash -- confirm, and rename together with the embedded plugin list.
 */
#ifdef GRN_EMBEDDED
# define GRN_PLUGIN_FUNCTION_TAG functions_time
#endif
22
23#include <groonga/plugin.h>
24
25static grn_rc
26selector_index_column_df_ratio_between(grn_ctx *ctx,
27 grn_obj *table,
28 grn_obj *index,
29 int n_args,
30 grn_obj **args,
31 grn_obj *res,
32 grn_operator op)
33{
34 grn_rc rc = GRN_SUCCESS;
35 grn_obj *index_column;
36 grn_ii *ii;
37 double min;
38 double max;
39 grn_obj *source_table;
40 unsigned int n_documents;
41 grn_posting posting;
42
43 if ((n_args - 1) != 3) {
44 GRN_PLUGIN_ERROR(ctx,
45 GRN_INVALID_ARGUMENT,
46 "index_column_df_ratio_between(): "
47 "wrong number of arguments (%d for 3)", n_args - 1);
48 rc = ctx->rc;
49 goto exit;
50 }
51
52 index_column = args[1];
53 ii = (grn_ii *)index_column;
54 min = GRN_FLOAT_VALUE(args[2]);
55 max = GRN_FLOAT_VALUE(args[3]);
56
57 source_table = grn_ctx_at(ctx, grn_obj_get_range(ctx, index_column));
58 n_documents = grn_table_size(ctx, source_table);
59 memset(&posting, 0, sizeof(grn_posting));
60 posting.sid = 1;
61
62 if (op == GRN_OP_AND) {
63 GRN_TABLE_EACH_BEGIN(ctx, res, cursor, record_id) {
64 void *key;
65 grn_id term_id;
66 uint32_t n_match_documents;
67 double df_ratio;
68
69 grn_table_cursor_get_key(ctx, cursor, &key);
70 term_id = *(grn_id *)key;
71 n_match_documents = grn_ii_estimate_size(ctx, ii, term_id);
72 if (n_match_documents > n_documents) {
73 n_match_documents = n_documents;
74 }
75 df_ratio = (double)n_match_documents / (double)n_documents;
76 if (min <= df_ratio && df_ratio <= max) {
77 posting.rid = term_id;
78 grn_ii_posting_add(ctx, &posting, (grn_hash *)res, op);
79 }
80 } GRN_TABLE_EACH_END(ctx, cursor);
81 grn_ii_resolve_sel_and(ctx, (grn_hash *)res, op);
82 } else {
83 GRN_TABLE_EACH_BEGIN(ctx, table, cursor, term_id) {
84 uint32_t n_match_documents;
85 double df_ratio;
86
87 n_match_documents = grn_ii_estimate_size(ctx, ii, term_id);
88 if (n_match_documents > n_documents) {
89 n_match_documents = n_documents;
90 }
91 df_ratio = (double)n_match_documents / (double)n_documents;
92 {
93 void *key;
94 int key_size;
95 key_size = grn_table_cursor_get_key(ctx, cursor, &key);
96 }
97 if (min <= df_ratio && df_ratio <= max) {
98 posting.rid = term_id;
99 grn_ii_posting_add(ctx, &posting, (grn_hash *)res, op);
100 }
101 } GRN_TABLE_EACH_END(ctx, cursor);
102 }
103
104exit :
105 return rc;
106}
107
108static grn_obj *
109func_index_column_df_ratio(grn_ctx *ctx,
110 int n_args,
111 grn_obj **args,
112 grn_user_data *user_data)
113{
114 grn_obj *term_table;
115 grn_obj *index_column_name;
116 grn_obj *index_column;
117 grn_ii *ii;
118 grn_id term_id;
119
120 if (n_args != 1) {
121 GRN_PLUGIN_ERROR(ctx,
122 GRN_INVALID_ARGUMENT,
123 "index_column_df_ratio(): "
124 "wrong number of arguments (%d for 1)", n_args - 1);
125 return NULL;
126 }
127
128 {
129 grn_obj *expr;
130 grn_obj *variable;
131
132 expr = grn_plugin_proc_get_caller(ctx, user_data);
133 if (!expr) {
134 GRN_PLUGIN_ERROR(ctx,
135 GRN_INVALID_ARGUMENT,
136 "index_column_df_ratio(): "
137 "called directly");
138 return NULL;
139 }
140
141 variable = grn_expr_get_var_by_offset(ctx, expr, 0);
142 if (!variable) {
143 GRN_PLUGIN_ERROR(ctx,
144 GRN_INVALID_ARGUMENT,
145 "index_column_df_ratio(): "
146 "caller expression must have target record information");
147 return NULL;
148 }
149
150 term_table = grn_ctx_at(ctx, variable->header.domain);
151 term_id = GRN_RECORD_VALUE(variable);
152 while (GRN_TRUE) {
153 grn_obj *key_type;
154
155 key_type = grn_ctx_at(ctx, term_table->header.domain);
156 if (!grn_obj_is_table(ctx, key_type)) {
157 break;
158 }
159
160 grn_table_get_key(ctx, term_table, term_id, &term_id, sizeof(grn_id));
161 term_table = key_type;
162 }
163 }
164
165 index_column_name = args[0];
166 if (!grn_obj_is_text_family_bulk(ctx, index_column_name)) {
167 grn_obj inspected;
168 GRN_TEXT_INIT(&inspected, 0);
169 grn_inspect(ctx, &inspected, index_column_name);
170 GRN_PLUGIN_ERROR(ctx,
171 GRN_INVALID_ARGUMENT,
172 "index_column_df_ratio(): "
173 "the first argument must be index column name: %.*s",
174 (int)GRN_TEXT_LEN(&inspected),
175 GRN_TEXT_VALUE(&inspected));
176 GRN_OBJ_FIN(ctx, &inspected);
177 return NULL;
178 }
179
180 index_column = grn_obj_column(ctx,
181 term_table,
182 GRN_TEXT_VALUE(index_column_name),
183 GRN_TEXT_LEN(index_column_name));
184 if (!index_column) {
185 GRN_PLUGIN_ERROR(ctx,
186 GRN_INVALID_ARGUMENT,
187 "index_column_df_ratio(): "
188 "nonexistent object: <%.*s>",
189 (int)GRN_TEXT_LEN(index_column_name),
190 GRN_TEXT_VALUE(index_column_name));
191 return NULL;
192 }
193
194 if (!grn_obj_is_index_column(ctx, index_column)) {
195 grn_obj inspected;
196 GRN_TEXT_INIT(&inspected, 0);
197 grn_inspect(ctx, &inspected, index_column);
198 GRN_PLUGIN_ERROR(ctx,
199 GRN_INVALID_ARGUMENT,
200 "index_column_df_ratio(): "
201 "the first argument must be index column: %.*s",
202 (int)GRN_TEXT_LEN(&inspected),
203 GRN_TEXT_VALUE(&inspected));
204 GRN_OBJ_FIN(ctx, &inspected);
205 if (grn_obj_is_accessor(ctx, index_column)) {
206 grn_obj_unlink(ctx, index_column);
207 }
208 return NULL;
209 }
210
211 ii = (grn_ii *)index_column;
212
213 {
214 grn_obj *source_table;
215 unsigned int n_documents;
216 uint32_t n_match_documents;
217 double df_ratio;
218 grn_obj *df_ratio_value;
219
220 source_table = grn_ctx_at(ctx, grn_obj_get_range(ctx, index_column));
221 n_documents = grn_table_size(ctx, source_table);
222 n_match_documents = grn_ii_estimate_size(ctx, ii, term_id);
223 if (n_match_documents > n_documents) {
224 n_match_documents = n_documents;
225 }
226 df_ratio = (double)n_match_documents / (double)n_documents;
227
228 df_ratio_value = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_FLOAT, 0);
229 if (!df_ratio_value) {
230 return NULL;
231 }
232 GRN_FLOAT_SET(ctx, df_ratio_value, df_ratio);
233 return df_ratio_value;
234 }
235}
236
237grn_rc
238GRN_PLUGIN_INIT(grn_ctx *ctx)
239{
240 return ctx->rc;
241}
242
243grn_rc
244GRN_PLUGIN_REGISTER(grn_ctx *ctx)
245{
246 grn_obj *selector_proc;
247
248 selector_proc = grn_proc_create(ctx, "index_column_df_ratio_between", -1,
249 GRN_PROC_FUNCTION,
250 NULL, NULL, NULL, 0, NULL);
251 grn_proc_set_selector(ctx, selector_proc,
252 selector_index_column_df_ratio_between);
253 grn_proc_set_selector_operator(ctx, selector_proc, GRN_OP_NOP);
254
255 grn_proc_create(ctx, "index_column_df_ratio", -1,
256 GRN_PROC_FUNCTION,
257 func_index_column_df_ratio, NULL, NULL, 0, NULL);
258
259 return ctx->rc;
260}
261
262grn_rc
263GRN_PLUGIN_FIN(grn_ctx *ctx)
264{
265 return GRN_SUCCESS;
266}
267