1 | /* -*- c-basic-offset: 2; indent-tabs-mode: nil -*- */ |
2 | /* Copyright(C) 2010-2014 Brazil |
3 | |
4 | This library is free software; you can redistribute it and/or |
5 | modify it under the terms of the GNU Lesser General Public |
6 | License version 2.1 as published by the Free Software Foundation. |
7 | |
8 | This library is distributed in the hope that it will be useful, |
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
11 | Lesser General Public License for more details. |
12 | |
13 | You should have received a copy of the GNU Lesser General Public |
14 | License along with this library; if not, write to the Free Software |
15 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
16 | */ |
17 | |
18 | #ifdef GRN_EMBEDDED |
19 | # define GRN_PLUGIN_FUNCTION_TAG suggest_suggest |
20 | #endif |
21 | |
22 | #include <string.h> |
23 | |
24 | #include "grn_ctx.h" |
25 | #include "grn_db.h" |
26 | #include "grn_ii.h" |
27 | #include "grn_token_cursor.h" |
28 | #include "grn_output.h" |
29 | #include <groonga/plugin.h> |
30 | |
/* Short alias for GRN_PROC_GET_VAR_BY_OFFSET; used below as VAR(n) to read
   the n-th argument of the running command. */
#define VAR GRN_PROC_GET_VAR_BY_OFFSET
/* Expands a string literal into the two arguments "pointer, length", where
   the length excludes the terminating NUL (0 when x is a null pointer). */
#define CONST_STR_LEN(x) x, x ? sizeof(x) - 1 : 0
/* Expands a text bulk into the two arguments "value pointer, byte length". */
#define TEXT_VALUE_LEN(x) GRN_TEXT_VALUE(x), GRN_TEXT_LEN(x)

/* The learner stops pairing a new event with earlier events once the time
   gap reaches this value (60 * GRN_TIME_USEC_PER_SEC, presumably 60 seconds
   expressed in microseconds). */
#define MIN_LEARN_DISTANCE (60 * GRN_TIME_USEC_PER_SEC)

/* Bit flags for the suggestion types; OR-ed together by
   grn_parse_suggest_types(). */
#define COMPLETE 1
#define CORRECT 2
#define SUGGEST 4
40 | |
/* Tri-state switch for an optional search step (prefix search in
   complete(), similar search in correct()): always run it, never run it,
   or decide from the results gathered so far. */
typedef enum {
  GRN_SUGGEST_SEARCH_YES,
  GRN_SUGGEST_SEARCH_NO,
  /* Also the value parse_search_mode() yields for any text other than
     "yes"/"no". */
  GRN_SUGGEST_SEARCH_AUTO
} grn_suggest_search_mode;
46 | |
/* Working state used while learning from one submitted event. */
typedef struct {
  /* Input objects handed to learner_init(); stored as-is. */
  grn_obj *post_event;
  grn_obj *post_type;
  grn_obj *post_item;
  grn_obj *seq;
  grn_obj *post_time;
  grn_obj *pairs;

  /* Reset to 0 by learner_init(); set by
     learner_learn_for_complete_and_correcnt() when it stops on an event
     that is at least MIN_LEARN_DISTANCE older than the new one. */
  int learn_distance_in_seconds;

  /* Scalar values extracted from the input objects by
     learner_init_values(). */
  grn_id post_event_id;
  grn_id post_type_id;
  grn_id post_item_id;
  grn_id seq_id;
  int64_t post_time_value;

  /* Tables and columns resolved by learner_init_columns() and released by
     learner_fin_columns(). */
  grn_obj *seqs;
  grn_obj *seqs_events;
  grn_obj *events;
  grn_obj *events_item;
  grn_obj *events_type;
  grn_obj *events_time;
  grn_obj *event_types;
  grn_obj *items;
  grn_obj *items_freq;
  grn_obj *items_freq2;
  grn_obj *items_last;
  grn_obj *pairs_pre;
  grn_obj *pairs_post;
  grn_obj *pairs_freq0;
  grn_obj *pairs_freq1;
  grn_obj *pairs_freq2;

  /* Name of the events table with its first strlen("event_") bytes
     dropped; see learner_init_dataset_name(). */
  grn_obj dataset_name;

  /* The "configuration" object looked up by learner_init_configuration();
     may be NULL. */
  grn_obj *configuration;

  /* Increment applied by learner_increment(); defaults to 1, see
     learner_init_weight(). */
  grn_obj weight;
  /* Buffer receiving the seqs "events" column value of seq_id. */
  grn_obj pre_events;

  /* post_item_id shifted into the upper 32 bits; combined with another
     item id to form a pairs key. */
  uint64_t key_prefix;
  /* Scratch record buffer for the item of an earlier event. */
  grn_obj pre_item;
} grn_suggest_learner;
90 | |
91 | static int |
92 | grn_parse_suggest_types(grn_obj *text) |
93 | { |
94 | const char *nptr = GRN_TEXT_VALUE(text); |
95 | const char *end = GRN_BULK_CURR(text); |
96 | int types = 0; |
97 | while (nptr < end) { |
98 | if (*nptr == '|') { |
99 | nptr += 1; |
100 | continue; |
101 | } |
102 | { |
103 | const char string[] = "complete" ; |
104 | size_t length = sizeof(string) - 1; |
105 | if (nptr + length <= end && memcmp(nptr, string, length) == 0) { |
106 | types |= COMPLETE; |
107 | nptr += length; |
108 | continue; |
109 | } |
110 | } |
111 | { |
112 | const char string[] = "correct" ; |
113 | size_t length = sizeof(string) - 1; |
114 | if (nptr + length <= end && memcmp(nptr, string, length) == 0) { |
115 | types |= CORRECT; |
116 | nptr += length; |
117 | continue; |
118 | } |
119 | } |
120 | { |
121 | const char string[] = "suggest" ; |
122 | size_t length = sizeof(string) - 1; |
123 | if (nptr + length <= end && memcmp(nptr, string, length) == 0) { |
124 | types |= SUGGEST; |
125 | nptr += length; |
126 | continue; |
127 | } |
128 | } |
129 | break; |
130 | } |
131 | return types; |
132 | } |
133 | |
134 | static double |
135 | cooccurrence_search(grn_ctx *ctx, grn_obj *items, grn_obj *items_boost, grn_id id, |
136 | grn_obj *res, int query_type, int frequency_threshold, |
137 | double conditional_probability_threshold) |
138 | { |
139 | double max_score = 0.0; |
140 | if (id) { |
141 | grn_ii_cursor *c; |
142 | grn_obj *co = grn_obj_column(ctx, items, CONST_STR_LEN("co" )); |
143 | grn_obj *pairs = grn_ctx_at(ctx, grn_obj_get_range(ctx, co)); |
144 | grn_obj *items_freq = grn_obj_column(ctx, items, CONST_STR_LEN("freq" )); |
145 | grn_obj *items_freq2 = grn_obj_column(ctx, items, CONST_STR_LEN("freq2" )); |
146 | grn_obj *pairs_freq, *pairs_post = grn_obj_column(ctx, pairs, CONST_STR_LEN("post" )); |
147 | switch (query_type) { |
148 | case COMPLETE : |
149 | pairs_freq = grn_obj_column(ctx, pairs, CONST_STR_LEN("freq0" )); |
150 | break; |
151 | case CORRECT : |
152 | pairs_freq = grn_obj_column(ctx, pairs, CONST_STR_LEN("freq1" )); |
153 | break; |
154 | case SUGGEST : |
155 | pairs_freq = grn_obj_column(ctx, pairs, CONST_STR_LEN("freq2" )); |
156 | break; |
157 | default : |
158 | return max_score; |
159 | } |
160 | if ((c = grn_ii_cursor_open(ctx, (grn_ii *)co, id, GRN_ID_NIL, GRN_ID_MAX, |
161 | ((grn_ii *)co)->n_elements - 1, 0))) { |
162 | grn_posting *p; |
163 | grn_obj post, pair_freq, item_freq, item_freq2, item_boost; |
164 | GRN_RECORD_INIT(&post, 0, grn_obj_id(ctx, items)); |
165 | GRN_INT32_INIT(&pair_freq, 0); |
166 | GRN_INT32_INIT(&item_freq, 0); |
167 | GRN_INT32_INIT(&item_freq2, 0); |
168 | GRN_INT32_INIT(&item_boost, 0); |
169 | while ((p = grn_ii_cursor_next(ctx, c))) { |
170 | grn_id post_id; |
171 | int pfreq, ifreq, ifreq2, boost; |
172 | double conditional_probability; |
173 | GRN_BULK_REWIND(&post); |
174 | GRN_BULK_REWIND(&pair_freq); |
175 | GRN_BULK_REWIND(&item_freq); |
176 | GRN_BULK_REWIND(&item_freq2); |
177 | GRN_BULK_REWIND(&item_boost); |
178 | grn_obj_get_value(ctx, pairs_post, p->rid, &post); |
179 | grn_obj_get_value(ctx, pairs_freq, p->rid, &pair_freq); |
180 | post_id = GRN_RECORD_VALUE(&post); |
181 | grn_obj_get_value(ctx, items_freq, post_id, &item_freq); |
182 | grn_obj_get_value(ctx, items_freq2, post_id, &item_freq2); |
183 | grn_obj_get_value(ctx, items_boost, post_id, &item_boost); |
184 | pfreq = GRN_INT32_VALUE(&pair_freq); |
185 | ifreq = GRN_INT32_VALUE(&item_freq); |
186 | ifreq2 = GRN_INT32_VALUE(&item_freq2); |
187 | if (ifreq2 > 0) { |
188 | conditional_probability = (double)pfreq / (double)ifreq2; |
189 | } else { |
190 | conditional_probability = 0.0; |
191 | } |
192 | boost = GRN_INT32_VALUE(&item_boost); |
193 | if (pfreq >= frequency_threshold && ifreq >= frequency_threshold && |
194 | conditional_probability >= conditional_probability_threshold && |
195 | boost >= 0) { |
196 | grn_rset_recinfo *ri; |
197 | void *value; |
198 | double score = pfreq; |
199 | int added; |
200 | if (max_score < score + boost) { max_score = score + boost; } |
201 | /* put any formula if desired */ |
202 | if (grn_hash_add(ctx, (grn_hash *)res, |
203 | &post_id, sizeof(grn_id), &value, &added)) { |
204 | ri = value; |
205 | ri->score += score; |
206 | if (added) { |
207 | ri->score += boost; |
208 | } |
209 | } |
210 | } |
211 | } |
212 | GRN_OBJ_FIN(ctx, &post); |
213 | GRN_OBJ_FIN(ctx, &pair_freq); |
214 | GRN_OBJ_FIN(ctx, &item_freq); |
215 | GRN_OBJ_FIN(ctx, &item_freq2); |
216 | GRN_OBJ_FIN(ctx, &item_boost); |
217 | grn_ii_cursor_close(ctx, c); |
218 | } |
219 | } |
220 | return max_score; |
221 | } |
222 | |
/* Fallback values used when the corresponding command argument is empty:
   the limit and the two thresholds are applied in command_suggest(), the
   sort keys and output columns in output(). */
#define DEFAULT_LIMIT 10
#define DEFAULT_SORTBY "-_score"
#define DEFAULT_OUTPUT_COLUMNS "_key,_score"
#define DEFAULT_FREQUENCY_THRESHOLD 100
#define DEFAULT_CONDITIONAL_PROBABILITY_THRESHOLD 0.2
228 | |
229 | static void |
230 | output(grn_ctx *ctx, grn_obj *table, grn_obj *res, grn_id tid, |
231 | grn_obj *sortby, grn_obj *output_columns, int offset, int limit) |
232 | { |
233 | grn_obj *sorted; |
234 | if ((sorted = grn_table_create(ctx, NULL, 0, NULL, GRN_OBJ_TABLE_NO_KEY, NULL, res))) { |
235 | uint32_t nkeys; |
236 | grn_obj_format format; |
237 | grn_table_sort_key *keys; |
238 | const char *sortby_val = GRN_TEXT_VALUE(sortby); |
239 | unsigned int sortby_len = GRN_TEXT_LEN(sortby); |
240 | const char *oc_val = GRN_TEXT_VALUE(output_columns); |
241 | unsigned int oc_len = GRN_TEXT_LEN(output_columns); |
242 | if (!sortby_val || !sortby_len) { |
243 | sortby_val = DEFAULT_SORTBY; |
244 | sortby_len = sizeof(DEFAULT_SORTBY) - 1; |
245 | } |
246 | if (!oc_val || !oc_len) { |
247 | oc_val = DEFAULT_OUTPUT_COLUMNS; |
248 | oc_len = sizeof(DEFAULT_OUTPUT_COLUMNS) - 1; |
249 | } |
250 | if ((keys = grn_table_sort_key_from_str(ctx, sortby_val, sortby_len, res, &nkeys))) { |
251 | grn_table_sort(ctx, res, offset, limit, sorted, keys, nkeys); |
252 | GRN_QUERY_LOG(ctx, GRN_QUERY_LOG_SIZE, |
253 | ":" , "sort(%d)" , limit); |
254 | GRN_OBJ_FORMAT_INIT(&format, grn_table_size(ctx, res), 0, limit, offset); |
255 | format.flags = |
256 | GRN_OBJ_FORMAT_WITH_COLUMN_NAMES| |
257 | GRN_OBJ_FORMAT_XML_ELEMENT_RESULTSET; |
258 | grn_obj_columns(ctx, sorted, oc_val, oc_len, &format.columns); |
259 | GRN_OUTPUT_OBJ(sorted, &format); |
260 | GRN_OBJ_FORMAT_FIN(ctx, &format); |
261 | grn_table_sort_key_close(ctx, keys, nkeys); |
262 | } |
263 | grn_obj_unlink(ctx, sorted); |
264 | } else { |
265 | ERR(GRN_UNKNOWN_ERROR, "cannot create temporary sort table." ); |
266 | } |
267 | } |
268 | |
269 | static inline void |
270 | complete_add_item(grn_ctx *ctx, grn_id id, grn_obj *res, int frequency_threshold, |
271 | grn_obj *items_freq, grn_obj *items_boost, |
272 | grn_obj *item_freq, grn_obj *item_boost) |
273 | { |
274 | GRN_BULK_REWIND(item_freq); |
275 | GRN_BULK_REWIND(item_boost); |
276 | grn_obj_get_value(ctx, items_freq, id, item_freq); |
277 | grn_obj_get_value(ctx, items_boost, id, item_boost); |
278 | if (GRN_INT32_VALUE(item_boost) >= 0) { |
279 | double score; |
280 | score = 1 + |
281 | GRN_INT32_VALUE(item_freq) + |
282 | GRN_INT32_VALUE(item_boost); |
283 | if (score >= frequency_threshold) { |
284 | void *value; |
285 | if (grn_hash_add(ctx, (grn_hash *)res, &id, sizeof(grn_id), |
286 | &value, NULL)) { |
287 | grn_rset_recinfo *ri; |
288 | ri = value; |
289 | ri->score += score; |
290 | } |
291 | } |
292 | } |
293 | } |
294 | |
295 | static void |
296 | complete(grn_ctx *ctx, grn_obj *items, grn_obj *items_boost, grn_obj *col, |
297 | grn_obj *query, grn_obj *sortby, |
298 | grn_obj *output_columns, int offset, int limit, |
299 | int frequency_threshold, double conditional_probability_threshold, |
300 | grn_suggest_search_mode prefix_search_mode) |
301 | { |
302 | grn_obj *res; |
303 | grn_obj *items_freq = grn_obj_column(ctx, items, CONST_STR_LEN("freq" )); |
304 | grn_obj item_freq, item_boost; |
305 | GRN_INT32_INIT(&item_freq, 0); |
306 | GRN_INT32_INIT(&item_boost, 0); |
307 | if ((res = grn_table_create(ctx, NULL, 0, NULL, |
308 | GRN_TABLE_HASH_KEY|GRN_OBJ_WITH_SUBREC, items, NULL))) { |
309 | grn_id tid = grn_table_get(ctx, items, TEXT_VALUE_LEN(query)); |
310 | if (GRN_TEXT_LEN(query)) { |
311 | grn_table_cursor *cur; |
312 | /* RK search + prefix search */ |
313 | grn_obj *index; |
314 | /* FIXME: support index selection */ |
315 | if (grn_column_index(ctx, col, GRN_OP_PREFIX, &index, 1, NULL)) { |
316 | if ((cur = grn_table_cursor_open(ctx, grn_ctx_at(ctx, index->header.domain), |
317 | GRN_TEXT_VALUE(query), |
318 | GRN_TEXT_LEN(query), |
319 | NULL, 0, 0, -1, |
320 | GRN_CURSOR_PREFIX|GRN_CURSOR_RK))) { |
321 | grn_id id; |
322 | while ((id = grn_table_cursor_next(ctx, cur))) { |
323 | grn_ii_cursor *icur; |
324 | if ((icur = grn_ii_cursor_open(ctx, (grn_ii *)index, id, |
325 | GRN_ID_NIL, GRN_ID_MAX, 1, 0))) { |
326 | grn_posting *p; |
327 | while ((p = grn_ii_cursor_next(ctx, icur))) { |
328 | complete_add_item(ctx, p->rid, res, frequency_threshold, |
329 | items_freq, items_boost, |
330 | &item_freq, &item_boost); |
331 | } |
332 | grn_ii_cursor_close(ctx, icur); |
333 | } |
334 | } |
335 | grn_table_cursor_close(ctx, cur); |
336 | } else { |
337 | ERR(GRN_UNKNOWN_ERROR, "cannot open cursor for prefix RK search." ); |
338 | } |
339 | } else { |
340 | ERR(GRN_UNKNOWN_ERROR, "cannot find index for prefix RK search." ); |
341 | } |
342 | cooccurrence_search(ctx, items, items_boost, tid, res, COMPLETE, |
343 | frequency_threshold, |
344 | conditional_probability_threshold); |
345 | if (((prefix_search_mode == GRN_SUGGEST_SEARCH_YES) || |
346 | (prefix_search_mode == GRN_SUGGEST_SEARCH_AUTO && |
347 | !grn_table_size(ctx, res))) && |
348 | (cur = grn_table_cursor_open(ctx, items, |
349 | GRN_TEXT_VALUE(query), |
350 | GRN_TEXT_LEN(query), |
351 | NULL, 0, 0, -1, GRN_CURSOR_PREFIX))) { |
352 | grn_id id; |
353 | while ((id = grn_table_cursor_next(ctx, cur))) { |
354 | complete_add_item(ctx, id, res, frequency_threshold, |
355 | items_freq, items_boost, &item_freq, &item_boost); |
356 | } |
357 | grn_table_cursor_close(ctx, cur); |
358 | } |
359 | } |
360 | output(ctx, items, res, tid, sortby, output_columns, offset, limit); |
361 | grn_obj_close(ctx, res); |
362 | } else { |
363 | ERR(GRN_UNKNOWN_ERROR, "cannot create temporary table." ); |
364 | } |
365 | GRN_OBJ_FIN(ctx, &item_boost); |
366 | GRN_OBJ_FIN(ctx, &item_freq); |
367 | } |
368 | |
/*
 * Build and output the "correct" (spelling correction) result set for
 * `query`.
 *
 * Candidates first come from cooccurrence_search() with type CORRECT.  When
 * `query` is not empty and `similar_search_mode` is YES, or AUTO with the
 * best co-occurrence score below `frequency_threshold`, a similar search on
 * the key index of `items` is OR-ed into the same result set.  Every record
 * of the result set is then re-scored and filtered, and finally
 * edit_distance(_key, query) is subtracted from each score, dropping
 * records that fall below `frequency_threshold`.
 *
 * The result (possibly empty) is sorted and emitted by output().
 */
static void
correct(grn_ctx *ctx, grn_obj *items, grn_obj *items_boost,
        grn_obj *query, grn_obj *sortby,
        grn_obj *output_columns, int offset, int limit,
        int frequency_threshold, double conditional_probability_threshold,
        grn_suggest_search_mode similar_search_mode)
{
  grn_obj *res;
  grn_obj *items_freq2 = grn_obj_column(ctx, items, CONST_STR_LEN("freq2"));
  /* Scratch buffers reused for every record inspected below. */
  grn_obj item_freq2, item_boost;
  GRN_INT32_INIT(&item_freq2, 0);
  GRN_INT32_INIT(&item_boost, 0);
  if ((res = grn_table_create(ctx, NULL, 0, NULL,
                              GRN_TABLE_HASH_KEY|GRN_OBJ_WITH_SUBREC, items, NULL))) {
    /* Record id of the item whose key equals the query (GRN_ID_NIL if none). */
    grn_id tid = grn_table_get(ctx, items, TEXT_VALUE_LEN(query));
    double max_score;
    max_score = cooccurrence_search(ctx, items, items_boost, tid, res, CORRECT,
                                    frequency_threshold,
                                    conditional_probability_threshold);
    GRN_QUERY_LOG(ctx, GRN_QUERY_LOG_SCORE,
                  ":", "cooccur(%f)", max_score);
    if (GRN_TEXT_LEN(query) &&
        ((similar_search_mode == GRN_SUGGEST_SEARCH_YES) ||
         (similar_search_mode == GRN_SUGGEST_SEARCH_AUTO &&
          max_score < frequency_threshold))) {
      grn_obj *key, *index;
      if ((key = grn_obj_column(ctx, items,
                                GRN_COLUMN_NAME_KEY,
                                GRN_COLUMN_NAME_KEY_LEN))) {
        if (grn_column_index(ctx, key, GRN_OP_MATCH, &index, 1, NULL)) {
          /* Similar search on the key index; hits are merged into res. */
          grn_select_optarg optarg;
          memset(&optarg, 0, sizeof(grn_select_optarg));
          optarg.mode = GRN_OP_SIMILAR;
          optarg.similarity_threshold = 0;
          optarg.max_size = 2;
          grn_ii_select(ctx, (grn_ii *)index, TEXT_VALUE_LEN(query),
                        (grn_hash *)res, GRN_OP_OR, &optarg);
          grn_obj_unlink(ctx, index);
          GRN_QUERY_LOG(ctx, GRN_QUERY_LOG_SIZE,
                        ":", "similar(%d)", grn_table_size(ctx, res));
          {
            /* Walk every record now in res: add 1 + (freq2 >> 4) + boost to
               its score, and delete it when the boost is negative or that
               increment is below the threshold. */
            grn_hash_cursor *hc = grn_hash_cursor_open(ctx, (grn_hash *)res, NULL,
                                                       0, NULL, 0, 0, -1, 0);
            if (hc) {
              while (grn_hash_cursor_next(ctx, hc)) {
                /* NOTE: this `key` shadows the outer grn_obj *key. */
                void *key, *value;
                if (grn_hash_cursor_get_key_value(ctx, hc, &key, NULL, &value)) {
                  /* The hash key is the item's record id. */
                  grn_id *rp;
                  rp = key;
                  GRN_BULK_REWIND(&item_freq2);
                  GRN_BULK_REWIND(&item_boost);
                  grn_obj_get_value(ctx, items_freq2, *rp, &item_freq2);
                  grn_obj_get_value(ctx, items_boost, *rp, &item_boost);
                  if (GRN_INT32_VALUE(&item_boost) >= 0) {
                    double score;
                    grn_rset_recinfo *ri;
                    score = 1 +
                            (GRN_INT32_VALUE(&item_freq2) >> 4) +
                            GRN_INT32_VALUE(&item_boost);
                    ri = value;
                    ri->score += score;
                    /* Keep the record; skip the delete below. */
                    if (score >= frequency_threshold) { continue; }
                  }
                  /* score < frequency_threshold || item_boost < 0 */
                  grn_hash_cursor_delete(ctx, hc, NULL);
                }
              }
              grn_hash_cursor_close(ctx, hc);
            }
          }
          GRN_QUERY_LOG(ctx, GRN_QUERY_LOG_SIZE,
                        ":", "filter(%d)", grn_table_size(ctx, res));
          {
            /* exec _score -= edit_distance(_key, "query string") for all records */
            grn_obj *var;
            grn_obj *expr;

            GRN_EXPR_CREATE_FOR_QUERY(ctx, res, expr, var);
            if (expr) {
              grn_table_cursor *tc;
              grn_obj *score = grn_obj_column(ctx, res,
                                              GRN_COLUMN_NAME_SCORE,
                                              GRN_COLUMN_NAME_SCORE_LEN);
              /* NOTE: this `key` (the _key accessor of res) shadows the
                 outer `key` of items. */
              grn_obj *key = grn_obj_column(ctx, res,
                                            GRN_COLUMN_NAME_KEY,
                                            GRN_COLUMN_NAME_KEY_LEN);
              /* Expression in postfix order:
                 _score, edit_distance, _key, query, CALL(2), MINUS_ASSIGN */
              grn_expr_append_obj(ctx, expr,
                                  score,
                                  GRN_OP_GET_VALUE, 1);
              grn_expr_append_obj(ctx, expr,
                                  grn_ctx_get(ctx, CONST_STR_LEN("edit_distance")),
                                  GRN_OP_PUSH, 1);
              grn_expr_append_obj(ctx, expr,
                                  key,
                                  GRN_OP_GET_VALUE, 1);
              grn_expr_append_const(ctx, expr, query, GRN_OP_PUSH, 1);
              grn_expr_append_op(ctx, expr, GRN_OP_CALL, 2);
              grn_expr_append_op(ctx, expr, GRN_OP_MINUS_ASSIGN, 2);

              if ((tc = grn_table_cursor_open(ctx, res, NULL, 0, NULL, 0, 0, -1, 0))) {
                grn_id id;
                grn_obj score_value;
                GRN_FLOAT_INIT(&score_value, 0);
                while ((id = grn_table_cursor_next(ctx, tc)) != GRN_ID_NIL) {
                  /* Bind the expression variable to this record and run it. */
                  GRN_RECORD_SET(ctx, var, id);
                  grn_expr_exec(ctx, expr, 0);
                  GRN_BULK_REWIND(&score_value);
                  grn_obj_get_value(ctx, score, id, &score_value);
                  /* Drop records whose adjusted score is too low. */
                  if (GRN_FLOAT_VALUE(&score_value) < frequency_threshold) {
                    grn_table_cursor_delete(ctx, tc);
                  }
                }
                grn_obj_unlink(ctx, &score_value);
                grn_table_cursor_close(ctx, tc);
              }
              grn_obj_unlink(ctx, score);
              grn_obj_unlink(ctx, key);
              grn_obj_unlink(ctx, expr);
            } else {
              ERR(GRN_UNKNOWN_ERROR,
                  "error on building expr. for calicurating edit distance");
            }
          }
        }
        grn_obj_unlink(ctx, key);
      }
    }
    output(ctx, items, res, tid, sortby, output_columns, offset, limit);
    grn_obj_close(ctx, res);
  } else {
    ERR(GRN_UNKNOWN_ERROR, "cannot create temporary table.");
  }
  GRN_OBJ_FIN(ctx, &item_boost);
  GRN_OBJ_FIN(ctx, &item_freq2);
}
504 | |
505 | static void |
506 | suggest(grn_ctx *ctx, grn_obj *items, grn_obj *items_boost, |
507 | grn_obj *query, grn_obj *sortby, |
508 | grn_obj *output_columns, int offset, int limit, |
509 | int frequency_threshold, double conditional_probability_threshold) |
510 | { |
511 | grn_obj *res; |
512 | if ((res = grn_table_create(ctx, NULL, 0, NULL, |
513 | GRN_TABLE_HASH_KEY|GRN_OBJ_WITH_SUBREC, items, NULL))) { |
514 | grn_id tid = grn_table_get(ctx, items, TEXT_VALUE_LEN(query)); |
515 | cooccurrence_search(ctx, items, items_boost, tid, res, SUGGEST, |
516 | frequency_threshold, conditional_probability_threshold); |
517 | output(ctx, items, res, tid, sortby, output_columns, offset, limit); |
518 | grn_obj_close(ctx, res); |
519 | } else { |
520 | ERR(GRN_UNKNOWN_ERROR, "cannot create temporary table." ); |
521 | } |
522 | } |
523 | |
524 | static grn_suggest_search_mode |
525 | parse_search_mode(grn_ctx *ctx, grn_obj *mode_text) |
526 | { |
527 | grn_suggest_search_mode mode; |
528 | int mode_length; |
529 | |
530 | mode_length = GRN_TEXT_LEN(mode_text); |
531 | if (mode_length == 3 && |
532 | grn_strncasecmp("yes" , GRN_TEXT_VALUE(mode_text), 3) == 0) { |
533 | mode = GRN_SUGGEST_SEARCH_YES; |
534 | } else if (mode_length == 2 && |
535 | grn_strncasecmp("no" , GRN_TEXT_VALUE(mode_text), 2) == 0) { |
536 | mode = GRN_SUGGEST_SEARCH_NO; |
537 | } else { |
538 | mode = GRN_SUGGEST_SEARCH_AUTO; |
539 | } |
540 | |
541 | return mode; |
542 | } |
543 | |
544 | static grn_obj * |
545 | command_suggest(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) |
546 | { |
547 | grn_obj *items, *col, *items_boost; |
548 | int types; |
549 | int offset = 0; |
550 | int limit = DEFAULT_LIMIT; |
551 | int frequency_threshold = DEFAULT_FREQUENCY_THRESHOLD; |
552 | double conditional_probability_threshold = |
553 | DEFAULT_CONDITIONAL_PROBABILITY_THRESHOLD; |
554 | grn_suggest_search_mode prefix_search_mode; |
555 | grn_suggest_search_mode similar_search_mode; |
556 | |
557 | types = grn_parse_suggest_types(VAR(0)); |
558 | if (GRN_TEXT_LEN(VAR(6)) > 0) { |
559 | offset = grn_atoi(GRN_TEXT_VALUE(VAR(6)), GRN_BULK_CURR(VAR(6)), NULL); |
560 | } |
561 | if (GRN_TEXT_LEN(VAR(7)) > 0) { |
562 | limit = grn_atoi(GRN_TEXT_VALUE(VAR(7)), GRN_BULK_CURR(VAR(7)), NULL); |
563 | } |
564 | if (GRN_TEXT_LEN(VAR(8)) > 0) { |
565 | frequency_threshold = grn_atoi(GRN_TEXT_VALUE(VAR(8)), GRN_BULK_CURR(VAR(8)), NULL); |
566 | } |
567 | if (GRN_TEXT_LEN(VAR(9)) > 0) { |
568 | GRN_TEXT_PUTC(ctx, VAR(9), '\0'); |
569 | conditional_probability_threshold = strtod(GRN_TEXT_VALUE(VAR(9)), NULL); |
570 | } |
571 | |
572 | prefix_search_mode = parse_search_mode(ctx, VAR(10)); |
573 | similar_search_mode = parse_search_mode(ctx, VAR(11)); |
574 | |
575 | if ((items = grn_ctx_get(ctx, TEXT_VALUE_LEN(VAR(1))))) { |
576 | if ((items_boost = grn_obj_column(ctx, items, CONST_STR_LEN("boost" )))) { |
577 | int n_outputs = 0; |
578 | if (types & COMPLETE) { |
579 | n_outputs++; |
580 | } |
581 | if (types & CORRECT) { |
582 | n_outputs++; |
583 | } |
584 | if (types & SUGGEST) { |
585 | n_outputs++; |
586 | } |
587 | GRN_OUTPUT_MAP_OPEN("RESULT_SET" , n_outputs); |
588 | |
589 | if (types & COMPLETE) { |
590 | if ((col = grn_obj_column(ctx, items, TEXT_VALUE_LEN(VAR(2))))) { |
591 | GRN_OUTPUT_CSTR("complete" ); |
592 | complete(ctx, items, items_boost, col, VAR(3), VAR(4), |
593 | VAR(5), offset, limit, |
594 | frequency_threshold, conditional_probability_threshold, |
595 | prefix_search_mode); |
596 | } else { |
597 | ERR(GRN_INVALID_ARGUMENT, "invalid column." ); |
598 | } |
599 | } |
600 | if (types & CORRECT) { |
601 | GRN_OUTPUT_CSTR("correct" ); |
602 | correct(ctx, items, items_boost, VAR(3), VAR(4), |
603 | VAR(5), offset, limit, |
604 | frequency_threshold, conditional_probability_threshold, |
605 | similar_search_mode); |
606 | } |
607 | if (types & SUGGEST) { |
608 | GRN_OUTPUT_CSTR("suggest" ); |
609 | suggest(ctx, items, items_boost, VAR(3), VAR(4), |
610 | VAR(5), offset, limit, |
611 | frequency_threshold, conditional_probability_threshold); |
612 | } |
613 | GRN_OUTPUT_MAP_CLOSE(); |
614 | } else { |
615 | ERR(GRN_INVALID_ARGUMENT, "nonexistent column: <%.*s.boost>" , |
616 | (int)GRN_TEXT_LEN(VAR(1)), GRN_TEXT_VALUE(VAR(1))); |
617 | } |
618 | grn_obj_unlink(ctx, items); |
619 | } else { |
620 | ERR(GRN_INVALID_ARGUMENT, "nonexistent table: <%.*s>" , |
621 | (int)GRN_TEXT_LEN(VAR(1)), GRN_TEXT_VALUE(VAR(1))); |
622 | } |
623 | return NULL; |
624 | } |
625 | |
626 | static void |
627 | learner_init_values(grn_ctx *ctx, grn_suggest_learner *learner) |
628 | { |
629 | learner->post_event_id = GRN_RECORD_VALUE(learner->post_event); |
630 | learner->post_type_id = GRN_RECORD_VALUE(learner->post_type); |
631 | learner->post_item_id = GRN_RECORD_VALUE(learner->post_item); |
632 | learner->seq_id = GRN_RECORD_VALUE(learner->seq); |
633 | learner->post_time_value = GRN_TIME_VALUE(learner->post_time); |
634 | } |
635 | |
636 | static void |
637 | learner_init(grn_ctx *ctx, grn_suggest_learner *learner, |
638 | grn_obj *post_event, grn_obj *post_type, grn_obj *post_item, |
639 | grn_obj *seq, grn_obj *post_time, grn_obj *pairs) |
640 | { |
641 | learner->post_event = post_event; |
642 | learner->post_type = post_type; |
643 | learner->post_item = post_item; |
644 | learner->seq = seq; |
645 | learner->post_time = post_time; |
646 | learner->pairs = pairs; |
647 | |
648 | learner->learn_distance_in_seconds = 0; |
649 | |
650 | learner_init_values(ctx, learner); |
651 | } |
652 | |
653 | static void |
654 | learner_init_columns(grn_ctx *ctx, grn_suggest_learner *learner) |
655 | { |
656 | grn_id events_id, event_types_id; |
657 | grn_obj *seqs, *events, *post_item, *items, *pairs; |
658 | |
659 | learner->seqs = seqs = grn_ctx_at(ctx, GRN_OBJ_GET_DOMAIN(learner->seq)); |
660 | learner->seqs_events = grn_obj_column(ctx, seqs, CONST_STR_LEN("events" )); |
661 | |
662 | events_id = grn_obj_get_range(ctx, learner->seqs_events); |
663 | learner->events = events = grn_ctx_at(ctx, events_id); |
664 | learner->events_item = grn_obj_column(ctx, events, CONST_STR_LEN("item" )); |
665 | learner->events_type = grn_obj_column(ctx, events, CONST_STR_LEN("type" )); |
666 | learner->events_time = grn_obj_column(ctx, events, CONST_STR_LEN("time" )); |
667 | |
668 | event_types_id = grn_obj_get_range(ctx, learner->events_type); |
669 | learner->event_types = grn_obj_column(ctx, events, CONST_STR_LEN("time" )); |
670 | |
671 | post_item = learner->post_item; |
672 | learner->items = items = grn_ctx_at(ctx, GRN_OBJ_GET_DOMAIN(post_item)); |
673 | learner->items_freq = grn_obj_column(ctx, items, CONST_STR_LEN("freq" )); |
674 | learner->items_freq2 = grn_obj_column(ctx, items, CONST_STR_LEN("freq2" )); |
675 | learner->items_last = grn_obj_column(ctx, items, CONST_STR_LEN("last" )); |
676 | |
677 | pairs = learner->pairs; |
678 | learner->pairs_pre = grn_obj_column(ctx, pairs, CONST_STR_LEN("pre" )); |
679 | learner->pairs_post = grn_obj_column(ctx, pairs, CONST_STR_LEN("post" )); |
680 | learner->pairs_freq0 = grn_obj_column(ctx, pairs, CONST_STR_LEN("freq0" )); |
681 | learner->pairs_freq1 = grn_obj_column(ctx, pairs, CONST_STR_LEN("freq1" )); |
682 | learner->pairs_freq2 = grn_obj_column(ctx, pairs, CONST_STR_LEN("freq2" )); |
683 | } |
684 | |
685 | static void |
686 | learner_fin_columns(grn_ctx *ctx, grn_suggest_learner *learner) |
687 | { |
688 | grn_obj_unlink(ctx, learner->seqs); |
689 | grn_obj_unlink(ctx, learner->seqs_events); |
690 | |
691 | grn_obj_unlink(ctx, learner->events); |
692 | grn_obj_unlink(ctx, learner->events_item); |
693 | grn_obj_unlink(ctx, learner->events_type); |
694 | grn_obj_unlink(ctx, learner->events_time); |
695 | |
696 | grn_obj_unlink(ctx, learner->event_types); |
697 | |
698 | grn_obj_unlink(ctx, learner->items); |
699 | grn_obj_unlink(ctx, learner->items_freq); |
700 | grn_obj_unlink(ctx, learner->items_freq2); |
701 | grn_obj_unlink(ctx, learner->items_last); |
702 | |
703 | grn_obj_unlink(ctx, learner->pairs_pre); |
704 | grn_obj_unlink(ctx, learner->pairs_post); |
705 | grn_obj_unlink(ctx, learner->pairs_freq0); |
706 | grn_obj_unlink(ctx, learner->pairs_freq1); |
707 | grn_obj_unlink(ctx, learner->pairs_freq2); |
708 | } |
709 | |
710 | static void |
711 | learner_init_weight(grn_ctx *ctx, grn_suggest_learner *learner) |
712 | { |
713 | grn_obj *weight_column = NULL; |
714 | unsigned int weight = 1; |
715 | |
716 | if (learner->configuration) { |
717 | weight_column = grn_obj_column(ctx, |
718 | learner->configuration, |
719 | CONST_STR_LEN("weight" )); |
720 | } |
721 | if (weight_column) { |
722 | grn_id id; |
723 | id = grn_table_get(ctx, learner->configuration, |
724 | GRN_TEXT_VALUE(&(learner->dataset_name)), |
725 | GRN_TEXT_LEN(&(learner->dataset_name))); |
726 | if (id != GRN_ID_NIL) { |
727 | grn_obj weight_value; |
728 | GRN_UINT32_INIT(&weight_value, 0); |
729 | grn_obj_get_value(ctx, weight_column, id, &weight_value); |
730 | weight = GRN_UINT32_VALUE(&weight_value); |
731 | GRN_OBJ_FIN(ctx, &weight_value); |
732 | } |
733 | grn_obj_unlink(ctx, weight_column); |
734 | } |
735 | |
736 | GRN_UINT32_INIT(&(learner->weight), 0); |
737 | GRN_UINT32_SET(ctx, &(learner->weight), weight); |
738 | } |
739 | |
740 | static void |
741 | learner_init_dataset_name(grn_ctx *ctx, grn_suggest_learner *learner) |
742 | { |
743 | char events_name[GRN_TABLE_MAX_KEY_SIZE]; |
744 | unsigned int events_name_size; |
745 | unsigned int events_name_prefix_size; |
746 | |
747 | events_name_size = grn_obj_name(ctx, learner->events, |
748 | events_name, GRN_TABLE_MAX_KEY_SIZE); |
749 | GRN_TEXT_INIT(&(learner->dataset_name), 0); |
750 | events_name_prefix_size = strlen("event_" ); |
751 | if (events_name_size > events_name_prefix_size) { |
752 | GRN_TEXT_PUT(ctx, |
753 | &(learner->dataset_name), |
754 | events_name + events_name_prefix_size, |
755 | events_name_size - events_name_prefix_size); |
756 | } |
757 | } |
758 | |
759 | static void |
760 | learner_fin_dataset_name(grn_ctx *ctx, grn_suggest_learner *learner) |
761 | { |
762 | GRN_OBJ_FIN(ctx, &(learner->dataset_name)); |
763 | } |
764 | |
765 | static void |
766 | learner_init_configuration(grn_ctx *ctx, grn_suggest_learner *learner) |
767 | { |
768 | learner->configuration = grn_ctx_get(ctx, "configuration" , -1); |
769 | } |
770 | |
771 | static void |
772 | learner_fin_configuration(grn_ctx *ctx, grn_suggest_learner *learner) |
773 | { |
774 | if (learner->configuration) { |
775 | grn_obj_unlink(ctx, learner->configuration); |
776 | } |
777 | } |
778 | |
779 | static void |
780 | learner_init_buffers(grn_ctx *ctx, grn_suggest_learner *learner) |
781 | { |
782 | learner_init_weight(ctx, learner); |
783 | GRN_RECORD_INIT(&(learner->pre_events), 0, grn_obj_id(ctx, learner->events)); |
784 | } |
785 | |
786 | static void |
787 | learner_fin_buffers(grn_ctx *ctx, grn_suggest_learner *learner) |
788 | { |
789 | grn_obj_unlink(ctx, &(learner->weight)); |
790 | grn_obj_unlink(ctx, &(learner->pre_events)); |
791 | } |
792 | |
793 | static void |
794 | learner_init_submit_learn(grn_ctx *ctx, grn_suggest_learner *learner) |
795 | { |
796 | grn_id items_id; |
797 | |
798 | learner->key_prefix = ((uint64_t)learner->post_item_id) << 32; |
799 | |
800 | items_id = grn_obj_get_range(ctx, learner->events_item); |
801 | GRN_RECORD_INIT(&(learner->pre_item), 0, items_id); |
802 | |
803 | grn_obj_get_value(ctx, learner->seqs_events, learner->seq_id, |
804 | &(learner->pre_events)); |
805 | } |
806 | |
807 | static void |
808 | learner_fin_submit_learn(grn_ctx *ctx, grn_suggest_learner *learner) |
809 | { |
810 | grn_obj_unlink(ctx, &(learner->pre_item)); |
811 | GRN_BULK_REWIND(&(learner->pre_events)); |
812 | } |
813 | |
814 | static grn_bool |
815 | learner_is_valid_input(grn_ctx *ctx, grn_suggest_learner *learner) |
816 | { |
817 | return learner->post_event_id && learner->post_item_id && learner->seq_id; |
818 | } |
819 | |
820 | static void |
821 | learner_increment(grn_ctx *ctx, grn_suggest_learner *learner, |
822 | grn_obj *column, grn_id record_id) |
823 | { |
824 | grn_obj_set_value(ctx, column, record_id, &(learner->weight), GRN_OBJ_INCR); |
825 | } |
826 | |
827 | static void |
828 | learner_increment_item_freq(grn_ctx *ctx, grn_suggest_learner *learner, |
829 | grn_obj *column) |
830 | { |
831 | learner_increment(ctx, learner, column, learner->post_item_id); |
832 | } |
833 | |
834 | static void |
835 | learner_set_last_post_time(grn_ctx *ctx, grn_suggest_learner *learner) |
836 | { |
837 | grn_obj_set_value(ctx, learner->items_last, learner->post_item_id, |
838 | learner->post_time, GRN_OBJ_SET); |
839 | } |
840 | |
841 | static void |
842 | learner_learn_for_complete_and_correcnt(grn_ctx *ctx, |
843 | grn_suggest_learner *learner) |
844 | { |
845 | grn_obj *pre_item, *post_item, *pre_events; |
846 | grn_obj pre_type, pre_time; |
847 | grn_id *ep, *es; |
848 | uint64_t key; |
849 | int64_t post_time_value; |
850 | |
851 | pre_item = &(learner->pre_item); |
852 | post_item = learner->post_item; |
853 | pre_events = &(learner->pre_events); |
854 | post_time_value = learner->post_time_value; |
855 | GRN_RECORD_INIT(&pre_type, 0, grn_obj_get_range(ctx, learner->events_type)); |
856 | GRN_TIME_INIT(&pre_time, 0); |
857 | ep = (grn_id *)GRN_BULK_CURR(pre_events); |
858 | es = (grn_id *)GRN_BULK_HEAD(pre_events); |
859 | while (es < ep--) { |
860 | grn_id pair_id; |
861 | int added; |
862 | int64_t learn_distance; |
863 | |
864 | GRN_BULK_REWIND(&pre_type); |
865 | GRN_BULK_REWIND(&pre_time); |
866 | GRN_BULK_REWIND(pre_item); |
867 | grn_obj_get_value(ctx, learner->events_type, *ep, &pre_type); |
868 | grn_obj_get_value(ctx, learner->events_time, *ep, &pre_time); |
869 | grn_obj_get_value(ctx, learner->events_item, *ep, pre_item); |
870 | learn_distance = post_time_value - GRN_TIME_VALUE(&pre_time); |
871 | if (learn_distance >= MIN_LEARN_DISTANCE) { |
872 | learner->learn_distance_in_seconds = |
873 | (int)(learn_distance / GRN_TIME_USEC_PER_SEC); |
874 | break; |
875 | } |
876 | key = learner->key_prefix + GRN_RECORD_VALUE(pre_item); |
877 | pair_id = grn_table_add(ctx, learner->pairs, &key, sizeof(uint64_t), |
878 | &added); |
879 | if (added) { |
880 | grn_obj_set_value(ctx, learner->pairs_pre, pair_id, pre_item, |
881 | GRN_OBJ_SET); |
882 | grn_obj_set_value(ctx, learner->pairs_post, pair_id, post_item, |
883 | GRN_OBJ_SET); |
884 | } |
885 | if (GRN_RECORD_VALUE(&pre_type)) { |
886 | learner_increment(ctx, learner, learner->pairs_freq1, pair_id); |
887 | break; |
888 | } else { |
889 | learner_increment(ctx, learner, learner->pairs_freq0, pair_id); |
890 | } |
891 | } |
892 | GRN_OBJ_FIN(ctx, &pre_type); |
893 | GRN_OBJ_FIN(ctx, &pre_time); |
894 | } |
895 | |
896 | static void |
897 | learner_learn_for_suggest(grn_ctx *ctx, grn_suggest_learner *learner) |
898 | { |
899 | char keybuf[GRN_TABLE_MAX_KEY_SIZE]; |
900 | int keylen = grn_table_get_key(ctx, learner->items, learner->post_item_id, |
901 | keybuf, GRN_TABLE_MAX_KEY_SIZE); |
902 | unsigned int token_flags = 0; |
903 | grn_token_cursor *token_cursor = |
904 | grn_token_cursor_open(ctx, learner->items, keybuf, keylen, |
905 | GRN_TOKEN_ADD, token_flags); |
906 | if (token_cursor) { |
907 | grn_id tid; |
908 | grn_obj *pre_item = &(learner->pre_item); |
909 | grn_obj *post_item = learner->post_item; |
910 | grn_hash *token_ids = NULL; |
911 | while ((tid = grn_token_cursor_next(ctx, token_cursor)) && tid != learner->post_item_id) { |
912 | uint64_t key; |
913 | int added; |
914 | grn_id pair_id; |
915 | key = learner->key_prefix + tid; |
916 | pair_id = grn_table_add(ctx, learner->pairs, &key, sizeof(uint64_t), |
917 | &added); |
918 | if (added) { |
919 | GRN_RECORD_SET(ctx, pre_item, tid); |
920 | grn_obj_set_value(ctx, learner->pairs_pre, pair_id, |
921 | pre_item, GRN_OBJ_SET); |
922 | grn_obj_set_value(ctx, learner->pairs_post, pair_id, |
923 | post_item, GRN_OBJ_SET); |
924 | } |
925 | if (!token_ids) { |
926 | token_ids = grn_hash_create(ctx, NULL, sizeof(grn_id), 0, |
927 | GRN_OBJ_TABLE_HASH_KEY|GRN_HASH_TINY); |
928 | } |
929 | if (token_ids) { |
930 | int token_added; |
931 | grn_hash_add(ctx, token_ids, &tid, sizeof(grn_id), NULL, &token_added); |
932 | if (token_added) { |
933 | learner_increment(ctx, learner, learner->pairs_freq2, pair_id); |
934 | } |
935 | } |
936 | } |
937 | if (token_ids) { |
938 | grn_hash_close(ctx, token_ids); |
939 | } |
940 | grn_token_cursor_close(ctx, token_cursor); |
941 | } |
942 | } |
943 | |
944 | static void |
945 | learner_append_post_event(grn_ctx *ctx, grn_suggest_learner *learner) |
946 | { |
947 | GRN_RECORD_SET(ctx, &(learner->pre_events), learner->post_event_id); |
948 | grn_obj_set_value(ctx, learner->seqs_events, learner->seq_id, |
949 | &(learner->pre_events), GRN_OBJ_APPEND); |
950 | } |
951 | |
952 | static void |
953 | learner_learn(grn_ctx *ctx, grn_suggest_learner *learner) |
954 | { |
955 | if (learner_is_valid_input(ctx, learner)) { |
956 | learner_init_columns(ctx, learner); |
957 | learner_init_dataset_name(ctx, learner); |
958 | learner_init_configuration(ctx, learner); |
959 | learner_init_buffers(ctx, learner); |
960 | learner_increment_item_freq(ctx, learner, learner->items_freq); |
961 | learner_set_last_post_time(ctx, learner); |
962 | if (learner->post_type_id) { |
963 | learner_init_submit_learn(ctx, learner); |
964 | learner_increment_item_freq(ctx, learner, learner->items_freq2); |
965 | learner_learn_for_complete_and_correcnt(ctx, learner); |
966 | learner_learn_for_suggest(ctx, learner); |
967 | learner_fin_submit_learn(ctx, learner); |
968 | } |
969 | learner_append_post_event(ctx, learner); |
970 | learner_fin_buffers(ctx, learner); |
971 | learner_fin_configuration(ctx, learner); |
972 | learner_fin_dataset_name(ctx, learner); |
973 | learner_fin_columns(ctx, learner); |
974 | } |
975 | } |
976 | |
977 | static grn_obj * |
978 | func_suggest_preparer(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) |
979 | { |
980 | int learn_distance_in_seconds = 0; |
981 | grn_obj *obj; |
982 | if (nargs == 6) { |
983 | grn_obj *post_event = args[0]; |
984 | grn_obj *post_type = args[1]; |
985 | grn_obj *post_item = args[2]; |
986 | grn_obj *seq = args[3]; |
987 | grn_obj *post_time = args[4]; |
988 | grn_obj *pairs = args[5]; |
989 | grn_suggest_learner learner; |
990 | learner_init(ctx, &learner, |
991 | post_event, post_type, post_item, seq, post_time, pairs); |
992 | learner_learn(ctx, &learner); |
993 | learn_distance_in_seconds = learner.learn_distance_in_seconds; |
994 | } |
995 | if ((obj = GRN_PROC_ALLOC(GRN_DB_UINT32, 0))) { |
996 | GRN_UINT32_SET(ctx, obj, learn_distance_in_seconds); |
997 | } |
998 | return obj; |
999 | } |
1000 | |
1001 | grn_rc |
1002 | GRN_PLUGIN_INIT(grn_ctx *ctx) |
1003 | { |
1004 | return GRN_SUCCESS; |
1005 | } |
1006 | |
1007 | grn_rc |
1008 | GRN_PLUGIN_REGISTER(grn_ctx *ctx) |
1009 | { |
1010 | grn_expr_var vars[12]; |
1011 | |
1012 | grn_plugin_expr_var_init(ctx, &vars[0], "types" , -1); |
1013 | grn_plugin_expr_var_init(ctx, &vars[1], "table" , -1); |
1014 | grn_plugin_expr_var_init(ctx, &vars[2], "column" , -1); |
1015 | grn_plugin_expr_var_init(ctx, &vars[3], "query" , -1); |
1016 | grn_plugin_expr_var_init(ctx, &vars[4], "sortby" , -1); |
1017 | grn_plugin_expr_var_init(ctx, &vars[5], "output_columns" , -1); |
1018 | grn_plugin_expr_var_init(ctx, &vars[6], "offset" , -1); |
1019 | grn_plugin_expr_var_init(ctx, &vars[7], "limit" , -1); |
1020 | grn_plugin_expr_var_init(ctx, &vars[8], "frequency_threshold" , -1); |
1021 | grn_plugin_expr_var_init(ctx, &vars[9], "conditional_probability_threshold" , -1); |
1022 | grn_plugin_expr_var_init(ctx, &vars[10], "prefix_search" , -1); |
1023 | grn_plugin_expr_var_init(ctx, &vars[11], "similar_search" , -1); |
1024 | grn_plugin_command_create(ctx, "suggest" , -1, command_suggest, 12, vars); |
1025 | |
1026 | grn_proc_create(ctx, CONST_STR_LEN("suggest_preparer" ), GRN_PROC_FUNCTION, |
1027 | func_suggest_preparer, NULL, NULL, 0, NULL); |
1028 | return ctx->rc; |
1029 | } |
1030 | |
1031 | grn_rc |
1032 | GRN_PLUGIN_FIN(grn_ctx *ctx) |
1033 | { |
1034 | return GRN_SUCCESS; |
1035 | } |
1036 | |