1/* -*- c-basic-offset: 2 -*- */
2/*
3 Copyright(C) 2009-2016 Brazil
4
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License version 2.1 as published by the Free Software Foundation.
8
9 This library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with this library; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17*/
18
19#include "../grn_proc.h"
20#include "../grn_expr.h"
21
22#include <groonga/plugin.h>
23#include <string.h>
24
/* Name of the expression variable in which func_snippet_html() stores the
 * snip object it creates, so that later calls on the same expression can
 * look it up instead of opening a new one. */
#define GRN_FUNC_SNIPPET_HTML_CACHE_NAME "$snippet_html"
26
27static grn_obj *
28snippet_exec(grn_ctx *ctx, grn_obj *snip, grn_obj *text,
29 grn_user_data *user_data,
30 const char *prefix, int prefix_length,
31 const char *suffix, int suffix_length)
32{
33 grn_rc rc;
34 unsigned int i, n_results, max_tagged_length;
35 grn_obj snippet_buffer;
36 grn_obj *snippets;
37
38 if (GRN_TEXT_LEN(text) == 0) {
39 return NULL;
40 }
41
42 rc = grn_snip_exec(ctx, snip,
43 GRN_TEXT_VALUE(text), GRN_TEXT_LEN(text),
44 &n_results, &max_tagged_length);
45 if (rc != GRN_SUCCESS) {
46 return NULL;
47 }
48
49 if (n_results == 0) {
50 return grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
51 }
52
53 snippets = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_SHORT_TEXT, GRN_OBJ_VECTOR);
54 if (!snippets) {
55 return NULL;
56 }
57
58 GRN_TEXT_INIT(&snippet_buffer, 0);
59 grn_bulk_space(ctx, &snippet_buffer,
60 prefix_length + max_tagged_length + suffix_length);
61 for (i = 0; i < n_results; i++) {
62 unsigned int snippet_length;
63
64 GRN_BULK_REWIND(&snippet_buffer);
65 if (prefix_length) {
66 GRN_TEXT_PUT(ctx, &snippet_buffer, prefix, prefix_length);
67 }
68 rc = grn_snip_get_result(ctx, snip, i,
69 GRN_TEXT_VALUE(&snippet_buffer) + prefix_length,
70 &snippet_length);
71 if (rc == GRN_SUCCESS) {
72 grn_strncat(GRN_TEXT_VALUE(&snippet_buffer),
73 GRN_BULK_WSIZE(&snippet_buffer),
74 suffix,
75 suffix_length);
76 grn_vector_add_element(ctx, snippets,
77 GRN_TEXT_VALUE(&snippet_buffer),
78 prefix_length + snippet_length + suffix_length,
79 0, GRN_DB_SHORT_TEXT);
80 }
81 }
82 GRN_OBJ_FIN(ctx, &snippet_buffer);
83
84 return snippets;
85}
86
87/* TODO: support caching for the same parameter. */
88static grn_obj *
89func_snippet(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
90{
91 grn_obj *snippets = NULL;
92
93#define N_REQUIRED_ARGS 1
94#define KEYWORD_SET_SIZE 3
95 if (nargs > N_REQUIRED_ARGS) {
96 grn_obj *text = args[0];
97 grn_obj *end_arg = args[nargs - 1];
98 grn_obj *snip = NULL;
99 unsigned int width = 200;
100 unsigned int max_n_results = 3;
101 grn_snip_mapping *mapping = NULL;
102 int flags = GRN_SNIP_SKIP_LEADING_SPACES;
103 const char *prefix = NULL;
104 int prefix_length = 0;
105 const char *suffix = NULL;
106 int suffix_length = 0;
107 const char *normalizer_name = NULL;
108 int normalizer_name_length = 0;
109 const char *default_open_tag = NULL;
110 int default_open_tag_length = 0;
111 const char *default_close_tag = NULL;
112 int default_close_tag_length = 0;
113 int n_args_without_option = nargs;
114
115 if (end_arg->header.type == GRN_TABLE_HASH_KEY) {
116 grn_obj *options = end_arg;
117 grn_hash_cursor *cursor;
118 void *key;
119 int key_size;
120 grn_obj *value;
121
122 n_args_without_option--;
123 cursor = grn_hash_cursor_open(ctx, (grn_hash *)options,
124 NULL, 0, NULL, 0,
125 0, -1, 0);
126 if (!cursor) {
127 GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE,
128 "snippet(): couldn't open cursor");
129 goto exit;
130 }
131 while (grn_hash_cursor_next(ctx, cursor) != GRN_ID_NIL) {
132 grn_hash_cursor_get_key_value(ctx, cursor,
133 &key, &key_size,
134 (void **)&value);
135 if (key_size == 5 && !memcmp(key, "width", 5)) {
136 width = GRN_UINT32_VALUE(value);
137 } else if (key_size == 13 && !memcmp(key, "max_n_results", 13)) {
138 max_n_results = GRN_UINT32_VALUE(value);
139 } else if (key_size == 19 && !memcmp(key, "skip_leading_spaces", 19)) {
140 if (GRN_BOOL_VALUE(value) == GRN_FALSE) {
141 flags &= ~GRN_SNIP_SKIP_LEADING_SPACES;
142 }
143 } else if (key_size == 11 && !memcmp(key, "html_escape", 11)) {
144 if (GRN_BOOL_VALUE(value)) {
145 mapping = GRN_SNIP_MAPPING_HTML_ESCAPE;
146 }
147 } else if (key_size == 6 && !memcmp(key, "prefix", 6)) {
148 prefix = GRN_TEXT_VALUE(value);
149 prefix_length = GRN_TEXT_LEN(value);
150 } else if (key_size == 6 && !memcmp(key, "suffix", 6)) {
151 suffix = GRN_TEXT_VALUE(value);
152 suffix_length = GRN_TEXT_LEN(value);
153 } else if (key_size == 10 && !memcmp(key, "normalizer", 10)) {
154 normalizer_name = GRN_TEXT_VALUE(value);
155 normalizer_name_length = GRN_TEXT_LEN(value);
156 } else if (key_size == 16 && !memcmp(key, "default_open_tag", 16)) {
157 default_open_tag = GRN_TEXT_VALUE(value);
158 default_open_tag_length = GRN_TEXT_LEN(value);
159 } else if (key_size == 17 && !memcmp(key, "default_close_tag", 17)) {
160 default_close_tag = GRN_TEXT_VALUE(value);
161 default_close_tag_length = GRN_TEXT_LEN(value);
162 } else {
163 GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
164 "invalid option name: <%.*s>",
165 key_size, (char *)key);
166 grn_hash_cursor_close(ctx, cursor);
167 goto exit;
168 }
169 }
170 grn_hash_cursor_close(ctx, cursor);
171 }
172
173 snip = grn_snip_open(ctx, flags, width, max_n_results,
174 default_open_tag, default_open_tag_length,
175 default_close_tag, default_close_tag_length, mapping);
176 if (snip) {
177 grn_rc rc;
178 unsigned int i;
179 if (!normalizer_name) {
180 grn_snip_set_normalizer(ctx, snip, GRN_NORMALIZER_AUTO);
181 } else if (normalizer_name_length > 0) {
182 grn_obj *normalizer;
183 normalizer = grn_ctx_get(ctx, normalizer_name, normalizer_name_length);
184 if (!grn_obj_is_normalizer_proc(ctx, normalizer)) {
185 grn_obj inspected;
186 GRN_TEXT_INIT(&inspected, 0);
187 grn_inspect(ctx, &inspected, normalizer);
188 GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
189 "snippet(): not normalizer: <%.*s>",
190 (int)GRN_TEXT_LEN(&inspected),
191 GRN_TEXT_VALUE(&inspected));
192 GRN_OBJ_FIN(ctx, &inspected);
193 grn_obj_unlink(ctx, normalizer);
194 goto exit;
195 }
196 grn_snip_set_normalizer(ctx, snip, normalizer);
197 grn_obj_unlink(ctx, normalizer);
198 }
199 if (default_open_tag_length == 0 && default_close_tag_length == 0) {
200 unsigned int n_keyword_sets =
201 (n_args_without_option - N_REQUIRED_ARGS) / KEYWORD_SET_SIZE;
202 grn_obj **keyword_set_args = args + N_REQUIRED_ARGS;
203 for (i = 0; i < n_keyword_sets; i++) {
204 rc = grn_snip_add_cond(ctx, snip,
205 GRN_TEXT_VALUE(keyword_set_args[i * KEYWORD_SET_SIZE]),
206 GRN_TEXT_LEN(keyword_set_args[i * KEYWORD_SET_SIZE]),
207 GRN_TEXT_VALUE(keyword_set_args[i * KEYWORD_SET_SIZE + 1]),
208 GRN_TEXT_LEN(keyword_set_args[i * KEYWORD_SET_SIZE + 1]),
209 GRN_TEXT_VALUE(keyword_set_args[i * KEYWORD_SET_SIZE + 2]),
210 GRN_TEXT_LEN(keyword_set_args[i * KEYWORD_SET_SIZE + 2]));
211 }
212 } else {
213 unsigned int n_keywords = n_args_without_option - N_REQUIRED_ARGS;
214 grn_obj **keyword_args = args + N_REQUIRED_ARGS;
215 for (i = 0; i < n_keywords; i++) {
216 rc = grn_snip_add_cond(ctx, snip,
217 GRN_TEXT_VALUE(keyword_args[i]),
218 GRN_TEXT_LEN(keyword_args[i]),
219 NULL, 0,
220 NULL, 0);
221 }
222 }
223 snippets = snippet_exec(ctx, snip, text, user_data,
224 prefix, prefix_length,
225 suffix, suffix_length);
226 }
227 }
228#undef KEYWORD_SET_SIZE
229#undef N_REQUIRED_ARGS
230
231exit :
232 if (!snippets) {
233 snippets = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
234 }
235
236 return snippets;
237}
238
239void
240grn_proc_init_snippet(grn_ctx *ctx)
241{
242 grn_proc_create(ctx, "snippet", -1, GRN_PROC_FUNCTION,
243 func_snippet, NULL, NULL, 0, NULL);
244}
245
246static grn_obj *
247func_snippet_html(grn_ctx *ctx, int nargs, grn_obj **args,
248 grn_user_data *user_data)
249{
250 grn_obj *snippets = NULL;
251
252 /* TODO: support parameters */
253 if (nargs == 1) {
254 grn_obj *text = args[0];
255 grn_obj *expression = NULL;
256 grn_obj *condition_ptr = NULL;
257 grn_obj *condition = NULL;
258 grn_obj *snip = NULL;
259 int flags = GRN_SNIP_SKIP_LEADING_SPACES;
260 unsigned int width = 200;
261 unsigned int max_n_results = 3;
262 const char *open_tag = "<span class=\"keyword\">";
263 const char *close_tag = "</span>";
264 grn_snip_mapping *mapping = GRN_SNIP_MAPPING_HTML_ESCAPE;
265
266 grn_proc_get_info(ctx, user_data, NULL, NULL, &expression);
267 condition_ptr = grn_expr_get_var(ctx, expression,
268 GRN_SELECT_INTERNAL_VAR_CONDITION,
269 strlen(GRN_SELECT_INTERNAL_VAR_CONDITION));
270 if (condition_ptr) {
271 condition = GRN_PTR_VALUE(condition_ptr);
272 }
273
274 if (condition) {
275 grn_obj *snip_ptr;
276 snip_ptr = grn_expr_get_var(ctx, expression,
277 GRN_FUNC_SNIPPET_HTML_CACHE_NAME,
278 strlen(GRN_FUNC_SNIPPET_HTML_CACHE_NAME));
279 if (snip_ptr) {
280 snip = GRN_PTR_VALUE(snip_ptr);
281 } else {
282 snip_ptr =
283 grn_expr_get_or_add_var(ctx, expression,
284 GRN_FUNC_SNIPPET_HTML_CACHE_NAME,
285 strlen(GRN_FUNC_SNIPPET_HTML_CACHE_NAME));
286 GRN_OBJ_FIN(ctx, snip_ptr);
287 GRN_PTR_INIT(snip_ptr, GRN_OBJ_OWN, GRN_DB_OBJECT);
288
289 snip = grn_snip_open(ctx, flags, width, max_n_results,
290 open_tag, strlen(open_tag),
291 close_tag, strlen(close_tag),
292 mapping);
293 if (snip) {
294 grn_snip_set_normalizer(ctx, snip, GRN_NORMALIZER_AUTO);
295 grn_expr_snip_add_conditions(ctx, condition, snip,
296 0, NULL, NULL, NULL, NULL);
297 GRN_PTR_SET(ctx, snip_ptr, snip);
298 }
299 }
300 }
301
302 if (snip) {
303 snippets = snippet_exec(ctx, snip, text, user_data, NULL, 0, NULL, 0);
304 }
305 }
306
307 if (!snippets) {
308 snippets = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
309 }
310
311 return snippets;
312}
313
314void
315grn_proc_init_snippet_html(grn_ctx *ctx)
316{
317 grn_proc_create(ctx, "snippet_html", -1, GRN_PROC_FUNCTION,
318 func_snippet_html, NULL, NULL, 0, NULL);
319}
320