1 | /* -*- c-basic-offset: 2 -*- */ |
2 | /* |
3 | Copyright(C) 2009-2016 Brazil |
4 | |
5 | This library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License version 2.1 as published by the Free Software Foundation. |
8 | |
9 | This library is distributed in the hope that it will be useful, |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | Lesser General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU Lesser General Public |
15 | License along with this library; if not, write to the Free Software |
16 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
17 | */ |
18 | |
19 | #include "../grn_proc.h" |
20 | #include "../grn_expr.h" |
21 | |
22 | #include <groonga/plugin.h> |
23 | #include <string.h> |
24 | |
/* Name of the expression variable in which func_snippet_html() stores the
   snip object it builds, so later calls on the same expression reuse it. */
25 | #define GRN_FUNC_SNIPPET_HTML_CACHE_NAME "$snippet_html" |
26 | |
27 | static grn_obj * |
28 | snippet_exec(grn_ctx *ctx, grn_obj *snip, grn_obj *text, |
29 | grn_user_data *user_data, |
30 | const char *prefix, int prefix_length, |
31 | const char *suffix, int suffix_length) |
32 | { |
33 | grn_rc rc; |
34 | unsigned int i, n_results, max_tagged_length; |
35 | grn_obj snippet_buffer; |
36 | grn_obj *snippets; |
37 | |
38 | if (GRN_TEXT_LEN(text) == 0) { |
39 | return NULL; |
40 | } |
41 | |
42 | rc = grn_snip_exec(ctx, snip, |
43 | GRN_TEXT_VALUE(text), GRN_TEXT_LEN(text), |
44 | &n_results, &max_tagged_length); |
45 | if (rc != GRN_SUCCESS) { |
46 | return NULL; |
47 | } |
48 | |
49 | if (n_results == 0) { |
50 | return grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0); |
51 | } |
52 | |
53 | snippets = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_SHORT_TEXT, GRN_OBJ_VECTOR); |
54 | if (!snippets) { |
55 | return NULL; |
56 | } |
57 | |
58 | GRN_TEXT_INIT(&snippet_buffer, 0); |
59 | grn_bulk_space(ctx, &snippet_buffer, |
60 | prefix_length + max_tagged_length + suffix_length); |
61 | for (i = 0; i < n_results; i++) { |
62 | unsigned int snippet_length; |
63 | |
64 | GRN_BULK_REWIND(&snippet_buffer); |
65 | if (prefix_length) { |
66 | GRN_TEXT_PUT(ctx, &snippet_buffer, prefix, prefix_length); |
67 | } |
68 | rc = grn_snip_get_result(ctx, snip, i, |
69 | GRN_TEXT_VALUE(&snippet_buffer) + prefix_length, |
70 | &snippet_length); |
71 | if (rc == GRN_SUCCESS) { |
72 | grn_strncat(GRN_TEXT_VALUE(&snippet_buffer), |
73 | GRN_BULK_WSIZE(&snippet_buffer), |
74 | suffix, |
75 | suffix_length); |
76 | grn_vector_add_element(ctx, snippets, |
77 | GRN_TEXT_VALUE(&snippet_buffer), |
78 | prefix_length + snippet_length + suffix_length, |
79 | 0, GRN_DB_SHORT_TEXT); |
80 | } |
81 | } |
82 | GRN_OBJ_FIN(ctx, &snippet_buffer); |
83 | |
84 | return snippets; |
85 | } |
86 | |
87 | /* TODO: support caching for the same parameter. */ |
88 | static grn_obj * |
89 | func_snippet(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) |
90 | { |
91 | grn_obj *snippets = NULL; |
92 | |
93 | #define N_REQUIRED_ARGS 1 |
94 | #define KEYWORD_SET_SIZE 3 |
95 | if (nargs > N_REQUIRED_ARGS) { |
96 | grn_obj *text = args[0]; |
97 | grn_obj *end_arg = args[nargs - 1]; |
98 | grn_obj *snip = NULL; |
99 | unsigned int width = 200; |
100 | unsigned int max_n_results = 3; |
101 | grn_snip_mapping *mapping = NULL; |
102 | int flags = GRN_SNIP_SKIP_LEADING_SPACES; |
103 | const char *prefix = NULL; |
104 | int prefix_length = 0; |
105 | const char *suffix = NULL; |
106 | int suffix_length = 0; |
107 | const char *normalizer_name = NULL; |
108 | int normalizer_name_length = 0; |
109 | const char *default_open_tag = NULL; |
110 | int default_open_tag_length = 0; |
111 | const char *default_close_tag = NULL; |
112 | int default_close_tag_length = 0; |
113 | int n_args_without_option = nargs; |
114 | |
115 | if (end_arg->header.type == GRN_TABLE_HASH_KEY) { |
116 | grn_obj *options = end_arg; |
117 | grn_hash_cursor *cursor; |
118 | void *key; |
119 | int key_size; |
120 | grn_obj *value; |
121 | |
122 | n_args_without_option--; |
123 | cursor = grn_hash_cursor_open(ctx, (grn_hash *)options, |
124 | NULL, 0, NULL, 0, |
125 | 0, -1, 0); |
126 | if (!cursor) { |
127 | GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE, |
128 | "snippet(): couldn't open cursor" ); |
129 | goto exit; |
130 | } |
131 | while (grn_hash_cursor_next(ctx, cursor) != GRN_ID_NIL) { |
132 | grn_hash_cursor_get_key_value(ctx, cursor, |
133 | &key, &key_size, |
134 | (void **)&value); |
135 | if (key_size == 5 && !memcmp(key, "width" , 5)) { |
136 | width = GRN_UINT32_VALUE(value); |
137 | } else if (key_size == 13 && !memcmp(key, "max_n_results" , 13)) { |
138 | max_n_results = GRN_UINT32_VALUE(value); |
139 | } else if (key_size == 19 && !memcmp(key, "skip_leading_spaces" , 19)) { |
140 | if (GRN_BOOL_VALUE(value) == GRN_FALSE) { |
141 | flags &= ~GRN_SNIP_SKIP_LEADING_SPACES; |
142 | } |
143 | } else if (key_size == 11 && !memcmp(key, "html_escape" , 11)) { |
144 | if (GRN_BOOL_VALUE(value)) { |
145 | mapping = GRN_SNIP_MAPPING_HTML_ESCAPE; |
146 | } |
147 | } else if (key_size == 6 && !memcmp(key, "prefix" , 6)) { |
148 | prefix = GRN_TEXT_VALUE(value); |
149 | prefix_length = GRN_TEXT_LEN(value); |
150 | } else if (key_size == 6 && !memcmp(key, "suffix" , 6)) { |
151 | suffix = GRN_TEXT_VALUE(value); |
152 | suffix_length = GRN_TEXT_LEN(value); |
153 | } else if (key_size == 10 && !memcmp(key, "normalizer" , 10)) { |
154 | normalizer_name = GRN_TEXT_VALUE(value); |
155 | normalizer_name_length = GRN_TEXT_LEN(value); |
156 | } else if (key_size == 16 && !memcmp(key, "default_open_tag" , 16)) { |
157 | default_open_tag = GRN_TEXT_VALUE(value); |
158 | default_open_tag_length = GRN_TEXT_LEN(value); |
159 | } else if (key_size == 17 && !memcmp(key, "default_close_tag" , 17)) { |
160 | default_close_tag = GRN_TEXT_VALUE(value); |
161 | default_close_tag_length = GRN_TEXT_LEN(value); |
162 | } else { |
163 | GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, |
164 | "invalid option name: <%.*s>" , |
165 | key_size, (char *)key); |
166 | grn_hash_cursor_close(ctx, cursor); |
167 | goto exit; |
168 | } |
169 | } |
170 | grn_hash_cursor_close(ctx, cursor); |
171 | } |
172 | |
173 | snip = grn_snip_open(ctx, flags, width, max_n_results, |
174 | default_open_tag, default_open_tag_length, |
175 | default_close_tag, default_close_tag_length, mapping); |
176 | if (snip) { |
177 | grn_rc rc; |
178 | unsigned int i; |
179 | if (!normalizer_name) { |
180 | grn_snip_set_normalizer(ctx, snip, GRN_NORMALIZER_AUTO); |
181 | } else if (normalizer_name_length > 0) { |
182 | grn_obj *normalizer; |
183 | normalizer = grn_ctx_get(ctx, normalizer_name, normalizer_name_length); |
184 | if (!grn_obj_is_normalizer_proc(ctx, normalizer)) { |
185 | grn_obj inspected; |
186 | GRN_TEXT_INIT(&inspected, 0); |
187 | grn_inspect(ctx, &inspected, normalizer); |
188 | GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, |
189 | "snippet(): not normalizer: <%.*s>" , |
190 | (int)GRN_TEXT_LEN(&inspected), |
191 | GRN_TEXT_VALUE(&inspected)); |
192 | GRN_OBJ_FIN(ctx, &inspected); |
193 | grn_obj_unlink(ctx, normalizer); |
194 | goto exit; |
195 | } |
196 | grn_snip_set_normalizer(ctx, snip, normalizer); |
197 | grn_obj_unlink(ctx, normalizer); |
198 | } |
199 | if (default_open_tag_length == 0 && default_close_tag_length == 0) { |
200 | unsigned int n_keyword_sets = |
201 | (n_args_without_option - N_REQUIRED_ARGS) / KEYWORD_SET_SIZE; |
202 | grn_obj **keyword_set_args = args + N_REQUIRED_ARGS; |
203 | for (i = 0; i < n_keyword_sets; i++) { |
204 | rc = grn_snip_add_cond(ctx, snip, |
205 | GRN_TEXT_VALUE(keyword_set_args[i * KEYWORD_SET_SIZE]), |
206 | GRN_TEXT_LEN(keyword_set_args[i * KEYWORD_SET_SIZE]), |
207 | GRN_TEXT_VALUE(keyword_set_args[i * KEYWORD_SET_SIZE + 1]), |
208 | GRN_TEXT_LEN(keyword_set_args[i * KEYWORD_SET_SIZE + 1]), |
209 | GRN_TEXT_VALUE(keyword_set_args[i * KEYWORD_SET_SIZE + 2]), |
210 | GRN_TEXT_LEN(keyword_set_args[i * KEYWORD_SET_SIZE + 2])); |
211 | } |
212 | } else { |
213 | unsigned int n_keywords = n_args_without_option - N_REQUIRED_ARGS; |
214 | grn_obj **keyword_args = args + N_REQUIRED_ARGS; |
215 | for (i = 0; i < n_keywords; i++) { |
216 | rc = grn_snip_add_cond(ctx, snip, |
217 | GRN_TEXT_VALUE(keyword_args[i]), |
218 | GRN_TEXT_LEN(keyword_args[i]), |
219 | NULL, 0, |
220 | NULL, 0); |
221 | } |
222 | } |
223 | snippets = snippet_exec(ctx, snip, text, user_data, |
224 | prefix, prefix_length, |
225 | suffix, suffix_length); |
226 | } |
227 | } |
228 | #undef KEYWORD_SET_SIZE |
229 | #undef N_REQUIRED_ARGS |
230 | |
231 | exit : |
232 | if (!snippets) { |
233 | snippets = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0); |
234 | } |
235 | |
236 | return snippets; |
237 | } |
238 | |
239 | void |
240 | grn_proc_init_snippet(grn_ctx *ctx) |
241 | { |
242 | grn_proc_create(ctx, "snippet" , -1, GRN_PROC_FUNCTION, |
243 | func_snippet, NULL, NULL, 0, NULL); |
244 | } |
245 | |
246 | static grn_obj * |
247 | func_snippet_html(grn_ctx *ctx, int nargs, grn_obj **args, |
248 | grn_user_data *user_data) |
249 | { |
250 | grn_obj *snippets = NULL; |
251 | |
252 | /* TODO: support parameters */ |
253 | if (nargs == 1) { |
254 | grn_obj *text = args[0]; |
255 | grn_obj *expression = NULL; |
256 | grn_obj *condition_ptr = NULL; |
257 | grn_obj *condition = NULL; |
258 | grn_obj *snip = NULL; |
259 | int flags = GRN_SNIP_SKIP_LEADING_SPACES; |
260 | unsigned int width = 200; |
261 | unsigned int max_n_results = 3; |
262 | const char *open_tag = "<span class=\"keyword\">" ; |
263 | const char *close_tag = "</span>" ; |
264 | grn_snip_mapping *mapping = GRN_SNIP_MAPPING_HTML_ESCAPE; |
265 | |
266 | grn_proc_get_info(ctx, user_data, NULL, NULL, &expression); |
267 | condition_ptr = grn_expr_get_var(ctx, expression, |
268 | GRN_SELECT_INTERNAL_VAR_CONDITION, |
269 | strlen(GRN_SELECT_INTERNAL_VAR_CONDITION)); |
270 | if (condition_ptr) { |
271 | condition = GRN_PTR_VALUE(condition_ptr); |
272 | } |
273 | |
274 | if (condition) { |
275 | grn_obj *snip_ptr; |
276 | snip_ptr = grn_expr_get_var(ctx, expression, |
277 | GRN_FUNC_SNIPPET_HTML_CACHE_NAME, |
278 | strlen(GRN_FUNC_SNIPPET_HTML_CACHE_NAME)); |
279 | if (snip_ptr) { |
280 | snip = GRN_PTR_VALUE(snip_ptr); |
281 | } else { |
282 | snip_ptr = |
283 | grn_expr_get_or_add_var(ctx, expression, |
284 | GRN_FUNC_SNIPPET_HTML_CACHE_NAME, |
285 | strlen(GRN_FUNC_SNIPPET_HTML_CACHE_NAME)); |
286 | GRN_OBJ_FIN(ctx, snip_ptr); |
287 | GRN_PTR_INIT(snip_ptr, GRN_OBJ_OWN, GRN_DB_OBJECT); |
288 | |
289 | snip = grn_snip_open(ctx, flags, width, max_n_results, |
290 | open_tag, strlen(open_tag), |
291 | close_tag, strlen(close_tag), |
292 | mapping); |
293 | if (snip) { |
294 | grn_snip_set_normalizer(ctx, snip, GRN_NORMALIZER_AUTO); |
295 | grn_expr_snip_add_conditions(ctx, condition, snip, |
296 | 0, NULL, NULL, NULL, NULL); |
297 | GRN_PTR_SET(ctx, snip_ptr, snip); |
298 | } |
299 | } |
300 | } |
301 | |
302 | if (snip) { |
303 | snippets = snippet_exec(ctx, snip, text, user_data, NULL, 0, NULL, 0); |
304 | } |
305 | } |
306 | |
307 | if (!snippets) { |
308 | snippets = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0); |
309 | } |
310 | |
311 | return snippets; |
312 | } |
313 | |
314 | void |
315 | grn_proc_init_snippet_html(grn_ctx *ctx) |
316 | { |
317 | grn_proc_create(ctx, "snippet_html" , -1, GRN_PROC_FUNCTION, |
318 | func_snippet_html, NULL, NULL, 0, NULL); |
319 | } |
320 | |