1 | /* -*- c-basic-offset: 2 -*- */ |
2 | /* |
3 | Copyright(C) 2009-2016 Brazil |
4 | |
5 | This library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License version 2.1 as published by the Free Software Foundation. |
8 | |
9 | This library is distributed in the hope that it will be useful, |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | Lesser General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU Lesser General Public |
15 | License along with this library; if not, write to the Free Software |
16 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
17 | */ |
18 | |
19 | #include "../grn_proc.h" |
20 | #include "../grn_expr.h" |
21 | |
22 | #include <groonga/plugin.h> |
23 | #include <string.h> |
24 | |
25 | #define GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME "$highlight_html" |
26 | |
27 | static void |
28 | grn_pat_tag_keys_put_original_text(grn_ctx *ctx, grn_obj *output, |
29 | const char *text, unsigned int length, |
30 | grn_bool use_html_escape) |
31 | { |
32 | if (use_html_escape) { |
33 | grn_text_escape_xml(ctx, output, text, length); |
34 | } else { |
35 | GRN_TEXT_PUT(ctx, output, text, length); |
36 | } |
37 | } |
38 | |
39 | static grn_rc |
40 | grn_pat_tag_keys(grn_ctx *ctx, grn_obj *keywords, |
41 | const char *string, unsigned int string_length, |
42 | const char **open_tags, unsigned int *open_tag_lengths, |
43 | const char **close_tags, unsigned int *close_tag_lengths, |
44 | unsigned int n_tags, |
45 | grn_obj *highlighted, |
46 | grn_bool use_html_escape) |
47 | { |
48 | while (string_length > 0) { |
49 | #define MAX_N_HITS 16 |
50 | grn_pat_scan_hit hits[MAX_N_HITS]; |
51 | const char *rest; |
52 | unsigned int i, n_hits; |
53 | unsigned int previous = 0; |
54 | size_t chunk_length; |
55 | |
56 | n_hits = grn_pat_scan(ctx, (grn_pat *)keywords, |
57 | string, string_length, |
58 | hits, MAX_N_HITS, &rest); |
59 | for (i = 0; i < n_hits; i++) { |
60 | unsigned int nth_tag; |
61 | if (hits[i].offset - previous > 0) { |
62 | grn_pat_tag_keys_put_original_text(ctx, |
63 | highlighted, |
64 | string + previous, |
65 | hits[i].offset - previous, |
66 | use_html_escape); |
67 | } |
68 | nth_tag = ((hits[i].id - 1) % n_tags); |
69 | GRN_TEXT_PUT(ctx, highlighted, |
70 | open_tags[nth_tag], open_tag_lengths[nth_tag]); |
71 | grn_pat_tag_keys_put_original_text(ctx, |
72 | highlighted, |
73 | string + hits[i].offset, |
74 | hits[i].length, |
75 | use_html_escape); |
76 | GRN_TEXT_PUT(ctx, highlighted, |
77 | close_tags[nth_tag], close_tag_lengths[nth_tag]); |
78 | previous = hits[i].offset + hits[i].length; |
79 | } |
80 | |
81 | chunk_length = rest - string; |
82 | if (chunk_length - previous > 0) { |
83 | grn_pat_tag_keys_put_original_text(ctx, |
84 | highlighted, |
85 | string + previous, |
86 | string_length - previous, |
87 | use_html_escape); |
88 | } |
89 | string_length -= chunk_length; |
90 | string = rest; |
91 | #undef MAX_N_HITS |
92 | } |
93 | |
94 | return GRN_SUCCESS; |
95 | } |
96 | |
97 | static grn_obj * |
98 | func_highlight_create_keywords_table(grn_ctx *ctx, |
99 | grn_user_data *user_data, |
100 | const char *normalizer_name, |
101 | unsigned int normalizer_name_length) |
102 | { |
103 | grn_obj *keywords; |
104 | |
105 | keywords = grn_table_create(ctx, NULL, 0, NULL, |
106 | GRN_OBJ_TABLE_PAT_KEY, |
107 | grn_ctx_at(ctx, GRN_DB_SHORT_TEXT), |
108 | NULL); |
109 | |
110 | if (normalizer_name_length > 0) { |
111 | grn_obj *normalizer; |
112 | normalizer = grn_ctx_get(ctx, |
113 | normalizer_name, |
114 | normalizer_name_length); |
115 | if (!grn_obj_is_normalizer_proc(ctx, normalizer)) { |
116 | grn_obj inspected; |
117 | GRN_TEXT_INIT(&inspected, 0); |
118 | grn_inspect(ctx, &inspected, normalizer); |
119 | GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, |
120 | "highlight_full() not normalizer: <%.*s>" , |
121 | (int)GRN_TEXT_LEN(&inspected), |
122 | GRN_TEXT_VALUE(&inspected)); |
123 | GRN_OBJ_FIN(ctx, &inspected); |
124 | grn_obj_unlink(ctx, normalizer); |
125 | grn_obj_unlink(ctx, keywords); |
126 | return NULL; |
127 | } |
128 | grn_obj_set_info(ctx, keywords, GRN_INFO_NORMALIZER, normalizer); |
129 | grn_obj_unlink(ctx, normalizer); |
130 | } |
131 | |
132 | return keywords; |
133 | } |
134 | |
135 | static grn_obj * |
136 | highlight_keyword_sets(grn_ctx *ctx, grn_user_data *user_data, |
137 | grn_obj **keyword_set_args, unsigned int n_keyword_args, |
138 | grn_obj *string, grn_obj *keywords, |
139 | grn_bool use_html_escape) |
140 | { |
141 | grn_obj *highlighted = NULL; |
142 | #define KEYWORD_SET_SIZE 3 |
143 | { |
144 | unsigned int i; |
145 | unsigned int n_keyword_sets; |
146 | grn_obj open_tags; |
147 | grn_obj open_tag_lengths; |
148 | grn_obj close_tags; |
149 | grn_obj close_tag_lengths; |
150 | |
151 | n_keyword_sets = n_keyword_args / KEYWORD_SET_SIZE; |
152 | |
153 | GRN_OBJ_INIT(&open_tags, GRN_BULK, 0, GRN_DB_VOID); |
154 | GRN_OBJ_INIT(&open_tag_lengths, GRN_BULK, 0, GRN_DB_VOID); |
155 | GRN_OBJ_INIT(&close_tags, GRN_BULK, 0, GRN_DB_VOID); |
156 | GRN_OBJ_INIT(&close_tag_lengths, GRN_BULK, 0, GRN_DB_VOID); |
157 | |
158 | for (i = 0; i < n_keyword_sets; i++) { |
159 | grn_obj *keyword = keyword_set_args[i * KEYWORD_SET_SIZE + 0]; |
160 | grn_obj *open_tag = keyword_set_args[i * KEYWORD_SET_SIZE + 1]; |
161 | grn_obj *close_tag = keyword_set_args[i * KEYWORD_SET_SIZE + 2]; |
162 | |
163 | grn_table_add(ctx, keywords, |
164 | GRN_TEXT_VALUE(keyword), |
165 | GRN_TEXT_LEN(keyword), |
166 | NULL); |
167 | { |
168 | const char *open_tag_content = GRN_TEXT_VALUE(open_tag); |
169 | grn_bulk_write(ctx, &open_tags, |
170 | (const char *)(&open_tag_content), |
171 | sizeof(char *)); |
172 | } |
173 | { |
174 | unsigned int open_tag_length = GRN_TEXT_LEN(open_tag); |
175 | grn_bulk_write(ctx, &open_tag_lengths, |
176 | (const char *)(&open_tag_length), |
177 | sizeof(unsigned int)); |
178 | } |
179 | { |
180 | const char *close_tag_content = GRN_TEXT_VALUE(close_tag); |
181 | grn_bulk_write(ctx, &close_tags, |
182 | (const char *)(&close_tag_content), |
183 | sizeof(char *)); |
184 | } |
185 | { |
186 | unsigned int close_tag_length = GRN_TEXT_LEN(close_tag); |
187 | grn_bulk_write(ctx, &close_tag_lengths, |
188 | (const char *)(&close_tag_length), |
189 | sizeof(unsigned int)); |
190 | } |
191 | } |
192 | |
193 | highlighted = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_TEXT, 0); |
194 | grn_pat_tag_keys(ctx, keywords, |
195 | GRN_TEXT_VALUE(string), GRN_TEXT_LEN(string), |
196 | (const char **)GRN_BULK_HEAD(&open_tags), |
197 | (unsigned int *)GRN_BULK_HEAD(&open_tag_lengths), |
198 | (const char **)GRN_BULK_HEAD(&close_tags), |
199 | (unsigned int *)GRN_BULK_HEAD(&close_tag_lengths), |
200 | n_keyword_sets, |
201 | highlighted, |
202 | use_html_escape); |
203 | grn_obj_unlink(ctx, &open_tags); |
204 | grn_obj_unlink(ctx, &open_tag_lengths); |
205 | grn_obj_unlink(ctx, &close_tags); |
206 | grn_obj_unlink(ctx, &close_tag_lengths); |
207 | } |
208 | #undef KEYWORD_SET_SIZE |
209 | return highlighted; |
210 | } |
211 | |
212 | static grn_obj * |
213 | highlight_keywords(grn_ctx *ctx, grn_user_data *user_data, |
214 | grn_obj *string, grn_obj *keywords, grn_bool use_html_escape, |
215 | const char *default_open_tag, unsigned int default_open_tag_length, |
216 | const char *default_close_tag, unsigned int default_close_tag_length) |
217 | { |
218 | grn_obj *highlighted = NULL; |
219 | const char *open_tags[1]; |
220 | unsigned int open_tag_lengths[1]; |
221 | const char *close_tags[1]; |
222 | unsigned int close_tag_lengths[1]; |
223 | unsigned int n_keyword_sets = 1; |
224 | |
225 | open_tags[0] = default_open_tag; |
226 | open_tag_lengths[0] = default_open_tag_length; |
227 | close_tags[0] = default_close_tag; |
228 | close_tag_lengths[0] = default_close_tag_length; |
229 | |
230 | highlighted = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_TEXT, 0); |
231 | grn_pat_tag_keys(ctx, keywords, |
232 | GRN_TEXT_VALUE(string), GRN_TEXT_LEN(string), |
233 | open_tags, |
234 | open_tag_lengths, |
235 | close_tags, |
236 | close_tag_lengths, |
237 | n_keyword_sets, |
238 | highlighted, |
239 | use_html_escape); |
240 | |
241 | return highlighted; |
242 | } |
243 | |
244 | static grn_obj * |
245 | func_highlight(grn_ctx *ctx, int nargs, grn_obj **args, |
246 | grn_user_data *user_data) |
247 | { |
248 | grn_obj *highlighted = NULL; |
249 | |
250 | #define N_REQUIRED_ARGS 1 |
251 | if (nargs > N_REQUIRED_ARGS) { |
252 | grn_obj *string = args[0]; |
253 | grn_bool use_html_escape = GRN_FALSE; |
254 | grn_obj *keywords; |
255 | const char *normalizer_name = "NormalizerAuto" ; |
256 | unsigned int normalizer_name_length = 14; |
257 | const char *default_open_tag = NULL; |
258 | unsigned int default_open_tag_length = 0; |
259 | const char *default_close_tag = NULL; |
260 | unsigned int default_close_tag_length = 0; |
261 | grn_obj *end_arg = args[nargs - 1]; |
262 | int n_args_without_option = nargs; |
263 | |
264 | if (end_arg->header.type == GRN_TABLE_HASH_KEY) { |
265 | grn_obj *options = end_arg; |
266 | grn_hash_cursor *cursor; |
267 | void *key; |
268 | grn_obj *value; |
269 | int key_size; |
270 | |
271 | n_args_without_option--; |
272 | cursor = grn_hash_cursor_open(ctx, (grn_hash *)options, |
273 | NULL, 0, NULL, 0, |
274 | 0, -1, 0); |
275 | if (!cursor) { |
276 | GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE, |
277 | "highlight(): couldn't open cursor" ); |
278 | goto exit; |
279 | } |
280 | while (grn_hash_cursor_next(ctx, cursor) != GRN_ID_NIL) { |
281 | grn_hash_cursor_get_key_value(ctx, cursor, &key, &key_size, |
282 | (void **)&value); |
283 | if (key_size == 10 && !memcmp(key, "normalizer" , 10)) { |
284 | normalizer_name = GRN_TEXT_VALUE(value); |
285 | normalizer_name_length = GRN_TEXT_LEN(value); |
286 | } else if (key_size == 11 && !memcmp(key, "html_escape" , 11)) { |
287 | if (GRN_BOOL_VALUE(value)) { |
288 | use_html_escape = GRN_TRUE; |
289 | } |
290 | } else if (key_size == 16 && !memcmp(key, "default_open_tag" , 16)) { |
291 | default_open_tag = GRN_TEXT_VALUE(value); |
292 | default_open_tag_length = GRN_TEXT_LEN(value); |
293 | } else if (key_size == 17 && !memcmp(key, "default_close_tag" , 17)) { |
294 | default_close_tag = GRN_TEXT_VALUE(value); |
295 | default_close_tag_length = GRN_TEXT_LEN(value); |
296 | } else { |
297 | GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "invalid option name: <%.*s>" , |
298 | key_size, (char *)key); |
299 | grn_hash_cursor_close(ctx, cursor); |
300 | goto exit; |
301 | } |
302 | } |
303 | grn_hash_cursor_close(ctx, cursor); |
304 | } |
305 | |
306 | keywords = |
307 | func_highlight_create_keywords_table(ctx, user_data, |
308 | normalizer_name, |
309 | normalizer_name_length); |
310 | |
311 | if (keywords) { |
312 | grn_obj **keyword_args = args + N_REQUIRED_ARGS; |
313 | unsigned int n_keyword_args = n_args_without_option - N_REQUIRED_ARGS; |
314 | if (default_open_tag_length == 0 && default_close_tag_length == 0) { |
315 | highlighted = highlight_keyword_sets(ctx, user_data, |
316 | keyword_args, n_keyword_args, |
317 | string, keywords, use_html_escape); |
318 | } else { |
319 | unsigned int i; |
320 | for (i = 0; i < n_keyword_args; i++) { |
321 | grn_table_add(ctx, keywords, |
322 | GRN_TEXT_VALUE(keyword_args[i]), |
323 | GRN_TEXT_LEN(keyword_args[i]), |
324 | NULL); |
325 | } |
326 | highlighted = highlight_keywords(ctx, user_data, |
327 | string, keywords, use_html_escape, |
328 | default_open_tag, default_open_tag_length, |
329 | default_close_tag, default_close_tag_length); |
330 | } |
331 | } |
332 | } |
333 | #undef N_REQUIRED_ARGS |
334 | |
335 | exit : |
336 | if (!highlighted) { |
337 | highlighted = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0); |
338 | } |
339 | |
340 | return highlighted; |
341 | } |
342 | |
343 | void |
344 | grn_proc_init_highlight(grn_ctx *ctx) |
345 | { |
346 | grn_proc_create(ctx, "highlight" , -1, GRN_PROC_FUNCTION, |
347 | func_highlight, NULL, NULL, 0, NULL); |
348 | } |
349 | |
350 | static grn_obj * |
351 | func_highlight_full(grn_ctx *ctx, int nargs, grn_obj **args, |
352 | grn_user_data *user_data) |
353 | { |
354 | grn_obj *highlighted = NULL; |
355 | |
356 | #define N_REQUIRED_ARGS 3 |
357 | #define KEYWORD_SET_SIZE 3 |
358 | if ((nargs >= (N_REQUIRED_ARGS + KEYWORD_SET_SIZE) && |
359 | (nargs - N_REQUIRED_ARGS) % KEYWORD_SET_SIZE == 0)) { |
360 | grn_obj *string = args[0]; |
361 | grn_obj *keywords; |
362 | const char *normalizer_name = GRN_TEXT_VALUE(args[1]); |
363 | unsigned int normalizer_name_length = GRN_TEXT_LEN(args[1]); |
364 | grn_bool use_html_escape = GRN_BOOL_VALUE(args[2]); |
365 | |
366 | keywords = |
367 | func_highlight_create_keywords_table(ctx, user_data, |
368 | normalizer_name, |
369 | normalizer_name_length); |
370 | if (keywords) { |
371 | highlighted = highlight_keyword_sets(ctx, user_data, |
372 | args + N_REQUIRED_ARGS, |
373 | nargs - N_REQUIRED_ARGS, |
374 | string, keywords, |
375 | use_html_escape); |
376 | } |
377 | } |
378 | |
379 | if (!highlighted) { |
380 | highlighted = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0); |
381 | } |
382 | #undef KEYWORD_SET_SIZE |
383 | #undef N_REQUIRED_ARGS |
384 | |
385 | return highlighted; |
386 | } |
387 | |
388 | void |
389 | grn_proc_init_highlight_full(grn_ctx *ctx) |
390 | { |
391 | grn_proc_create(ctx, "highlight_full" , -1, GRN_PROC_FUNCTION, |
392 | func_highlight_full, NULL, NULL, 0, NULL); |
393 | } |
394 | |
395 | static grn_obj * |
396 | func_highlight_html_create_keywords_table(grn_ctx *ctx, grn_obj *expression) |
397 | { |
398 | grn_obj *keywords; |
399 | grn_obj *condition_ptr = NULL; |
400 | grn_obj *condition = NULL; |
401 | |
402 | keywords = grn_table_create(ctx, NULL, 0, NULL, |
403 | GRN_OBJ_TABLE_PAT_KEY, |
404 | grn_ctx_at(ctx, GRN_DB_SHORT_TEXT), |
405 | NULL); |
406 | |
407 | { |
408 | grn_obj *normalizer; |
409 | normalizer = grn_ctx_get(ctx, "NormalizerAuto" , -1); |
410 | grn_obj_set_info(ctx, keywords, GRN_INFO_NORMALIZER, normalizer); |
411 | grn_obj_unlink(ctx, normalizer); |
412 | } |
413 | |
414 | condition_ptr = grn_expr_get_var(ctx, expression, |
415 | GRN_SELECT_INTERNAL_VAR_CONDITION, |
416 | strlen(GRN_SELECT_INTERNAL_VAR_CONDITION)); |
417 | if (condition_ptr) { |
418 | condition = GRN_PTR_VALUE(condition_ptr); |
419 | } |
420 | |
421 | if (condition) { |
422 | size_t i, n_keywords; |
423 | grn_obj current_keywords; |
424 | GRN_TEXT_INIT(¤t_keywords, GRN_OBJ_VECTOR); |
425 | grn_expr_get_keywords(ctx, condition, ¤t_keywords); |
426 | |
427 | n_keywords = grn_vector_size(ctx, ¤t_keywords); |
428 | for (i = 0; i < n_keywords; i++) { |
429 | const char *keyword; |
430 | unsigned int keyword_size; |
431 | keyword_size = grn_vector_get_element(ctx, |
432 | ¤t_keywords, |
433 | i, |
434 | &keyword, |
435 | NULL, |
436 | NULL); |
437 | grn_table_add(ctx, |
438 | keywords, |
439 | keyword, |
440 | keyword_size, |
441 | NULL); |
442 | } |
443 | GRN_OBJ_FIN(ctx, ¤t_keywords); |
444 | } |
445 | |
446 | return keywords; |
447 | } |
448 | |
449 | static grn_obj * |
450 | func_highlight_html(grn_ctx *ctx, int nargs, grn_obj **args, |
451 | grn_user_data *user_data) |
452 | { |
453 | grn_obj *highlighted = NULL; |
454 | |
455 | #define N_REQUIRED_ARGS 1 |
456 | if (nargs == N_REQUIRED_ARGS) { |
457 | grn_obj *string = args[0]; |
458 | grn_obj *expression = NULL; |
459 | grn_obj *keywords; |
460 | grn_obj *keywords_ptr; |
461 | grn_bool use_html_escape = GRN_TRUE; |
462 | |
463 | grn_proc_get_info(ctx, user_data, NULL, NULL, &expression); |
464 | |
465 | keywords_ptr = grn_expr_get_var(ctx, expression, |
466 | GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME, |
467 | strlen(GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME)); |
468 | if (keywords_ptr) { |
469 | keywords = GRN_PTR_VALUE(keywords_ptr); |
470 | } else { |
471 | keywords_ptr = |
472 | grn_expr_get_or_add_var(ctx, expression, |
473 | GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME, |
474 | strlen(GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME)); |
475 | GRN_OBJ_FIN(ctx, keywords_ptr); |
476 | GRN_PTR_INIT(keywords_ptr, GRN_OBJ_OWN, GRN_DB_OBJECT); |
477 | |
478 | keywords = func_highlight_html_create_keywords_table(ctx, expression); |
479 | GRN_PTR_SET(ctx, keywords_ptr, keywords); |
480 | } |
481 | |
482 | highlighted = highlight_keywords(ctx, user_data, |
483 | string, keywords, use_html_escape, |
484 | "<span class=\"keyword\">" , |
485 | strlen("<span class=\"keyword\">" ), |
486 | "</span>" , |
487 | strlen("</span>" )); |
488 | } |
489 | #undef N_REQUIRED_ARGS |
490 | |
491 | if (!highlighted) { |
492 | highlighted = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0); |
493 | } |
494 | |
495 | return highlighted; |
496 | } |
497 | |
498 | void |
499 | grn_proc_init_highlight_html(grn_ctx *ctx) |
500 | { |
501 | grn_proc_create(ctx, "highlight_html" , -1, GRN_PROC_FUNCTION, |
502 | func_highlight_html, NULL, NULL, 0, NULL); |
503 | } |
504 | |