1/* -*- c-basic-offset: 2 -*- */
2/*
3 Copyright(C) 2009-2016 Brazil
4
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License version 2.1 as published by the Free Software Foundation.
8
9 This library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with this library; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17*/
18
19#include "../grn_proc.h"
20#include "../grn_expr.h"
21
22#include <groonga/plugin.h>
23#include <string.h>
24
25#define GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME "$highlight_html"
26
27static void
28grn_pat_tag_keys_put_original_text(grn_ctx *ctx, grn_obj *output,
29 const char *text, unsigned int length,
30 grn_bool use_html_escape)
31{
32 if (use_html_escape) {
33 grn_text_escape_xml(ctx, output, text, length);
34 } else {
35 GRN_TEXT_PUT(ctx, output, text, length);
36 }
37}
38
39static grn_rc
40grn_pat_tag_keys(grn_ctx *ctx, grn_obj *keywords,
41 const char *string, unsigned int string_length,
42 const char **open_tags, unsigned int *open_tag_lengths,
43 const char **close_tags, unsigned int *close_tag_lengths,
44 unsigned int n_tags,
45 grn_obj *highlighted,
46 grn_bool use_html_escape)
47{
48 while (string_length > 0) {
49#define MAX_N_HITS 16
50 grn_pat_scan_hit hits[MAX_N_HITS];
51 const char *rest;
52 unsigned int i, n_hits;
53 unsigned int previous = 0;
54 size_t chunk_length;
55
56 n_hits = grn_pat_scan(ctx, (grn_pat *)keywords,
57 string, string_length,
58 hits, MAX_N_HITS, &rest);
59 for (i = 0; i < n_hits; i++) {
60 unsigned int nth_tag;
61 if (hits[i].offset - previous > 0) {
62 grn_pat_tag_keys_put_original_text(ctx,
63 highlighted,
64 string + previous,
65 hits[i].offset - previous,
66 use_html_escape);
67 }
68 nth_tag = ((hits[i].id - 1) % n_tags);
69 GRN_TEXT_PUT(ctx, highlighted,
70 open_tags[nth_tag], open_tag_lengths[nth_tag]);
71 grn_pat_tag_keys_put_original_text(ctx,
72 highlighted,
73 string + hits[i].offset,
74 hits[i].length,
75 use_html_escape);
76 GRN_TEXT_PUT(ctx, highlighted,
77 close_tags[nth_tag], close_tag_lengths[nth_tag]);
78 previous = hits[i].offset + hits[i].length;
79 }
80
81 chunk_length = rest - string;
82 if (chunk_length - previous > 0) {
83 grn_pat_tag_keys_put_original_text(ctx,
84 highlighted,
85 string + previous,
86 string_length - previous,
87 use_html_escape);
88 }
89 string_length -= chunk_length;
90 string = rest;
91#undef MAX_N_HITS
92 }
93
94 return GRN_SUCCESS;
95}
96
97static grn_obj *
98func_highlight_create_keywords_table(grn_ctx *ctx,
99 grn_user_data *user_data,
100 const char *normalizer_name,
101 unsigned int normalizer_name_length)
102{
103 grn_obj *keywords;
104
105 keywords = grn_table_create(ctx, NULL, 0, NULL,
106 GRN_OBJ_TABLE_PAT_KEY,
107 grn_ctx_at(ctx, GRN_DB_SHORT_TEXT),
108 NULL);
109
110 if (normalizer_name_length > 0) {
111 grn_obj *normalizer;
112 normalizer = grn_ctx_get(ctx,
113 normalizer_name,
114 normalizer_name_length);
115 if (!grn_obj_is_normalizer_proc(ctx, normalizer)) {
116 grn_obj inspected;
117 GRN_TEXT_INIT(&inspected, 0);
118 grn_inspect(ctx, &inspected, normalizer);
119 GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
120 "highlight_full() not normalizer: <%.*s>",
121 (int)GRN_TEXT_LEN(&inspected),
122 GRN_TEXT_VALUE(&inspected));
123 GRN_OBJ_FIN(ctx, &inspected);
124 grn_obj_unlink(ctx, normalizer);
125 grn_obj_unlink(ctx, keywords);
126 return NULL;
127 }
128 grn_obj_set_info(ctx, keywords, GRN_INFO_NORMALIZER, normalizer);
129 grn_obj_unlink(ctx, normalizer);
130 }
131
132 return keywords;
133}
134
135static grn_obj *
136highlight_keyword_sets(grn_ctx *ctx, grn_user_data *user_data,
137 grn_obj **keyword_set_args, unsigned int n_keyword_args,
138 grn_obj *string, grn_obj *keywords,
139 grn_bool use_html_escape)
140{
141 grn_obj *highlighted = NULL;
142#define KEYWORD_SET_SIZE 3
143 {
144 unsigned int i;
145 unsigned int n_keyword_sets;
146 grn_obj open_tags;
147 grn_obj open_tag_lengths;
148 grn_obj close_tags;
149 grn_obj close_tag_lengths;
150
151 n_keyword_sets = n_keyword_args / KEYWORD_SET_SIZE;
152
153 GRN_OBJ_INIT(&open_tags, GRN_BULK, 0, GRN_DB_VOID);
154 GRN_OBJ_INIT(&open_tag_lengths, GRN_BULK, 0, GRN_DB_VOID);
155 GRN_OBJ_INIT(&close_tags, GRN_BULK, 0, GRN_DB_VOID);
156 GRN_OBJ_INIT(&close_tag_lengths, GRN_BULK, 0, GRN_DB_VOID);
157
158 for (i = 0; i < n_keyword_sets; i++) {
159 grn_obj *keyword = keyword_set_args[i * KEYWORD_SET_SIZE + 0];
160 grn_obj *open_tag = keyword_set_args[i * KEYWORD_SET_SIZE + 1];
161 grn_obj *close_tag = keyword_set_args[i * KEYWORD_SET_SIZE + 2];
162
163 grn_table_add(ctx, keywords,
164 GRN_TEXT_VALUE(keyword),
165 GRN_TEXT_LEN(keyword),
166 NULL);
167 {
168 const char *open_tag_content = GRN_TEXT_VALUE(open_tag);
169 grn_bulk_write(ctx, &open_tags,
170 (const char *)(&open_tag_content),
171 sizeof(char *));
172 }
173 {
174 unsigned int open_tag_length = GRN_TEXT_LEN(open_tag);
175 grn_bulk_write(ctx, &open_tag_lengths,
176 (const char *)(&open_tag_length),
177 sizeof(unsigned int));
178 }
179 {
180 const char *close_tag_content = GRN_TEXT_VALUE(close_tag);
181 grn_bulk_write(ctx, &close_tags,
182 (const char *)(&close_tag_content),
183 sizeof(char *));
184 }
185 {
186 unsigned int close_tag_length = GRN_TEXT_LEN(close_tag);
187 grn_bulk_write(ctx, &close_tag_lengths,
188 (const char *)(&close_tag_length),
189 sizeof(unsigned int));
190 }
191 }
192
193 highlighted = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_TEXT, 0);
194 grn_pat_tag_keys(ctx, keywords,
195 GRN_TEXT_VALUE(string), GRN_TEXT_LEN(string),
196 (const char **)GRN_BULK_HEAD(&open_tags),
197 (unsigned int *)GRN_BULK_HEAD(&open_tag_lengths),
198 (const char **)GRN_BULK_HEAD(&close_tags),
199 (unsigned int *)GRN_BULK_HEAD(&close_tag_lengths),
200 n_keyword_sets,
201 highlighted,
202 use_html_escape);
203 grn_obj_unlink(ctx, &open_tags);
204 grn_obj_unlink(ctx, &open_tag_lengths);
205 grn_obj_unlink(ctx, &close_tags);
206 grn_obj_unlink(ctx, &close_tag_lengths);
207 }
208#undef KEYWORD_SET_SIZE
209 return highlighted;
210}
211
212static grn_obj *
213highlight_keywords(grn_ctx *ctx, grn_user_data *user_data,
214 grn_obj *string, grn_obj *keywords, grn_bool use_html_escape,
215 const char *default_open_tag, unsigned int default_open_tag_length,
216 const char *default_close_tag, unsigned int default_close_tag_length)
217{
218 grn_obj *highlighted = NULL;
219 const char *open_tags[1];
220 unsigned int open_tag_lengths[1];
221 const char *close_tags[1];
222 unsigned int close_tag_lengths[1];
223 unsigned int n_keyword_sets = 1;
224
225 open_tags[0] = default_open_tag;
226 open_tag_lengths[0] = default_open_tag_length;
227 close_tags[0] = default_close_tag;
228 close_tag_lengths[0] = default_close_tag_length;
229
230 highlighted = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_TEXT, 0);
231 grn_pat_tag_keys(ctx, keywords,
232 GRN_TEXT_VALUE(string), GRN_TEXT_LEN(string),
233 open_tags,
234 open_tag_lengths,
235 close_tags,
236 close_tag_lengths,
237 n_keyword_sets,
238 highlighted,
239 use_html_escape);
240
241 return highlighted;
242}
243
244static grn_obj *
245func_highlight(grn_ctx *ctx, int nargs, grn_obj **args,
246 grn_user_data *user_data)
247{
248 grn_obj *highlighted = NULL;
249
250#define N_REQUIRED_ARGS 1
251 if (nargs > N_REQUIRED_ARGS) {
252 grn_obj *string = args[0];
253 grn_bool use_html_escape = GRN_FALSE;
254 grn_obj *keywords;
255 const char *normalizer_name = "NormalizerAuto";
256 unsigned int normalizer_name_length = 14;
257 const char *default_open_tag = NULL;
258 unsigned int default_open_tag_length = 0;
259 const char *default_close_tag = NULL;
260 unsigned int default_close_tag_length = 0;
261 grn_obj *end_arg = args[nargs - 1];
262 int n_args_without_option = nargs;
263
264 if (end_arg->header.type == GRN_TABLE_HASH_KEY) {
265 grn_obj *options = end_arg;
266 grn_hash_cursor *cursor;
267 void *key;
268 grn_obj *value;
269 int key_size;
270
271 n_args_without_option--;
272 cursor = grn_hash_cursor_open(ctx, (grn_hash *)options,
273 NULL, 0, NULL, 0,
274 0, -1, 0);
275 if (!cursor) {
276 GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE,
277 "highlight(): couldn't open cursor");
278 goto exit;
279 }
280 while (grn_hash_cursor_next(ctx, cursor) != GRN_ID_NIL) {
281 grn_hash_cursor_get_key_value(ctx, cursor, &key, &key_size,
282 (void **)&value);
283 if (key_size == 10 && !memcmp(key, "normalizer", 10)) {
284 normalizer_name = GRN_TEXT_VALUE(value);
285 normalizer_name_length = GRN_TEXT_LEN(value);
286 } else if (key_size == 11 && !memcmp(key, "html_escape", 11)) {
287 if (GRN_BOOL_VALUE(value)) {
288 use_html_escape = GRN_TRUE;
289 }
290 } else if (key_size == 16 && !memcmp(key, "default_open_tag", 16)) {
291 default_open_tag = GRN_TEXT_VALUE(value);
292 default_open_tag_length = GRN_TEXT_LEN(value);
293 } else if (key_size == 17 && !memcmp(key, "default_close_tag", 17)) {
294 default_close_tag = GRN_TEXT_VALUE(value);
295 default_close_tag_length = GRN_TEXT_LEN(value);
296 } else {
297 GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, "invalid option name: <%.*s>",
298 key_size, (char *)key);
299 grn_hash_cursor_close(ctx, cursor);
300 goto exit;
301 }
302 }
303 grn_hash_cursor_close(ctx, cursor);
304 }
305
306 keywords =
307 func_highlight_create_keywords_table(ctx, user_data,
308 normalizer_name,
309 normalizer_name_length);
310
311 if (keywords) {
312 grn_obj **keyword_args = args + N_REQUIRED_ARGS;
313 unsigned int n_keyword_args = n_args_without_option - N_REQUIRED_ARGS;
314 if (default_open_tag_length == 0 && default_close_tag_length == 0) {
315 highlighted = highlight_keyword_sets(ctx, user_data,
316 keyword_args, n_keyword_args,
317 string, keywords, use_html_escape);
318 } else {
319 unsigned int i;
320 for (i = 0; i < n_keyword_args; i++) {
321 grn_table_add(ctx, keywords,
322 GRN_TEXT_VALUE(keyword_args[i]),
323 GRN_TEXT_LEN(keyword_args[i]),
324 NULL);
325 }
326 highlighted = highlight_keywords(ctx, user_data,
327 string, keywords, use_html_escape,
328 default_open_tag, default_open_tag_length,
329 default_close_tag, default_close_tag_length);
330 }
331 }
332 }
333#undef N_REQUIRED_ARGS
334
335exit :
336 if (!highlighted) {
337 highlighted = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
338 }
339
340 return highlighted;
341}
342
343void
344grn_proc_init_highlight(grn_ctx *ctx)
345{
346 grn_proc_create(ctx, "highlight", -1, GRN_PROC_FUNCTION,
347 func_highlight, NULL, NULL, 0, NULL);
348}
349
350static grn_obj *
351func_highlight_full(grn_ctx *ctx, int nargs, grn_obj **args,
352 grn_user_data *user_data)
353{
354 grn_obj *highlighted = NULL;
355
356#define N_REQUIRED_ARGS 3
357#define KEYWORD_SET_SIZE 3
358 if ((nargs >= (N_REQUIRED_ARGS + KEYWORD_SET_SIZE) &&
359 (nargs - N_REQUIRED_ARGS) % KEYWORD_SET_SIZE == 0)) {
360 grn_obj *string = args[0];
361 grn_obj *keywords;
362 const char *normalizer_name = GRN_TEXT_VALUE(args[1]);
363 unsigned int normalizer_name_length = GRN_TEXT_LEN(args[1]);
364 grn_bool use_html_escape = GRN_BOOL_VALUE(args[2]);
365
366 keywords =
367 func_highlight_create_keywords_table(ctx, user_data,
368 normalizer_name,
369 normalizer_name_length);
370 if (keywords) {
371 highlighted = highlight_keyword_sets(ctx, user_data,
372 args + N_REQUIRED_ARGS,
373 nargs - N_REQUIRED_ARGS,
374 string, keywords,
375 use_html_escape);
376 }
377 }
378
379 if (!highlighted) {
380 highlighted = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
381 }
382#undef KEYWORD_SET_SIZE
383#undef N_REQUIRED_ARGS
384
385 return highlighted;
386}
387
388void
389grn_proc_init_highlight_full(grn_ctx *ctx)
390{
391 grn_proc_create(ctx, "highlight_full", -1, GRN_PROC_FUNCTION,
392 func_highlight_full, NULL, NULL, 0, NULL);
393}
394
395static grn_obj *
396func_highlight_html_create_keywords_table(grn_ctx *ctx, grn_obj *expression)
397{
398 grn_obj *keywords;
399 grn_obj *condition_ptr = NULL;
400 grn_obj *condition = NULL;
401
402 keywords = grn_table_create(ctx, NULL, 0, NULL,
403 GRN_OBJ_TABLE_PAT_KEY,
404 grn_ctx_at(ctx, GRN_DB_SHORT_TEXT),
405 NULL);
406
407 {
408 grn_obj *normalizer;
409 normalizer = grn_ctx_get(ctx, "NormalizerAuto", -1);
410 grn_obj_set_info(ctx, keywords, GRN_INFO_NORMALIZER, normalizer);
411 grn_obj_unlink(ctx, normalizer);
412 }
413
414 condition_ptr = grn_expr_get_var(ctx, expression,
415 GRN_SELECT_INTERNAL_VAR_CONDITION,
416 strlen(GRN_SELECT_INTERNAL_VAR_CONDITION));
417 if (condition_ptr) {
418 condition = GRN_PTR_VALUE(condition_ptr);
419 }
420
421 if (condition) {
422 size_t i, n_keywords;
423 grn_obj current_keywords;
424 GRN_TEXT_INIT(&current_keywords, GRN_OBJ_VECTOR);
425 grn_expr_get_keywords(ctx, condition, &current_keywords);
426
427 n_keywords = grn_vector_size(ctx, &current_keywords);
428 for (i = 0; i < n_keywords; i++) {
429 const char *keyword;
430 unsigned int keyword_size;
431 keyword_size = grn_vector_get_element(ctx,
432 &current_keywords,
433 i,
434 &keyword,
435 NULL,
436 NULL);
437 grn_table_add(ctx,
438 keywords,
439 keyword,
440 keyword_size,
441 NULL);
442 }
443 GRN_OBJ_FIN(ctx, &current_keywords);
444 }
445
446 return keywords;
447}
448
449static grn_obj *
450func_highlight_html(grn_ctx *ctx, int nargs, grn_obj **args,
451 grn_user_data *user_data)
452{
453 grn_obj *highlighted = NULL;
454
455#define N_REQUIRED_ARGS 1
456 if (nargs == N_REQUIRED_ARGS) {
457 grn_obj *string = args[0];
458 grn_obj *expression = NULL;
459 grn_obj *keywords;
460 grn_obj *keywords_ptr;
461 grn_bool use_html_escape = GRN_TRUE;
462
463 grn_proc_get_info(ctx, user_data, NULL, NULL, &expression);
464
465 keywords_ptr = grn_expr_get_var(ctx, expression,
466 GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME,
467 strlen(GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME));
468 if (keywords_ptr) {
469 keywords = GRN_PTR_VALUE(keywords_ptr);
470 } else {
471 keywords_ptr =
472 grn_expr_get_or_add_var(ctx, expression,
473 GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME,
474 strlen(GRN_FUNC_HIGHLIGHT_HTML_CACHE_NAME));
475 GRN_OBJ_FIN(ctx, keywords_ptr);
476 GRN_PTR_INIT(keywords_ptr, GRN_OBJ_OWN, GRN_DB_OBJECT);
477
478 keywords = func_highlight_html_create_keywords_table(ctx, expression);
479 GRN_PTR_SET(ctx, keywords_ptr, keywords);
480 }
481
482 highlighted = highlight_keywords(ctx, user_data,
483 string, keywords, use_html_escape,
484 "<span class=\"keyword\">",
485 strlen("<span class=\"keyword\">"),
486 "</span>",
487 strlen("</span>"));
488 }
489#undef N_REQUIRED_ARGS
490
491 if (!highlighted) {
492 highlighted = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0);
493 }
494
495 return highlighted;
496}
497
498void
499grn_proc_init_highlight_html(grn_ctx *ctx)
500{
501 grn_proc_create(ctx, "highlight_html", -1, GRN_PROC_FUNCTION,
502 func_highlight_html, NULL, NULL, 0, NULL);
503}
504