1/* -*- c-basic-offset: 2; indent-tabs-mode: nil -*- */
2/*
3 Copyright(C) 2017 Kouhei Sutou <kou@clear-code.com>
4
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 This library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with this library; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18*/
19
20#include <mrn_mysql.h>
21#include <mrn_mysql_compat.h>
22#include <mrn_err.h>
23#include <mrn_encoding.hpp>
24#include <mrn_windows.hpp>
25#include <mrn_table.hpp>
26#include <mrn_macro.hpp>
27#include <mrn_database_manager.hpp>
28#include <mrn_context_pool.hpp>
29#include <mrn_variables.hpp>
30#include <mrn_query_parser.hpp>
31#include <mrn_current_thread.hpp>
32
33MRN_BEGIN_DECLS
34
35extern mrn::DatabaseManager *mrn_db_manager;
36extern mrn::ContextPool *mrn_context_pool;
37
38typedef struct st_mrn_highlight_html_info
39{
40 grn_ctx *ctx;
41 grn_obj *db;
42 bool use_shared_db;
43 grn_obj *keywords;
44 String result_str;
45 struct {
46 bool used;
47 grn_obj *table;
48 grn_obj *default_column;
49 } query_mode;
50} mrn_highlight_html_info;
51
52static my_bool mrn_highlight_html_prepare(mrn_highlight_html_info *info,
53 UDF_ARGS *args,
54 char *message,
55 grn_obj **keywords)
56{
57 MRN_DBUG_ENTER_FUNCTION();
58
59 grn_ctx *ctx = info->ctx;
60 const char *normalizer_name = "NormalizerAuto";
61 grn_obj *expr = NULL;
62 String *result_str = &(info->result_str);
63
64 *keywords = NULL;
65
66 mrn::encoding::set_raw(ctx, system_charset_info);
67 if (system_charset_info->state & (MY_CS_BINSORT | MY_CS_CSSORT)) {
68 normalizer_name = NULL;
69 }
70
71 *keywords = grn_table_create(ctx, NULL, 0, NULL,
72 GRN_OBJ_TABLE_PAT_KEY,
73 grn_ctx_at(ctx, GRN_DB_SHORT_TEXT),
74 NULL);
75 if (ctx->rc != GRN_SUCCESS) {
76 if (message) {
77 snprintf(message, MYSQL_ERRMSG_SIZE,
78 "mroonga_highlight_html(): "
79 "failed to create grn_pat for keywords: <%s>",
80 ctx->errbuf);
81 }
82 goto error;
83 }
84 if (normalizer_name) {
85 grn_obj_set_info(ctx,
86 *keywords,
87 GRN_INFO_NORMALIZER,
88 grn_ctx_get(ctx, normalizer_name, -1));
89 }
90
91 if (info->query_mode.used) {
92 if (!info->query_mode.table) {
93 grn_obj *short_text;
94 short_text = grn_ctx_at(info->ctx, GRN_DB_SHORT_TEXT);
95 info->query_mode.table = grn_table_create(info->ctx,
96 NULL, 0, NULL,
97 GRN_TABLE_HASH_KEY,
98 short_text,
99 NULL);
100 }
101 if (!info->query_mode.default_column) {
102 info->query_mode.default_column =
103 grn_obj_column(info->ctx,
104 info->query_mode.table,
105 GRN_COLUMN_NAME_KEY,
106 GRN_COLUMN_NAME_KEY_LEN);
107 }
108
109 grn_obj *record = NULL;
110 GRN_EXPR_CREATE_FOR_QUERY(info->ctx, info->query_mode.table, expr, record);
111 if (!expr) {
112 if (message) {
113 snprintf(message, MYSQL_ERRMSG_SIZE,
114 "mroonga_highlight_html(): "
115 "failed to create expression: <%s>",
116 ctx->errbuf);
117 }
118 goto error;
119 }
120
121 mrn::QueryParser query_parser(info->ctx,
122 current_thd,
123 expr,
124 info->query_mode.default_column,
125 0,
126 NULL);
127 grn_rc rc = query_parser.parse(args->args[1], args->lengths[1]);
128 if (rc != GRN_SUCCESS) {
129 if (message) {
130 snprintf(message, MYSQL_ERRMSG_SIZE,
131 "mroonga_highlight_html(): "
132 "failed to parse query: <%s>",
133 ctx->errbuf);
134 }
135 goto error;
136 }
137
138 {
139 grn_obj extracted_keywords;
140 GRN_PTR_INIT(&extracted_keywords, GRN_OBJ_VECTOR, GRN_ID_NIL);
141 grn_expr_get_keywords(ctx, expr, &extracted_keywords);
142
143 size_t n_keywords =
144 GRN_BULK_VSIZE(&extracted_keywords) / sizeof(grn_obj *);
145 for (size_t i = 0; i < n_keywords; ++i) {
146 grn_obj *extracted_keyword = GRN_PTR_VALUE_AT(&extracted_keywords, i);
147 grn_table_add(ctx,
148 *keywords,
149 GRN_TEXT_VALUE(extracted_keyword),
150 GRN_TEXT_LEN(extracted_keyword),
151 NULL);
152 if (ctx->rc != GRN_SUCCESS) {
153 if (message) {
154 snprintf(message, MYSQL_ERRMSG_SIZE,
155 "mroonga_highlight_html(): "
156 "failed to add a keyword: <%.*s>: <%s>",
157 static_cast<int>(GRN_TEXT_LEN(extracted_keyword)),
158 GRN_TEXT_VALUE(extracted_keyword),
159 ctx->errbuf);
160 GRN_OBJ_FIN(ctx, &extracted_keywords);
161 }
162 goto error;
163 }
164 }
165 GRN_OBJ_FIN(ctx, &extracted_keywords);
166 }
167 } else {
168 for (unsigned int i = 1; i < args->arg_count; ++i) {
169 if (!args->args[i]) {
170 continue;
171 }
172 grn_table_add(ctx,
173 *keywords,
174 args->args[i],
175 args->lengths[i],
176 NULL);
177 if (ctx->rc != GRN_SUCCESS) {
178 if (message) {
179 snprintf(message, MYSQL_ERRMSG_SIZE,
180 "mroonga_highlight_html(): "
181 "failed to add a keyword: <%.*s>: <%s>",
182 static_cast<int>(args->lengths[i]),
183 args->args[i],
184 ctx->errbuf);
185 }
186 goto error;
187 }
188 }
189 }
190
191 result_str->set_charset(system_charset_info);
192 DBUG_RETURN(FALSE);
193
194error:
195 if (expr) {
196 grn_obj_close(ctx, expr);
197 }
198 if (*keywords) {
199 grn_obj_close(ctx, *keywords);
200 }
201 DBUG_RETURN(TRUE);
202}
203
204MRN_API my_bool mroonga_highlight_html_init(UDF_INIT *init,
205 UDF_ARGS *args,
206 char *message)
207{
208 MRN_DBUG_ENTER_FUNCTION();
209
210 mrn_highlight_html_info *info = NULL;
211
212 init->ptr = NULL;
213
214 if (args->arg_count < 1) {
215 snprintf(message, MYSQL_ERRMSG_SIZE,
216 "mroonga_highlight_html(): wrong number of arguments: %u for 1+",
217 args->arg_count);
218 goto error;
219 }
220
221
222 for (unsigned int i = 0; i < args->arg_count; ++i) {
223 switch (args->arg_type[i]) {
224 case STRING_RESULT:
225 /* OK */
226 break;
227 case REAL_RESULT:
228 snprintf(message, MYSQL_ERRMSG_SIZE,
229 "mroonga_highlight_html(): all arguments must be string: "
230 "<%u>=<%g>",
231 i, *((double *)(args->args[i])));
232 goto error;
233 break;
234 case INT_RESULT:
235 snprintf(message, MYSQL_ERRMSG_SIZE,
236 "mroonga_highlight_html(): all arguments must be string: "
237 "<%u>=<%lld>",
238 i, *((longlong *)(args->args[i])));
239 goto error;
240 break;
241 default:
242 snprintf(message, MYSQL_ERRMSG_SIZE,
243 "mroonga_highlight_html(): all arguments must be string: <%u>",
244 i);
245 goto error;
246 break;
247 }
248 }
249
250 init->maybe_null = 0;
251
252 info =
253 reinterpret_cast<mrn_highlight_html_info *>(
254 mrn_my_malloc(sizeof(mrn_highlight_html_info),
255 MYF(MY_WME | MY_ZEROFILL)));
256 if (!info) {
257 snprintf(message, MYSQL_ERRMSG_SIZE,
258 "mroonga_highlight_html(): failed to allocate memory");
259 goto error;
260 }
261
262 info->ctx = mrn_context_pool->pull();
263 {
264 const char *current_db_path = MRN_THD_DB_PATH(current_thd);
265 const char *action;
266 if (current_db_path) {
267 action = "open database";
268 mrn::Database *db;
269 int error = mrn_db_manager->open(current_db_path, &db);
270 if (error == 0) {
271 info->db = db->get();
272 grn_ctx_use(info->ctx, info->db);
273 info->use_shared_db = true;
274 }
275 } else {
276 action = "create anonymous database";
277 info->db = grn_db_create(info->ctx, NULL, NULL);
278 info->use_shared_db = false;
279 }
280 if (!info->db) {
281 sprintf(message,
282 "mroonga_highlight_html(): failed to %s: %s",
283 action,
284 info->ctx->errbuf);
285 goto error;
286 }
287 }
288
289 info->query_mode.used = FALSE;
290
291 if (args->arg_count == 2 &&
292 args->attribute_lengths[1] == strlen("query") &&
293 strncmp(args->attributes[1], "query", strlen("query")) == 0) {
294 info->query_mode.used = TRUE;
295 info->query_mode.table = NULL;
296 info->query_mode.default_column = NULL;
297 }
298
299 {
300 bool all_keywords_are_constant = TRUE;
301 for (unsigned int i = 1; i < args->arg_count; ++i) {
302 if (!args->args[i]) {
303 all_keywords_are_constant = FALSE;
304 break;
305 }
306 }
307
308 if (all_keywords_are_constant) {
309 if (mrn_highlight_html_prepare(info, args, message, &(info->keywords))) {
310 goto error;
311 }
312 } else {
313 info->keywords = NULL;
314 }
315 }
316
317 init->ptr = (char *)info;
318
319 DBUG_RETURN(FALSE);
320
321error:
322 if (info) {
323 if (!info->use_shared_db) {
324 grn_obj_close(info->ctx, info->db);
325 }
326 mrn_context_pool->release(info->ctx);
327 my_free(info);
328 }
329 DBUG_RETURN(TRUE);
330}
331
332static bool highlight_html(grn_ctx *ctx,
333 grn_pat *keywords,
334 const char *target,
335 size_t target_length,
336 String *output)
337{
338 MRN_DBUG_ENTER_FUNCTION();
339
340 grn_obj buffer;
341
342 GRN_TEXT_INIT(&buffer, 0);
343
344 {
345 const char *open_tag = "<span class=\"keyword\">";
346 size_t open_tag_length = strlen(open_tag);
347 const char *close_tag = "</span>";
348 size_t close_tag_length = strlen(close_tag);
349
350 while (target_length > 0) {
351#define MAX_N_HITS 16
352 grn_pat_scan_hit hits[MAX_N_HITS];
353 const char *rest;
354 size_t previous = 0;
355 size_t chunk_length;
356
357 int n_hits = grn_pat_scan(ctx,
358 keywords,
359 target,
360 target_length,
361 hits, MAX_N_HITS, &rest);
362 for (int i = 0; i < n_hits; i++) {
363 if ((hits[i].offset - previous) > 0) {
364 grn_text_escape_xml(ctx,
365 &buffer,
366 target + previous,
367 hits[i].offset - previous);
368 }
369 GRN_TEXT_PUT(ctx, &buffer, open_tag, open_tag_length);
370 grn_text_escape_xml(ctx,
371 &buffer,
372 target + hits[i].offset,
373 hits[i].length);
374 GRN_TEXT_PUT(ctx, &buffer, close_tag, close_tag_length);
375 previous = hits[i].offset + hits[i].length;
376 }
377
378 chunk_length = rest - target;
379 if ((chunk_length - previous) > 0) {
380 grn_text_escape_xml(ctx,
381 &buffer,
382 target + previous,
383 target_length - previous);
384 }
385 target_length -= chunk_length;
386 target = rest;
387#undef MAX_N_HITS
388 }
389 }
390
391 if (output->reserve(GRN_TEXT_LEN(&buffer))) {
392 my_error(ER_OUT_OF_RESOURCES, MYF(0), HA_ERR_OUT_OF_MEM);
393 GRN_OBJ_FIN(ctx, &buffer);
394 DBUG_RETURN(false);
395 }
396
397 output->q_append(GRN_TEXT_VALUE(&buffer), GRN_TEXT_LEN(&buffer));
398 GRN_OBJ_FIN(ctx, &buffer);
399 DBUG_RETURN(true);
400}
401
402MRN_API char *mroonga_highlight_html(UDF_INIT *init,
403 UDF_ARGS *args,
404 char *result,
405 unsigned long *length,
406 char *is_null,
407 char *error)
408{
409 MRN_DBUG_ENTER_FUNCTION();
410
411 mrn_highlight_html_info *info =
412 reinterpret_cast<mrn_highlight_html_info *>(init->ptr);
413
414 grn_ctx *ctx = info->ctx;
415 grn_obj *keywords = info->keywords;
416 String *result_str = &(info->result_str);
417
418 if (!args->args[0]) {
419 *is_null = 1;
420 DBUG_RETURN(NULL);
421 }
422
423 if (!keywords) {
424 if (mrn_highlight_html_prepare(info, args, NULL, &keywords)) {
425 goto error;
426 }
427 }
428
429 *is_null = 0;
430 result_str->length(0);
431
432 if (!highlight_html(ctx,
433 reinterpret_cast<grn_pat *>(keywords),
434 args->args[0],
435 args->lengths[0],
436 result_str)) {
437 goto error;
438 }
439
440 if (!info->keywords) {
441 grn_rc rc = grn_obj_close(ctx, keywords);
442 if (rc != GRN_SUCCESS) {
443 my_printf_error(ER_MRN_ERROR_FROM_GROONGA_NUM,
444 ER_MRN_ERROR_FROM_GROONGA_STR, MYF(0), ctx->errbuf);
445 goto error;
446 }
447 }
448
449 *length = result_str->length();
450 DBUG_RETURN((char *)result_str->ptr());
451
452error:
453 if (!info->keywords && keywords) {
454 grn_obj_close(ctx, keywords);
455 }
456
457 *is_null = 1;
458 *error = 1;
459
460 DBUG_RETURN(NULL);
461}
462
463MRN_API void mroonga_highlight_html_deinit(UDF_INIT *init)
464{
465 MRN_DBUG_ENTER_FUNCTION();
466
467 mrn_highlight_html_info *info =
468 reinterpret_cast<mrn_highlight_html_info *>(init->ptr);
469 if (!info) {
470 DBUG_VOID_RETURN;
471 }
472
473 if (info->keywords) {
474 grn_obj_close(info->ctx, info->keywords);
475 }
476 if (info->query_mode.used) {
477 if (info->query_mode.default_column) {
478 grn_obj_close(info->ctx, info->query_mode.default_column);
479 }
480 if (info->query_mode.table) {
481 grn_obj_close(info->ctx, info->query_mode.table);
482 }
483 }
484 MRN_STRING_FREE(info->result_str);
485 if (!info->use_shared_db) {
486 grn_obj_close(info->ctx, info->db);
487 }
488 mrn_context_pool->release(info->ctx);
489 my_free(info);
490
491 DBUG_VOID_RETURN;
492}
493
494MRN_END_DECLS
495