1 | /* -*- c-basic-offset: 2; indent-tabs-mode: nil -*- */ |
2 | /* |
3 | Copyright(C) 2017 Kouhei Sutou <kou@clear-code.com> |
4 | |
5 | This library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | This library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with this library; if not, write to the Free Software |
17 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
18 | */ |
19 | |
20 | #include <mrn_mysql.h> |
21 | #include <mrn_mysql_compat.h> |
22 | #include <mrn_err.h> |
23 | #include <mrn_encoding.hpp> |
24 | #include <mrn_windows.hpp> |
25 | #include <mrn_table.hpp> |
26 | #include <mrn_macro.hpp> |
27 | #include <mrn_database_manager.hpp> |
28 | #include <mrn_context_pool.hpp> |
29 | #include <mrn_variables.hpp> |
30 | #include <mrn_query_parser.hpp> |
31 | #include <mrn_current_thread.hpp> |
32 | |
MRN_BEGIN_DECLS

/* Declared here, defined in another translation unit.
   mroonga_highlight_html_init() uses it to open the database that belongs
   to the current thread's database path. */
extern mrn::DatabaseManager *mrn_db_manager;
/* Declared here, defined in another translation unit.
   The UDF pulls its grn_ctx from this pool in the init function and
   releases it back on failure and in the deinit function. */
extern mrn::ContextPool *mrn_context_pool;
37 | |
38 | typedef struct st_mrn_highlight_html_info |
39 | { |
40 | grn_ctx *ctx; |
41 | grn_obj *db; |
42 | bool use_shared_db; |
43 | grn_obj *keywords; |
44 | String result_str; |
45 | struct { |
46 | bool used; |
47 | grn_obj *table; |
48 | grn_obj *default_column; |
49 | } query_mode; |
50 | } mrn_highlight_html_info; |
51 | |
52 | static my_bool mrn_highlight_html_prepare(mrn_highlight_html_info *info, |
53 | UDF_ARGS *args, |
54 | char *message, |
55 | grn_obj **keywords) |
56 | { |
57 | MRN_DBUG_ENTER_FUNCTION(); |
58 | |
59 | grn_ctx *ctx = info->ctx; |
60 | const char *normalizer_name = "NormalizerAuto" ; |
61 | grn_obj *expr = NULL; |
62 | String *result_str = &(info->result_str); |
63 | |
64 | *keywords = NULL; |
65 | |
66 | mrn::encoding::set_raw(ctx, system_charset_info); |
67 | if (system_charset_info->state & (MY_CS_BINSORT | MY_CS_CSSORT)) { |
68 | normalizer_name = NULL; |
69 | } |
70 | |
71 | *keywords = grn_table_create(ctx, NULL, 0, NULL, |
72 | GRN_OBJ_TABLE_PAT_KEY, |
73 | grn_ctx_at(ctx, GRN_DB_SHORT_TEXT), |
74 | NULL); |
75 | if (ctx->rc != GRN_SUCCESS) { |
76 | if (message) { |
77 | snprintf(message, MYSQL_ERRMSG_SIZE, |
78 | "mroonga_highlight_html(): " |
79 | "failed to create grn_pat for keywords: <%s>" , |
80 | ctx->errbuf); |
81 | } |
82 | goto error; |
83 | } |
84 | if (normalizer_name) { |
85 | grn_obj_set_info(ctx, |
86 | *keywords, |
87 | GRN_INFO_NORMALIZER, |
88 | grn_ctx_get(ctx, normalizer_name, -1)); |
89 | } |
90 | |
91 | if (info->query_mode.used) { |
92 | if (!info->query_mode.table) { |
93 | grn_obj *short_text; |
94 | short_text = grn_ctx_at(info->ctx, GRN_DB_SHORT_TEXT); |
95 | info->query_mode.table = grn_table_create(info->ctx, |
96 | NULL, 0, NULL, |
97 | GRN_TABLE_HASH_KEY, |
98 | short_text, |
99 | NULL); |
100 | } |
101 | if (!info->query_mode.default_column) { |
102 | info->query_mode.default_column = |
103 | grn_obj_column(info->ctx, |
104 | info->query_mode.table, |
105 | GRN_COLUMN_NAME_KEY, |
106 | GRN_COLUMN_NAME_KEY_LEN); |
107 | } |
108 | |
109 | grn_obj *record = NULL; |
110 | GRN_EXPR_CREATE_FOR_QUERY(info->ctx, info->query_mode.table, expr, record); |
111 | if (!expr) { |
112 | if (message) { |
113 | snprintf(message, MYSQL_ERRMSG_SIZE, |
114 | "mroonga_highlight_html(): " |
115 | "failed to create expression: <%s>" , |
116 | ctx->errbuf); |
117 | } |
118 | goto error; |
119 | } |
120 | |
121 | mrn::QueryParser query_parser(info->ctx, |
122 | current_thd, |
123 | expr, |
124 | info->query_mode.default_column, |
125 | 0, |
126 | NULL); |
127 | grn_rc rc = query_parser.parse(args->args[1], args->lengths[1]); |
128 | if (rc != GRN_SUCCESS) { |
129 | if (message) { |
130 | snprintf(message, MYSQL_ERRMSG_SIZE, |
131 | "mroonga_highlight_html(): " |
132 | "failed to parse query: <%s>" , |
133 | ctx->errbuf); |
134 | } |
135 | goto error; |
136 | } |
137 | |
138 | { |
139 | grn_obj ; |
140 | GRN_PTR_INIT(&extracted_keywords, GRN_OBJ_VECTOR, GRN_ID_NIL); |
141 | grn_expr_get_keywords(ctx, expr, &extracted_keywords); |
142 | |
143 | size_t n_keywords = |
144 | GRN_BULK_VSIZE(&extracted_keywords) / sizeof(grn_obj *); |
145 | for (size_t i = 0; i < n_keywords; ++i) { |
146 | grn_obj * = GRN_PTR_VALUE_AT(&extracted_keywords, i); |
147 | grn_table_add(ctx, |
148 | *keywords, |
149 | GRN_TEXT_VALUE(extracted_keyword), |
150 | GRN_TEXT_LEN(extracted_keyword), |
151 | NULL); |
152 | if (ctx->rc != GRN_SUCCESS) { |
153 | if (message) { |
154 | snprintf(message, MYSQL_ERRMSG_SIZE, |
155 | "mroonga_highlight_html(): " |
156 | "failed to add a keyword: <%.*s>: <%s>" , |
157 | static_cast<int>(GRN_TEXT_LEN(extracted_keyword)), |
158 | GRN_TEXT_VALUE(extracted_keyword), |
159 | ctx->errbuf); |
160 | GRN_OBJ_FIN(ctx, &extracted_keywords); |
161 | } |
162 | goto error; |
163 | } |
164 | } |
165 | GRN_OBJ_FIN(ctx, &extracted_keywords); |
166 | } |
167 | } else { |
168 | for (unsigned int i = 1; i < args->arg_count; ++i) { |
169 | if (!args->args[i]) { |
170 | continue; |
171 | } |
172 | grn_table_add(ctx, |
173 | *keywords, |
174 | args->args[i], |
175 | args->lengths[i], |
176 | NULL); |
177 | if (ctx->rc != GRN_SUCCESS) { |
178 | if (message) { |
179 | snprintf(message, MYSQL_ERRMSG_SIZE, |
180 | "mroonga_highlight_html(): " |
181 | "failed to add a keyword: <%.*s>: <%s>" , |
182 | static_cast<int>(args->lengths[i]), |
183 | args->args[i], |
184 | ctx->errbuf); |
185 | } |
186 | goto error; |
187 | } |
188 | } |
189 | } |
190 | |
191 | result_str->set_charset(system_charset_info); |
192 | DBUG_RETURN(FALSE); |
193 | |
194 | error: |
195 | if (expr) { |
196 | grn_obj_close(ctx, expr); |
197 | } |
198 | if (*keywords) { |
199 | grn_obj_close(ctx, *keywords); |
200 | } |
201 | DBUG_RETURN(TRUE); |
202 | } |
203 | |
204 | MRN_API my_bool mroonga_highlight_html_init(UDF_INIT *init, |
205 | UDF_ARGS *args, |
206 | char *message) |
207 | { |
208 | MRN_DBUG_ENTER_FUNCTION(); |
209 | |
210 | mrn_highlight_html_info *info = NULL; |
211 | |
212 | init->ptr = NULL; |
213 | |
214 | if (args->arg_count < 1) { |
215 | snprintf(message, MYSQL_ERRMSG_SIZE, |
216 | "mroonga_highlight_html(): wrong number of arguments: %u for 1+" , |
217 | args->arg_count); |
218 | goto error; |
219 | } |
220 | |
221 | |
222 | for (unsigned int i = 0; i < args->arg_count; ++i) { |
223 | switch (args->arg_type[i]) { |
224 | case STRING_RESULT: |
225 | /* OK */ |
226 | break; |
227 | case REAL_RESULT: |
228 | snprintf(message, MYSQL_ERRMSG_SIZE, |
229 | "mroonga_highlight_html(): all arguments must be string: " |
230 | "<%u>=<%g>" , |
231 | i, *((double *)(args->args[i]))); |
232 | goto error; |
233 | break; |
234 | case INT_RESULT: |
235 | snprintf(message, MYSQL_ERRMSG_SIZE, |
236 | "mroonga_highlight_html(): all arguments must be string: " |
237 | "<%u>=<%lld>" , |
238 | i, *((longlong *)(args->args[i]))); |
239 | goto error; |
240 | break; |
241 | default: |
242 | snprintf(message, MYSQL_ERRMSG_SIZE, |
243 | "mroonga_highlight_html(): all arguments must be string: <%u>" , |
244 | i); |
245 | goto error; |
246 | break; |
247 | } |
248 | } |
249 | |
250 | init->maybe_null = 0; |
251 | |
252 | info = |
253 | reinterpret_cast<mrn_highlight_html_info *>( |
254 | mrn_my_malloc(sizeof(mrn_highlight_html_info), |
255 | MYF(MY_WME | MY_ZEROFILL))); |
256 | if (!info) { |
257 | snprintf(message, MYSQL_ERRMSG_SIZE, |
258 | "mroonga_highlight_html(): failed to allocate memory" ); |
259 | goto error; |
260 | } |
261 | |
262 | info->ctx = mrn_context_pool->pull(); |
263 | { |
264 | const char *current_db_path = MRN_THD_DB_PATH(current_thd); |
265 | const char *action; |
266 | if (current_db_path) { |
267 | action = "open database" ; |
268 | mrn::Database *db; |
269 | int error = mrn_db_manager->open(current_db_path, &db); |
270 | if (error == 0) { |
271 | info->db = db->get(); |
272 | grn_ctx_use(info->ctx, info->db); |
273 | info->use_shared_db = true; |
274 | } |
275 | } else { |
276 | action = "create anonymous database" ; |
277 | info->db = grn_db_create(info->ctx, NULL, NULL); |
278 | info->use_shared_db = false; |
279 | } |
280 | if (!info->db) { |
281 | sprintf(message, |
282 | "mroonga_highlight_html(): failed to %s: %s" , |
283 | action, |
284 | info->ctx->errbuf); |
285 | goto error; |
286 | } |
287 | } |
288 | |
289 | info->query_mode.used = FALSE; |
290 | |
291 | if (args->arg_count == 2 && |
292 | args->attribute_lengths[1] == strlen("query" ) && |
293 | strncmp(args->attributes[1], "query" , strlen("query" )) == 0) { |
294 | info->query_mode.used = TRUE; |
295 | info->query_mode.table = NULL; |
296 | info->query_mode.default_column = NULL; |
297 | } |
298 | |
299 | { |
300 | bool all_keywords_are_constant = TRUE; |
301 | for (unsigned int i = 1; i < args->arg_count; ++i) { |
302 | if (!args->args[i]) { |
303 | all_keywords_are_constant = FALSE; |
304 | break; |
305 | } |
306 | } |
307 | |
308 | if (all_keywords_are_constant) { |
309 | if (mrn_highlight_html_prepare(info, args, message, &(info->keywords))) { |
310 | goto error; |
311 | } |
312 | } else { |
313 | info->keywords = NULL; |
314 | } |
315 | } |
316 | |
317 | init->ptr = (char *)info; |
318 | |
319 | DBUG_RETURN(FALSE); |
320 | |
321 | error: |
322 | if (info) { |
323 | if (!info->use_shared_db) { |
324 | grn_obj_close(info->ctx, info->db); |
325 | } |
326 | mrn_context_pool->release(info->ctx); |
327 | my_free(info); |
328 | } |
329 | DBUG_RETURN(TRUE); |
330 | } |
331 | |
332 | static bool highlight_html(grn_ctx *ctx, |
333 | grn_pat *keywords, |
334 | const char *target, |
335 | size_t target_length, |
336 | String *output) |
337 | { |
338 | MRN_DBUG_ENTER_FUNCTION(); |
339 | |
340 | grn_obj buffer; |
341 | |
342 | GRN_TEXT_INIT(&buffer, 0); |
343 | |
344 | { |
345 | const char *open_tag = "<span class=\"keyword\">" ; |
346 | size_t open_tag_length = strlen(open_tag); |
347 | const char *close_tag = "</span>" ; |
348 | size_t close_tag_length = strlen(close_tag); |
349 | |
350 | while (target_length > 0) { |
351 | #define MAX_N_HITS 16 |
352 | grn_pat_scan_hit hits[MAX_N_HITS]; |
353 | const char *rest; |
354 | size_t previous = 0; |
355 | size_t chunk_length; |
356 | |
357 | int n_hits = grn_pat_scan(ctx, |
358 | keywords, |
359 | target, |
360 | target_length, |
361 | hits, MAX_N_HITS, &rest); |
362 | for (int i = 0; i < n_hits; i++) { |
363 | if ((hits[i].offset - previous) > 0) { |
364 | grn_text_escape_xml(ctx, |
365 | &buffer, |
366 | target + previous, |
367 | hits[i].offset - previous); |
368 | } |
369 | GRN_TEXT_PUT(ctx, &buffer, open_tag, open_tag_length); |
370 | grn_text_escape_xml(ctx, |
371 | &buffer, |
372 | target + hits[i].offset, |
373 | hits[i].length); |
374 | GRN_TEXT_PUT(ctx, &buffer, close_tag, close_tag_length); |
375 | previous = hits[i].offset + hits[i].length; |
376 | } |
377 | |
378 | chunk_length = rest - target; |
379 | if ((chunk_length - previous) > 0) { |
380 | grn_text_escape_xml(ctx, |
381 | &buffer, |
382 | target + previous, |
383 | target_length - previous); |
384 | } |
385 | target_length -= chunk_length; |
386 | target = rest; |
387 | #undef MAX_N_HITS |
388 | } |
389 | } |
390 | |
391 | if (output->reserve(GRN_TEXT_LEN(&buffer))) { |
392 | my_error(ER_OUT_OF_RESOURCES, MYF(0), HA_ERR_OUT_OF_MEM); |
393 | GRN_OBJ_FIN(ctx, &buffer); |
394 | DBUG_RETURN(false); |
395 | } |
396 | |
397 | output->q_append(GRN_TEXT_VALUE(&buffer), GRN_TEXT_LEN(&buffer)); |
398 | GRN_OBJ_FIN(ctx, &buffer); |
399 | DBUG_RETURN(true); |
400 | } |
401 | |
402 | MRN_API char *mroonga_highlight_html(UDF_INIT *init, |
403 | UDF_ARGS *args, |
404 | char *result, |
405 | unsigned long *length, |
406 | char *is_null, |
407 | char *error) |
408 | { |
409 | MRN_DBUG_ENTER_FUNCTION(); |
410 | |
411 | mrn_highlight_html_info *info = |
412 | reinterpret_cast<mrn_highlight_html_info *>(init->ptr); |
413 | |
414 | grn_ctx *ctx = info->ctx; |
415 | grn_obj *keywords = info->keywords; |
416 | String *result_str = &(info->result_str); |
417 | |
418 | if (!args->args[0]) { |
419 | *is_null = 1; |
420 | DBUG_RETURN(NULL); |
421 | } |
422 | |
423 | if (!keywords) { |
424 | if (mrn_highlight_html_prepare(info, args, NULL, &keywords)) { |
425 | goto error; |
426 | } |
427 | } |
428 | |
429 | *is_null = 0; |
430 | result_str->length(0); |
431 | |
432 | if (!highlight_html(ctx, |
433 | reinterpret_cast<grn_pat *>(keywords), |
434 | args->args[0], |
435 | args->lengths[0], |
436 | result_str)) { |
437 | goto error; |
438 | } |
439 | |
440 | if (!info->keywords) { |
441 | grn_rc rc = grn_obj_close(ctx, keywords); |
442 | if (rc != GRN_SUCCESS) { |
443 | my_printf_error(ER_MRN_ERROR_FROM_GROONGA_NUM, |
444 | ER_MRN_ERROR_FROM_GROONGA_STR, MYF(0), ctx->errbuf); |
445 | goto error; |
446 | } |
447 | } |
448 | |
449 | *length = result_str->length(); |
450 | DBUG_RETURN((char *)result_str->ptr()); |
451 | |
452 | error: |
453 | if (!info->keywords && keywords) { |
454 | grn_obj_close(ctx, keywords); |
455 | } |
456 | |
457 | *is_null = 1; |
458 | *error = 1; |
459 | |
460 | DBUG_RETURN(NULL); |
461 | } |
462 | |
463 | MRN_API void mroonga_highlight_html_deinit(UDF_INIT *init) |
464 | { |
465 | MRN_DBUG_ENTER_FUNCTION(); |
466 | |
467 | mrn_highlight_html_info *info = |
468 | reinterpret_cast<mrn_highlight_html_info *>(init->ptr); |
469 | if (!info) { |
470 | DBUG_VOID_RETURN; |
471 | } |
472 | |
473 | if (info->keywords) { |
474 | grn_obj_close(info->ctx, info->keywords); |
475 | } |
476 | if (info->query_mode.used) { |
477 | if (info->query_mode.default_column) { |
478 | grn_obj_close(info->ctx, info->query_mode.default_column); |
479 | } |
480 | if (info->query_mode.table) { |
481 | grn_obj_close(info->ctx, info->query_mode.table); |
482 | } |
483 | } |
484 | MRN_STRING_FREE(info->result_str); |
485 | if (!info->use_shared_db) { |
486 | grn_obj_close(info->ctx, info->db); |
487 | } |
488 | mrn_context_pool->release(info->ctx); |
489 | my_free(info); |
490 | |
491 | DBUG_VOID_RETURN; |
492 | } |
493 | |
/* Counterpart of the MRN_BEGIN_DECLS that precedes the UDF definitions. */
MRN_END_DECLS
495 | |