| 1 | /* -*- c-basic-offset: 2 -*- */ |
| 2 | /* |
| 3 | Copyright(C) 2009-2016 Brazil |
| 4 | |
| 5 | This library is free software; you can redistribute it and/or |
| 6 | modify it under the terms of the GNU Lesser General Public |
| 7 | License version 2.1 as published by the Free Software Foundation. |
| 8 | |
| 9 | This library is distributed in the hope that it will be useful, |
| 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 12 | Lesser General Public License for more details. |
| 13 | |
| 14 | You should have received a copy of the GNU Lesser General Public |
| 15 | License along with this library; if not, write to the Free Software |
| 16 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 17 | */ |
| 18 | |
| 19 | #include "../grn_proc.h" |
| 20 | #include "../grn_expr.h" |
| 21 | |
| 22 | #include <groonga/plugin.h> |
| 23 | #include <string.h> |
| 24 | |
/*
 * Name of the expression variable that func_snippet_html() looks up to
 * reuse a previously built snip object, and under which it stores a
 * newly built one.
 */
#define GRN_FUNC_SNIPPET_HTML_CACHE_NAME "$snippet_html"
| 26 | |
| 27 | static grn_obj * |
| 28 | snippet_exec(grn_ctx *ctx, grn_obj *snip, grn_obj *text, |
| 29 | grn_user_data *user_data, |
| 30 | const char *prefix, int prefix_length, |
| 31 | const char *suffix, int suffix_length) |
| 32 | { |
| 33 | grn_rc rc; |
| 34 | unsigned int i, n_results, max_tagged_length; |
| 35 | grn_obj snippet_buffer; |
| 36 | grn_obj *snippets; |
| 37 | |
| 38 | if (GRN_TEXT_LEN(text) == 0) { |
| 39 | return NULL; |
| 40 | } |
| 41 | |
| 42 | rc = grn_snip_exec(ctx, snip, |
| 43 | GRN_TEXT_VALUE(text), GRN_TEXT_LEN(text), |
| 44 | &n_results, &max_tagged_length); |
| 45 | if (rc != GRN_SUCCESS) { |
| 46 | return NULL; |
| 47 | } |
| 48 | |
| 49 | if (n_results == 0) { |
| 50 | return grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0); |
| 51 | } |
| 52 | |
| 53 | snippets = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_SHORT_TEXT, GRN_OBJ_VECTOR); |
| 54 | if (!snippets) { |
| 55 | return NULL; |
| 56 | } |
| 57 | |
| 58 | GRN_TEXT_INIT(&snippet_buffer, 0); |
| 59 | grn_bulk_space(ctx, &snippet_buffer, |
| 60 | prefix_length + max_tagged_length + suffix_length); |
| 61 | for (i = 0; i < n_results; i++) { |
| 62 | unsigned int snippet_length; |
| 63 | |
| 64 | GRN_BULK_REWIND(&snippet_buffer); |
| 65 | if (prefix_length) { |
| 66 | GRN_TEXT_PUT(ctx, &snippet_buffer, prefix, prefix_length); |
| 67 | } |
| 68 | rc = grn_snip_get_result(ctx, snip, i, |
| 69 | GRN_TEXT_VALUE(&snippet_buffer) + prefix_length, |
| 70 | &snippet_length); |
| 71 | if (rc == GRN_SUCCESS) { |
| 72 | grn_strncat(GRN_TEXT_VALUE(&snippet_buffer), |
| 73 | GRN_BULK_WSIZE(&snippet_buffer), |
| 74 | suffix, |
| 75 | suffix_length); |
| 76 | grn_vector_add_element(ctx, snippets, |
| 77 | GRN_TEXT_VALUE(&snippet_buffer), |
| 78 | prefix_length + snippet_length + suffix_length, |
| 79 | 0, GRN_DB_SHORT_TEXT); |
| 80 | } |
| 81 | } |
| 82 | GRN_OBJ_FIN(ctx, &snippet_buffer); |
| 83 | |
| 84 | return snippets; |
| 85 | } |
| 86 | |
| 87 | /* TODO: support caching for the same parameter. */ |
| 88 | static grn_obj * |
| 89 | func_snippet(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) |
| 90 | { |
| 91 | grn_obj *snippets = NULL; |
| 92 | |
| 93 | #define N_REQUIRED_ARGS 1 |
| 94 | #define KEYWORD_SET_SIZE 3 |
| 95 | if (nargs > N_REQUIRED_ARGS) { |
| 96 | grn_obj *text = args[0]; |
| 97 | grn_obj *end_arg = args[nargs - 1]; |
| 98 | grn_obj *snip = NULL; |
| 99 | unsigned int width = 200; |
| 100 | unsigned int max_n_results = 3; |
| 101 | grn_snip_mapping *mapping = NULL; |
| 102 | int flags = GRN_SNIP_SKIP_LEADING_SPACES; |
| 103 | const char *prefix = NULL; |
| 104 | int prefix_length = 0; |
| 105 | const char *suffix = NULL; |
| 106 | int suffix_length = 0; |
| 107 | const char *normalizer_name = NULL; |
| 108 | int normalizer_name_length = 0; |
| 109 | const char *default_open_tag = NULL; |
| 110 | int default_open_tag_length = 0; |
| 111 | const char *default_close_tag = NULL; |
| 112 | int default_close_tag_length = 0; |
| 113 | int n_args_without_option = nargs; |
| 114 | |
| 115 | if (end_arg->header.type == GRN_TABLE_HASH_KEY) { |
| 116 | grn_obj *options = end_arg; |
| 117 | grn_hash_cursor *cursor; |
| 118 | void *key; |
| 119 | int key_size; |
| 120 | grn_obj *value; |
| 121 | |
| 122 | n_args_without_option--; |
| 123 | cursor = grn_hash_cursor_open(ctx, (grn_hash *)options, |
| 124 | NULL, 0, NULL, 0, |
| 125 | 0, -1, 0); |
| 126 | if (!cursor) { |
| 127 | GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE, |
| 128 | "snippet(): couldn't open cursor" ); |
| 129 | goto exit; |
| 130 | } |
| 131 | while (grn_hash_cursor_next(ctx, cursor) != GRN_ID_NIL) { |
| 132 | grn_hash_cursor_get_key_value(ctx, cursor, |
| 133 | &key, &key_size, |
| 134 | (void **)&value); |
| 135 | if (key_size == 5 && !memcmp(key, "width" , 5)) { |
| 136 | width = GRN_UINT32_VALUE(value); |
| 137 | } else if (key_size == 13 && !memcmp(key, "max_n_results" , 13)) { |
| 138 | max_n_results = GRN_UINT32_VALUE(value); |
| 139 | } else if (key_size == 19 && !memcmp(key, "skip_leading_spaces" , 19)) { |
| 140 | if (GRN_BOOL_VALUE(value) == GRN_FALSE) { |
| 141 | flags &= ~GRN_SNIP_SKIP_LEADING_SPACES; |
| 142 | } |
| 143 | } else if (key_size == 11 && !memcmp(key, "html_escape" , 11)) { |
| 144 | if (GRN_BOOL_VALUE(value)) { |
| 145 | mapping = GRN_SNIP_MAPPING_HTML_ESCAPE; |
| 146 | } |
| 147 | } else if (key_size == 6 && !memcmp(key, "prefix" , 6)) { |
| 148 | prefix = GRN_TEXT_VALUE(value); |
| 149 | prefix_length = GRN_TEXT_LEN(value); |
| 150 | } else if (key_size == 6 && !memcmp(key, "suffix" , 6)) { |
| 151 | suffix = GRN_TEXT_VALUE(value); |
| 152 | suffix_length = GRN_TEXT_LEN(value); |
| 153 | } else if (key_size == 10 && !memcmp(key, "normalizer" , 10)) { |
| 154 | normalizer_name = GRN_TEXT_VALUE(value); |
| 155 | normalizer_name_length = GRN_TEXT_LEN(value); |
| 156 | } else if (key_size == 16 && !memcmp(key, "default_open_tag" , 16)) { |
| 157 | default_open_tag = GRN_TEXT_VALUE(value); |
| 158 | default_open_tag_length = GRN_TEXT_LEN(value); |
| 159 | } else if (key_size == 17 && !memcmp(key, "default_close_tag" , 17)) { |
| 160 | default_close_tag = GRN_TEXT_VALUE(value); |
| 161 | default_close_tag_length = GRN_TEXT_LEN(value); |
| 162 | } else { |
| 163 | GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, |
| 164 | "invalid option name: <%.*s>" , |
| 165 | key_size, (char *)key); |
| 166 | grn_hash_cursor_close(ctx, cursor); |
| 167 | goto exit; |
| 168 | } |
| 169 | } |
| 170 | grn_hash_cursor_close(ctx, cursor); |
| 171 | } |
| 172 | |
| 173 | snip = grn_snip_open(ctx, flags, width, max_n_results, |
| 174 | default_open_tag, default_open_tag_length, |
| 175 | default_close_tag, default_close_tag_length, mapping); |
| 176 | if (snip) { |
| 177 | grn_rc rc; |
| 178 | unsigned int i; |
| 179 | if (!normalizer_name) { |
| 180 | grn_snip_set_normalizer(ctx, snip, GRN_NORMALIZER_AUTO); |
| 181 | } else if (normalizer_name_length > 0) { |
| 182 | grn_obj *normalizer; |
| 183 | normalizer = grn_ctx_get(ctx, normalizer_name, normalizer_name_length); |
| 184 | if (!grn_obj_is_normalizer_proc(ctx, normalizer)) { |
| 185 | grn_obj inspected; |
| 186 | GRN_TEXT_INIT(&inspected, 0); |
| 187 | grn_inspect(ctx, &inspected, normalizer); |
| 188 | GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, |
| 189 | "snippet(): not normalizer: <%.*s>" , |
| 190 | (int)GRN_TEXT_LEN(&inspected), |
| 191 | GRN_TEXT_VALUE(&inspected)); |
| 192 | GRN_OBJ_FIN(ctx, &inspected); |
| 193 | grn_obj_unlink(ctx, normalizer); |
| 194 | goto exit; |
| 195 | } |
| 196 | grn_snip_set_normalizer(ctx, snip, normalizer); |
| 197 | grn_obj_unlink(ctx, normalizer); |
| 198 | } |
| 199 | if (default_open_tag_length == 0 && default_close_tag_length == 0) { |
| 200 | unsigned int n_keyword_sets = |
| 201 | (n_args_without_option - N_REQUIRED_ARGS) / KEYWORD_SET_SIZE; |
| 202 | grn_obj **keyword_set_args = args + N_REQUIRED_ARGS; |
| 203 | for (i = 0; i < n_keyword_sets; i++) { |
| 204 | rc = grn_snip_add_cond(ctx, snip, |
| 205 | GRN_TEXT_VALUE(keyword_set_args[i * KEYWORD_SET_SIZE]), |
| 206 | GRN_TEXT_LEN(keyword_set_args[i * KEYWORD_SET_SIZE]), |
| 207 | GRN_TEXT_VALUE(keyword_set_args[i * KEYWORD_SET_SIZE + 1]), |
| 208 | GRN_TEXT_LEN(keyword_set_args[i * KEYWORD_SET_SIZE + 1]), |
| 209 | GRN_TEXT_VALUE(keyword_set_args[i * KEYWORD_SET_SIZE + 2]), |
| 210 | GRN_TEXT_LEN(keyword_set_args[i * KEYWORD_SET_SIZE + 2])); |
| 211 | } |
| 212 | } else { |
| 213 | unsigned int n_keywords = n_args_without_option - N_REQUIRED_ARGS; |
| 214 | grn_obj **keyword_args = args + N_REQUIRED_ARGS; |
| 215 | for (i = 0; i < n_keywords; i++) { |
| 216 | rc = grn_snip_add_cond(ctx, snip, |
| 217 | GRN_TEXT_VALUE(keyword_args[i]), |
| 218 | GRN_TEXT_LEN(keyword_args[i]), |
| 219 | NULL, 0, |
| 220 | NULL, 0); |
| 221 | } |
| 222 | } |
| 223 | snippets = snippet_exec(ctx, snip, text, user_data, |
| 224 | prefix, prefix_length, |
| 225 | suffix, suffix_length); |
| 226 | } |
| 227 | } |
| 228 | #undef KEYWORD_SET_SIZE |
| 229 | #undef N_REQUIRED_ARGS |
| 230 | |
| 231 | exit : |
| 232 | if (!snippets) { |
| 233 | snippets = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0); |
| 234 | } |
| 235 | |
| 236 | return snippets; |
| 237 | } |
| 238 | |
| 239 | void |
| 240 | grn_proc_init_snippet(grn_ctx *ctx) |
| 241 | { |
| 242 | grn_proc_create(ctx, "snippet" , -1, GRN_PROC_FUNCTION, |
| 243 | func_snippet, NULL, NULL, 0, NULL); |
| 244 | } |
| 245 | |
| 246 | static grn_obj * |
| 247 | func_snippet_html(grn_ctx *ctx, int nargs, grn_obj **args, |
| 248 | grn_user_data *user_data) |
| 249 | { |
| 250 | grn_obj *snippets = NULL; |
| 251 | |
| 252 | /* TODO: support parameters */ |
| 253 | if (nargs == 1) { |
| 254 | grn_obj *text = args[0]; |
| 255 | grn_obj *expression = NULL; |
| 256 | grn_obj *condition_ptr = NULL; |
| 257 | grn_obj *condition = NULL; |
| 258 | grn_obj *snip = NULL; |
| 259 | int flags = GRN_SNIP_SKIP_LEADING_SPACES; |
| 260 | unsigned int width = 200; |
| 261 | unsigned int max_n_results = 3; |
| 262 | const char *open_tag = "<span class=\"keyword\">" ; |
| 263 | const char *close_tag = "</span>" ; |
| 264 | grn_snip_mapping *mapping = GRN_SNIP_MAPPING_HTML_ESCAPE; |
| 265 | |
| 266 | grn_proc_get_info(ctx, user_data, NULL, NULL, &expression); |
| 267 | condition_ptr = grn_expr_get_var(ctx, expression, |
| 268 | GRN_SELECT_INTERNAL_VAR_CONDITION, |
| 269 | strlen(GRN_SELECT_INTERNAL_VAR_CONDITION)); |
| 270 | if (condition_ptr) { |
| 271 | condition = GRN_PTR_VALUE(condition_ptr); |
| 272 | } |
| 273 | |
| 274 | if (condition) { |
| 275 | grn_obj *snip_ptr; |
| 276 | snip_ptr = grn_expr_get_var(ctx, expression, |
| 277 | GRN_FUNC_SNIPPET_HTML_CACHE_NAME, |
| 278 | strlen(GRN_FUNC_SNIPPET_HTML_CACHE_NAME)); |
| 279 | if (snip_ptr) { |
| 280 | snip = GRN_PTR_VALUE(snip_ptr); |
| 281 | } else { |
| 282 | snip_ptr = |
| 283 | grn_expr_get_or_add_var(ctx, expression, |
| 284 | GRN_FUNC_SNIPPET_HTML_CACHE_NAME, |
| 285 | strlen(GRN_FUNC_SNIPPET_HTML_CACHE_NAME)); |
| 286 | GRN_OBJ_FIN(ctx, snip_ptr); |
| 287 | GRN_PTR_INIT(snip_ptr, GRN_OBJ_OWN, GRN_DB_OBJECT); |
| 288 | |
| 289 | snip = grn_snip_open(ctx, flags, width, max_n_results, |
| 290 | open_tag, strlen(open_tag), |
| 291 | close_tag, strlen(close_tag), |
| 292 | mapping); |
| 293 | if (snip) { |
| 294 | grn_snip_set_normalizer(ctx, snip, GRN_NORMALIZER_AUTO); |
| 295 | grn_expr_snip_add_conditions(ctx, condition, snip, |
| 296 | 0, NULL, NULL, NULL, NULL); |
| 297 | GRN_PTR_SET(ctx, snip_ptr, snip); |
| 298 | } |
| 299 | } |
| 300 | } |
| 301 | |
| 302 | if (snip) { |
| 303 | snippets = snippet_exec(ctx, snip, text, user_data, NULL, 0, NULL, 0); |
| 304 | } |
| 305 | } |
| 306 | |
| 307 | if (!snippets) { |
| 308 | snippets = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_VOID, 0); |
| 309 | } |
| 310 | |
| 311 | return snippets; |
| 312 | } |
| 313 | |
| 314 | void |
| 315 | grn_proc_init_snippet_html(grn_ctx *ctx) |
| 316 | { |
| 317 | grn_proc_create(ctx, "snippet_html" , -1, GRN_PROC_FUNCTION, |
| 318 | func_snippet_html, NULL, NULL, 0, NULL); |
| 319 | } |
| 320 | |