| 1 | /* -*- c-basic-offset: 2 -*- */ |
| 2 | /* |
| 3 | Copyright(C) 2016 Brazil |
| 4 | |
| 5 | This library is free software; you can redistribute it and/or |
| 6 | modify it under the terms of the GNU Lesser General Public |
| 7 | License version 2.1 as published by the Free Software Foundation. |
| 8 | |
| 9 | This library is distributed in the hope that it will be useful, |
| 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 12 | Lesser General Public License for more details. |
| 13 | |
| 14 | You should have received a copy of the GNU Lesser General Public |
| 15 | License along with this library; if not, write to the Free Software |
| 16 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 17 | */ |
| 18 | |
| 19 | #ifdef GRN_EMBEDDED |
| 20 | # define GRN_PLUGIN_FUNCTION_TAG functions_string |
| 21 | #endif |
| 22 | |
| 23 | #include <groonga/plugin.h> |
| 24 | |
| 25 | /* |
| 26 | * func_string_length() returns the number of characters in a string. |
| 27 | * If the string contains an invalid byte sequence, this function returns the |
| 28 | * number of characters before the invalid byte sequence. |
| 29 | */ |
| 30 | static grn_obj * |
| 31 | func_string_length(grn_ctx *ctx, int n_args, grn_obj **args, |
| 32 | grn_user_data *user_data) |
| 33 | { |
| 34 | grn_obj *target; |
| 35 | unsigned int length = 0; |
| 36 | grn_obj *grn_length; |
| 37 | |
| 38 | if (n_args != 1) { |
| 39 | GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, |
| 40 | "string_length(): wrong number of arguments (%d for 1)" , |
| 41 | n_args); |
| 42 | return NULL; |
| 43 | } |
| 44 | |
| 45 | target = args[0]; |
| 46 | if (!(target->header.type == GRN_BULK && |
| 47 | ((target->header.domain == GRN_DB_SHORT_TEXT) || |
| 48 | (target->header.domain == GRN_DB_TEXT) || |
| 49 | (target->header.domain == GRN_DB_LONG_TEXT)))) { |
| 50 | grn_obj inspected; |
| 51 | |
| 52 | GRN_TEXT_INIT(&inspected, 0); |
| 53 | grn_inspect(ctx, &inspected, target); |
| 54 | GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, |
| 55 | "string_length(): target object must be a text bulk: " |
| 56 | "<%.*s>" , |
| 57 | (int)GRN_TEXT_LEN(&inspected), |
| 58 | GRN_TEXT_VALUE(&inspected)); |
| 59 | GRN_OBJ_FIN(ctx, &inspected); |
| 60 | return NULL; |
| 61 | } |
| 62 | |
| 63 | { |
| 64 | const char *s = GRN_TEXT_VALUE(target); |
| 65 | const char *e = GRN_TEXT_VALUE(target) + GRN_TEXT_LEN(target); |
| 66 | const char *p; |
| 67 | unsigned int cl = 0; |
| 68 | for (p = s; p < e && (cl = grn_charlen(ctx, p, e)); p += cl) { |
| 69 | length++; |
| 70 | } |
| 71 | } |
| 72 | |
| 73 | grn_length = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_UINT32, 0); |
| 74 | if (!grn_length) { |
| 75 | return NULL; |
| 76 | } |
| 77 | |
| 78 | GRN_UINT32_SET(ctx, grn_length, length); |
| 79 | |
| 80 | return grn_length; |
| 81 | } |
| 82 | |
| 83 | static grn_obj * |
| 84 | func_string_substring(grn_ctx *ctx, int n_args, grn_obj **args, |
| 85 | grn_user_data *user_data) |
| 86 | { |
| 87 | grn_obj *target; |
| 88 | grn_obj *from_raw; |
| 89 | grn_obj *length_raw = NULL; |
| 90 | int64_t from = 0; |
| 91 | int64_t length = -1; |
| 92 | const char *start = NULL; |
| 93 | const char *end = NULL; |
| 94 | grn_obj *substring; |
| 95 | |
| 96 | if (n_args < 2) { |
| 97 | GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, |
| 98 | "string_substring(): wrong number of arguments (%d for 2..3)" , |
| 99 | n_args); |
| 100 | return NULL; |
| 101 | } |
| 102 | |
| 103 | target = args[0]; |
| 104 | from_raw = args[1]; |
| 105 | if (n_args == 3) { |
| 106 | length_raw = args[2]; |
| 107 | } |
| 108 | |
| 109 | if (!(target->header.type == GRN_BULK && |
| 110 | grn_type_id_is_text_family(ctx, target->header.domain))) { |
| 111 | grn_obj inspected; |
| 112 | |
| 113 | GRN_TEXT_INIT(&inspected, 0); |
| 114 | grn_inspect(ctx, &inspected, target); |
| 115 | GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, |
| 116 | "string_substring(): target object must be a text bulk: " |
| 117 | "<%.*s>" , |
| 118 | (int)GRN_TEXT_LEN(&inspected), |
| 119 | GRN_TEXT_VALUE(&inspected)); |
| 120 | GRN_OBJ_FIN(ctx, &inspected); |
| 121 | return NULL; |
| 122 | } |
| 123 | |
| 124 | /* TODO: extract as grn_func_arg_int64() */ |
| 125 | if (!grn_type_id_is_number_family(ctx, from_raw->header.domain)) { |
| 126 | grn_obj inspected; |
| 127 | |
| 128 | GRN_TEXT_INIT(&inspected, 0); |
| 129 | grn_inspect(ctx, &inspected, from_raw); |
| 130 | GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, |
| 131 | "string_substring(): from must be a number: <%.*s>" , |
| 132 | (int)GRN_TEXT_LEN(&inspected), |
| 133 | GRN_TEXT_VALUE(&inspected)); |
| 134 | GRN_OBJ_FIN(ctx, &inspected); |
| 135 | return NULL; |
| 136 | } |
| 137 | if (from_raw->header.domain == GRN_DB_INT32) { |
| 138 | from = GRN_INT32_VALUE(from_raw); |
| 139 | } else if (from_raw->header.domain == GRN_DB_INT64) { |
| 140 | from = GRN_INT64_VALUE(from_raw); |
| 141 | } else { |
| 142 | grn_obj buffer; |
| 143 | grn_rc rc; |
| 144 | |
| 145 | GRN_INT64_INIT(&buffer, 0); |
| 146 | rc = grn_obj_cast(ctx, from_raw, &buffer, GRN_FALSE); |
| 147 | if (rc == GRN_SUCCESS) { |
| 148 | from = GRN_INT64_VALUE(&buffer); |
| 149 | } |
| 150 | GRN_OBJ_FIN(ctx, &buffer); |
| 151 | |
| 152 | if (rc != GRN_SUCCESS) { |
| 153 | grn_obj inspected; |
| 154 | |
| 155 | GRN_TEXT_INIT(&inspected, 0); |
| 156 | grn_inspect(ctx, &inspected, from_raw); |
| 157 | GRN_PLUGIN_ERROR(ctx, rc, |
| 158 | "string_substring(): " |
| 159 | "failed to cast from value to number: <%.*s>" , |
| 160 | (int)GRN_TEXT_LEN(&inspected), |
| 161 | GRN_TEXT_VALUE(&inspected)); |
| 162 | GRN_OBJ_FIN(ctx, &inspected); |
| 163 | return NULL; |
| 164 | } |
| 165 | } |
| 166 | |
| 167 | if (length_raw) { |
| 168 | /* TODO: extract as grn_func_arg_int64() */ |
| 169 | if (!grn_type_id_is_number_family(ctx, length_raw->header.domain)) { |
| 170 | grn_obj inspected; |
| 171 | |
| 172 | GRN_TEXT_INIT(&inspected, 0); |
| 173 | grn_inspect(ctx, &inspected, length_raw); |
| 174 | GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, |
| 175 | "string_substring(): length must be a number: <%.*s>" , |
| 176 | (int)GRN_TEXT_LEN(&inspected), |
| 177 | GRN_TEXT_VALUE(&inspected)); |
| 178 | GRN_OBJ_FIN(ctx, &inspected); |
| 179 | return NULL; |
| 180 | } |
| 181 | if (length_raw->header.domain == GRN_DB_INT32) { |
| 182 | length = GRN_INT32_VALUE(length_raw); |
| 183 | } else if (length_raw->header.domain == GRN_DB_INT64) { |
| 184 | length = GRN_INT64_VALUE(length_raw); |
| 185 | } else { |
| 186 | grn_obj buffer; |
| 187 | grn_rc rc; |
| 188 | |
| 189 | GRN_INT64_INIT(&buffer, 0); |
| 190 | rc = grn_obj_cast(ctx, length_raw, &buffer, GRN_FALSE); |
| 191 | if (rc == GRN_SUCCESS) { |
| 192 | length = GRN_INT64_VALUE(&buffer); |
| 193 | } |
| 194 | GRN_OBJ_FIN(ctx, &buffer); |
| 195 | |
| 196 | if (rc != GRN_SUCCESS) { |
| 197 | grn_obj inspected; |
| 198 | |
| 199 | GRN_TEXT_INIT(&inspected, 0); |
| 200 | grn_inspect(ctx, &inspected, length_raw); |
| 201 | GRN_PLUGIN_ERROR(ctx, rc, |
| 202 | "string_substring(): " |
| 203 | "failed to cast length value to number: <%.*s>" , |
| 204 | (int)GRN_TEXT_LEN(&inspected), |
| 205 | GRN_TEXT_VALUE(&inspected)); |
| 206 | GRN_OBJ_FIN(ctx, &inspected); |
| 207 | return NULL; |
| 208 | } |
| 209 | } |
| 210 | } |
| 211 | |
| 212 | substring = grn_plugin_proc_alloc(ctx, user_data, target->header.domain, 0); |
| 213 | if (!substring) { |
| 214 | return NULL; |
| 215 | } |
| 216 | |
| 217 | GRN_BULK_REWIND(substring); |
| 218 | |
| 219 | if (GRN_TEXT_LEN(target) == 0) { |
| 220 | return substring; |
| 221 | } |
| 222 | if (length == 0) { |
| 223 | return substring; |
| 224 | } |
| 225 | |
| 226 | while (from < 0) { |
| 227 | from += GRN_TEXT_LEN(target); |
| 228 | } |
| 229 | |
| 230 | { |
| 231 | const char *p; |
| 232 | |
| 233 | start = NULL; |
| 234 | p = GRN_TEXT_VALUE(target); |
| 235 | end = p + GRN_TEXT_LEN(target); |
| 236 | |
| 237 | if (from == 0) { |
| 238 | start = p; |
| 239 | } else { |
| 240 | unsigned int char_length = 0; |
| 241 | size_t n_chars = 0; |
| 242 | |
| 243 | for (; |
| 244 | p < end && (char_length = grn_charlen(ctx, p, end)); |
| 245 | p += char_length, n_chars++) { |
| 246 | if (n_chars == from) { |
| 247 | start = p; |
| 248 | break; |
| 249 | } |
| 250 | } |
| 251 | } |
| 252 | |
| 253 | if (start && length > 0) { |
| 254 | unsigned int char_length = 0; |
| 255 | size_t n_chars = 0; |
| 256 | |
| 257 | for (; |
| 258 | p < end && (char_length = grn_charlen(ctx, p, end)); |
| 259 | p += char_length, n_chars++) { |
| 260 | if (n_chars == length) { |
| 261 | end = p; |
| 262 | break; |
| 263 | } |
| 264 | } |
| 265 | } |
| 266 | } |
| 267 | |
| 268 | if (start) { |
| 269 | GRN_TEXT_SET(ctx, substring, start, end - start); |
| 270 | } |
| 271 | |
| 272 | return substring; |
| 273 | } |
| 274 | |
| 275 | grn_rc |
| 276 | GRN_PLUGIN_INIT(grn_ctx *ctx) |
| 277 | { |
| 278 | return ctx->rc; |
| 279 | } |
| 280 | |
| 281 | grn_rc |
| 282 | GRN_PLUGIN_REGISTER(grn_ctx *ctx) |
| 283 | { |
| 284 | grn_rc rc = GRN_SUCCESS; |
| 285 | |
| 286 | grn_proc_create(ctx, "string_length" , -1, GRN_PROC_FUNCTION, func_string_length, |
| 287 | NULL, NULL, 0, NULL); |
| 288 | |
| 289 | grn_proc_create(ctx, "string_substring" , -1, GRN_PROC_FUNCTION, func_string_substring, |
| 290 | NULL, NULL, 0, NULL); |
| 291 | |
| 292 | return rc; |
| 293 | } |
| 294 | |
| 295 | grn_rc |
| 296 | GRN_PLUGIN_FIN(grn_ctx *ctx) |
| 297 | { |
| 298 | return GRN_SUCCESS; |
| 299 | } |
| 300 | |