| 1 | /* -*- c-basic-offset: 2 -*- */ |
| 2 | /* |
| 3 | Copyright(C) 2015 Brazil |
| 4 | |
| 5 | This library is free software; you can redistribute it and/or |
| 6 | modify it under the terms of the GNU Lesser General Public |
| 7 | License version 2.1 as published by the Free Software Foundation. |
| 8 | |
| 9 | This library is distributed in the hope that it will be useful, |
| 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 12 | Lesser General Public License for more details. |
| 13 | |
| 14 | You should have received a copy of the GNU Lesser General Public |
| 15 | License along with this library; if not, write to the Free Software |
| 16 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 17 | */ |
| 18 | |
| 19 | /* TS is an acronym for "Turbo Selector". */ |
| 20 | |
| 21 | #include "grn_ts.h" |
| 22 | |
| 23 | #include "grn_output.h" |
| 24 | #include "grn_str.h" |
| 25 | |
| 26 | #include "ts/ts_buf.h" |
| 27 | #include "ts/ts_cursor.h" |
| 28 | #include "ts/ts_expr.h" |
| 29 | #include "ts/ts_expr_parser.h" |
| 30 | #include "ts/ts_log.h" |
| 31 | #include "ts/ts_sorter.h" |
| 32 | #include "ts/ts_str.h" |
| 33 | #include "ts/ts_types.h" |
| 34 | #include "ts/ts_util.h" |
| 35 | |
| 36 | #include <string.h> |
| 37 | |
| 38 | /*------------------------------------------------------------- |
| 39 | * Miscellaneous. |
| 40 | */ |
| 41 | |
/* The maximum number of records handled per batch in this file. */
enum {
  GRN_TS_BATCH_SIZE = 1024
};
| 43 | |
| 44 | /* grn_ts_bool_output() outputs a value. */ |
| 45 | static grn_rc |
| 46 | grn_ts_bool_output(grn_ctx *ctx, grn_ts_bool value) |
| 47 | { |
| 48 | if (value) { |
| 49 | return grn_bulk_write(ctx, ctx->impl->output.buf, "true" , 4); |
| 50 | } else { |
| 51 | return grn_bulk_write(ctx, ctx->impl->output.buf, "false" , 5); |
| 52 | } |
| 53 | } |
| 54 | |
| 55 | /* grn_ts_int_output() outputs a value. */ |
| 56 | static grn_rc |
| 57 | grn_ts_int_output(grn_ctx *ctx, grn_ts_int value) |
| 58 | { |
| 59 | return grn_text_lltoa(ctx, ctx->impl->output.buf, value); |
| 60 | } |
| 61 | |
| 62 | /* grn_ts_float_output() outputs a value. */ |
| 63 | static grn_rc |
| 64 | grn_ts_float_output(grn_ctx *ctx, grn_ts_float value) |
| 65 | { |
| 66 | return grn_text_ftoa(ctx, ctx->impl->output.buf, value); |
| 67 | } |
| 68 | |
| 69 | /* grn_ts_time_output() outputs a value. */ |
| 70 | static grn_rc |
| 71 | grn_ts_time_output(grn_ctx *ctx, grn_ts_time value) |
| 72 | { |
| 73 | return grn_text_ftoa(ctx, ctx->impl->output.buf, value * 0.000001); |
| 74 | } |
| 75 | |
| 76 | /* grn_ts_text_output() outputs a value. */ |
| 77 | static grn_rc |
| 78 | grn_ts_text_output(grn_ctx *ctx, grn_ts_text value) |
| 79 | { |
| 80 | return grn_text_esc(ctx, ctx->impl->output.buf, value.ptr, value.size); |
| 81 | } |
| 82 | |
| 83 | /* grn_ts_geo_output() outputs a value. */ |
| 84 | static grn_rc |
| 85 | grn_ts_geo_output(grn_ctx *ctx, grn_ts_geo value) |
| 86 | { |
| 87 | grn_rc rc = grn_bulk_write(ctx, ctx->impl->output.buf, "\"" , 1); |
| 88 | if (rc != GRN_SUCCESS) { |
| 89 | return rc; |
| 90 | } |
| 91 | rc = grn_text_itoa(ctx, ctx->impl->output.buf, value.latitude); |
| 92 | if (rc != GRN_SUCCESS) { |
| 93 | return rc; |
| 94 | } |
| 95 | rc = grn_bulk_write(ctx, ctx->impl->output.buf, "x" , 1); |
| 96 | if (rc != GRN_SUCCESS) { |
| 97 | return rc; |
| 98 | } |
| 99 | rc = grn_text_itoa(ctx, ctx->impl->output.buf, value.longitude); |
| 100 | if (rc != GRN_SUCCESS) { |
| 101 | return rc; |
| 102 | } |
| 103 | return grn_bulk_write(ctx, ctx->impl->output.buf, "\"" , 1); |
| 104 | } |
| 105 | |
| 106 | #define GRN_TS_VECTOR_OUTPUT(kind)\ |
| 107 | size_t i;\ |
| 108 | grn_rc rc = grn_bulk_write(ctx, ctx->impl->output.buf, "[", 1);\ |
| 109 | if (rc != GRN_SUCCESS) {\ |
| 110 | return rc;\ |
| 111 | }\ |
| 112 | for (i = 0; i < value.size; ++i) {\ |
| 113 | if (i) {\ |
| 114 | rc = grn_bulk_write(ctx, ctx->impl->output.buf, ",", 1);\ |
| 115 | if (rc != GRN_SUCCESS) {\ |
| 116 | return rc;\ |
| 117 | }\ |
| 118 | }\ |
| 119 | rc = grn_ts_ ## kind ## _output(ctx, value.ptr[i]);\ |
| 120 | if (rc != GRN_SUCCESS) {\ |
| 121 | return rc;\ |
| 122 | }\ |
| 123 | }\ |
| 124 | return grn_bulk_write(ctx, ctx->impl->output.buf, "]", 1); |
| 125 | /* grn_ts_bool_vector_output() outputs a value. */ |
| 126 | static grn_rc |
| 127 | grn_ts_bool_vector_output(grn_ctx *ctx, grn_ts_bool_vector value) |
| 128 | { |
| 129 | GRN_TS_VECTOR_OUTPUT(bool) |
| 130 | } |
| 131 | |
| 132 | /* grn_ts_int_vector_output() outputs a value. */ |
| 133 | static grn_rc |
| 134 | grn_ts_int_vector_output(grn_ctx *ctx, grn_ts_int_vector value) |
| 135 | { |
| 136 | GRN_TS_VECTOR_OUTPUT(int) |
| 137 | } |
| 138 | |
| 139 | /* grn_ts_float_vector_output() outputs a value. */ |
| 140 | static grn_rc |
| 141 | grn_ts_float_vector_output(grn_ctx *ctx, grn_ts_float_vector value) |
| 142 | { |
| 143 | GRN_TS_VECTOR_OUTPUT(float) |
| 144 | } |
| 145 | |
| 146 | /* grn_ts_time_vector_output() outputs a value. */ |
| 147 | static grn_rc |
| 148 | grn_ts_time_vector_output(grn_ctx *ctx, grn_ts_time_vector value) |
| 149 | { |
| 150 | GRN_TS_VECTOR_OUTPUT(time) |
| 151 | } |
| 152 | |
| 153 | /* grn_ts_text_vector_output() outputs a value. */ |
| 154 | static grn_rc |
| 155 | grn_ts_text_vector_output(grn_ctx *ctx, grn_ts_text_vector value) |
| 156 | { |
| 157 | GRN_TS_VECTOR_OUTPUT(text) |
| 158 | } |
| 159 | |
| 160 | /* grn_ts_geo_vector_output() outputs a value. */ |
| 161 | static grn_rc |
| 162 | grn_ts_geo_vector_output(grn_ctx *ctx, grn_ts_geo_vector value) |
| 163 | { |
| 164 | GRN_TS_VECTOR_OUTPUT(geo) |
| 165 | } |
| 166 | #undef GRN_TS_VECTOR_OUTPUT |
| 167 | |
| 168 | /*------------------------------------------------------------- |
| 169 | * grn_ts_writer. |
| 170 | */ |
| 171 | |
| 172 | typedef struct { |
| 173 | grn_ts_expr_parser *parser; |
| 174 | grn_ts_expr **exprs; |
| 175 | size_t n_exprs; |
| 176 | size_t max_n_exprs; |
| 177 | grn_obj name_buf; |
| 178 | grn_ts_str *names; |
| 179 | grn_ts_buf *bufs; |
| 180 | } grn_ts_writer; |
| 181 | |
| 182 | /* grn_ts_writer_init() initializes a writer. */ |
| 183 | static void |
| 184 | grn_ts_writer_init(grn_ctx *ctx, grn_ts_writer *writer) |
| 185 | { |
| 186 | memset(writer, 0, sizeof(*writer)); |
| 187 | writer->parser = NULL; |
| 188 | writer->exprs = NULL; |
| 189 | GRN_TEXT_INIT(&writer->name_buf, GRN_OBJ_VECTOR); |
| 190 | writer->names = NULL; |
| 191 | writer->bufs = NULL; |
| 192 | } |
| 193 | |
| 194 | /* grn_ts_writer_fin() finalizes a writer. */ |
| 195 | static void |
| 196 | grn_ts_writer_fin(grn_ctx *ctx, grn_ts_writer *writer) |
| 197 | { |
| 198 | size_t i; |
| 199 | if (writer->bufs) { |
| 200 | for (i = 0; i < writer->n_exprs; i++) { |
| 201 | grn_ts_buf_fin(ctx, &writer->bufs[i]); |
| 202 | } |
| 203 | GRN_FREE(writer->bufs); |
| 204 | } |
| 205 | if (writer->names) { |
| 206 | GRN_FREE(writer->names); |
| 207 | } |
| 208 | GRN_OBJ_FIN(ctx, &writer->name_buf); |
| 209 | if (writer->exprs) { |
| 210 | for (i = 0; i < writer->n_exprs; i++) { |
| 211 | grn_ts_expr_close(ctx, writer->exprs[i]); |
| 212 | } |
| 213 | GRN_FREE(writer->exprs); |
| 214 | } |
| 215 | if (writer->parser) { |
| 216 | grn_ts_expr_parser_close(ctx, writer->parser); |
| 217 | } |
| 218 | } |
| 219 | |
| 220 | /* grn_ts_writer_expand() expands a wildcard. */ |
| 221 | static grn_rc |
| 222 | grn_ts_writer_expand(grn_ctx *ctx, grn_ts_writer *writer, |
| 223 | grn_obj *table, grn_ts_str str) |
| 224 | { |
| 225 | grn_rc rc = GRN_SUCCESS; |
| 226 | grn_hash_cursor *cursor; |
| 227 | grn_hash *hash = grn_hash_create(ctx, NULL, sizeof(grn_ts_id), 0, |
| 228 | GRN_OBJ_TABLE_HASH_KEY | GRN_HASH_TINY); |
| 229 | if (!hash) { |
| 230 | return GRN_INVALID_ARGUMENT; |
| 231 | } |
| 232 | grn_table_columns(ctx, table, str.ptr, str.size - 1, (grn_obj *)hash); |
| 233 | if (ctx->rc != GRN_SUCCESS) { |
| 234 | return ctx->rc; |
| 235 | } |
| 236 | cursor = grn_hash_cursor_open(ctx, hash, NULL, 0, NULL, 0, 0, -1, 0); |
| 237 | if (!cursor) { |
| 238 | rc = GRN_INVALID_ARGUMENT; |
| 239 | } else { |
| 240 | while (grn_hash_cursor_next(ctx, cursor) != GRN_ID_NIL) { |
| 241 | char name_buf[GRN_TABLE_MAX_KEY_SIZE]; |
| 242 | size_t name_size; |
| 243 | grn_obj *column; |
| 244 | grn_ts_id *column_id; |
| 245 | if (!grn_hash_cursor_get_key(ctx, cursor, (void **)&column_id)) { |
| 246 | rc = GRN_INVALID_ARGUMENT; |
| 247 | break; |
| 248 | } |
| 249 | column = grn_ctx_at(ctx, *column_id); |
| 250 | if (!column) { |
| 251 | rc = GRN_INVALID_ARGUMENT; |
| 252 | break; |
| 253 | } |
| 254 | name_size = grn_column_name(ctx, column, name_buf, sizeof(name_buf)); |
| 255 | grn_obj_unlink(ctx, column); |
| 256 | rc = grn_vector_add_element(ctx, &writer->name_buf, |
| 257 | name_buf, name_size, 0, GRN_DB_TEXT); |
| 258 | if (rc != GRN_SUCCESS) { |
| 259 | break; |
| 260 | } |
| 261 | } |
| 262 | grn_hash_cursor_close(ctx, cursor); |
| 263 | } |
| 264 | grn_hash_close(ctx, hash); |
| 265 | return rc; |
| 266 | } |
| 267 | |
| 268 | /* grn_ts_writer_parse() parses output expressions. */ |
| 269 | static grn_rc |
| 270 | grn_ts_writer_parse(grn_ctx *ctx, grn_ts_writer *writer, |
| 271 | grn_obj *table, grn_ts_str str) |
| 272 | { |
| 273 | grn_rc rc; |
| 274 | grn_ts_str rest = str; |
| 275 | rc = grn_ts_expr_parser_open(ctx, table, &writer->parser); |
| 276 | for ( ; ; ) { |
| 277 | grn_ts_str first = { NULL, 0 }; |
| 278 | rc = grn_ts_expr_parser_split(ctx, writer->parser, rest, &first, &rest); |
| 279 | if (rc != GRN_SUCCESS) { |
| 280 | return (rc == GRN_END_OF_DATA) ? GRN_SUCCESS : rc; |
| 281 | } |
| 282 | if ((first.ptr[first.size - 1] == '*') && |
| 283 | grn_ts_str_is_name_prefix((grn_ts_str){ first.ptr, first.size - 1 })) { |
| 284 | rc = grn_ts_writer_expand(ctx, writer, table, first); |
| 285 | if (rc != GRN_SUCCESS) { |
| 286 | return rc; |
| 287 | } |
| 288 | } else if (grn_ts_str_is_key_name(first) && |
| 289 | !grn_ts_table_has_key(ctx, table)) { |
| 290 | /* |
| 291 | * Skip _key if the table has no _key, because the default output_columns |
| 292 | * option contains _key. |
| 293 | */ |
| 294 | GRN_TS_DEBUG("skip \"_key\" because the table has no _key" ); |
| 295 | } else { |
| 296 | rc = grn_vector_add_element(ctx, &writer->name_buf, |
| 297 | first.ptr, first.size, 0, GRN_DB_TEXT); |
| 298 | if (rc != GRN_SUCCESS) { |
| 299 | return rc; |
| 300 | } |
| 301 | } |
| 302 | } |
| 303 | return GRN_SUCCESS; |
| 304 | } |
| 305 | |
| 306 | /* grn_ts_writer_build() builds output expresions. */ |
| 307 | static grn_rc |
| 308 | grn_ts_writer_build(grn_ctx *ctx, grn_ts_writer *writer, grn_obj *table) |
| 309 | { |
| 310 | size_t i, n_names = grn_vector_size(ctx, &writer->name_buf); |
| 311 | if (!n_names) { |
| 312 | return GRN_SUCCESS; |
| 313 | } |
| 314 | writer->names = GRN_MALLOCN(grn_ts_str, n_names); |
| 315 | if (!writer->names) { |
| 316 | GRN_TS_ERR_RETURN(GRN_NO_MEMORY_AVAILABLE, |
| 317 | "GRN_MALLOCN failed: %" GRN_FMT_SIZE " x %" GRN_FMT_SIZE, |
| 318 | sizeof(grn_ts_str), n_names); |
| 319 | } |
| 320 | writer->exprs = GRN_MALLOCN(grn_ts_expr *, n_names); |
| 321 | if (!writer->exprs) { |
| 322 | GRN_TS_ERR_RETURN(GRN_NO_MEMORY_AVAILABLE, |
| 323 | "GRN_MALLOCN failed: %" GRN_FMT_SIZE " x %" GRN_FMT_SIZE, |
| 324 | sizeof(grn_ts_expr *), n_names); |
| 325 | } |
| 326 | for (i = 0; i < n_names; i++) { |
| 327 | grn_rc rc; |
| 328 | grn_ts_expr *new_expr; |
| 329 | const char *name_ptr; |
| 330 | size_t name_size = grn_vector_get_element(ctx, &writer->name_buf, i, |
| 331 | &name_ptr, NULL, NULL); |
| 332 | rc = grn_ts_expr_parser_parse(ctx, writer->parser, |
| 333 | (grn_ts_str){ name_ptr, name_size }, |
| 334 | &new_expr); |
| 335 | if (rc != GRN_SUCCESS) { |
| 336 | return rc; |
| 337 | } |
| 338 | writer->names[i].ptr = name_ptr; |
| 339 | writer->names[i].size = name_size; |
| 340 | writer->exprs[i] = new_expr; |
| 341 | writer->n_exprs++; |
| 342 | } |
| 343 | return GRN_SUCCESS; |
| 344 | } |
| 345 | |
| 346 | /* grn_ts_writer_open() creates a writer. */ |
| 347 | static grn_rc |
| 348 | grn_ts_writer_open(grn_ctx *ctx, grn_obj *table, grn_ts_str str, |
| 349 | grn_ts_writer **writer) |
| 350 | { |
| 351 | grn_rc rc; |
| 352 | grn_ts_writer *new_writer = GRN_MALLOCN(grn_ts_writer, 1); |
| 353 | if (!new_writer) { |
| 354 | GRN_TS_ERR_RETURN(GRN_NO_MEMORY_AVAILABLE, |
| 355 | "GRN_MALLOCN failed: %" GRN_FMT_SIZE " x 1" , |
| 356 | sizeof(grn_ts_writer)); |
| 357 | } |
| 358 | grn_ts_writer_init(ctx, new_writer); |
| 359 | rc = grn_ts_writer_parse(ctx, new_writer, table, str); |
| 360 | if (rc == GRN_SUCCESS) { |
| 361 | rc = grn_ts_writer_build(ctx, new_writer, table); |
| 362 | } |
| 363 | if (rc != GRN_SUCCESS) { |
| 364 | grn_ts_writer_fin(ctx, new_writer); |
| 365 | GRN_FREE(new_writer); |
| 366 | return rc; |
| 367 | } |
| 368 | *writer = new_writer; |
| 369 | return GRN_SUCCESS; |
| 370 | } |
| 371 | |
| 372 | /* grn_ts_writer_close() destroys a writer. */ |
| 373 | static void |
| 374 | grn_ts_writer_close(grn_ctx *ctx, grn_ts_writer *writer) |
| 375 | { |
| 376 | grn_ts_writer_fin(ctx, writer); |
| 377 | GRN_FREE(writer); |
| 378 | } |
| 379 | |
| 380 | /* TODO: Errors of output macros, such as GRN_TEXT_*(), are ignored. */ |
| 381 | |
| 382 | #define (TYPE, name)\ |
| 383 | case GRN_DB_ ## TYPE: {\ |
| 384 | GRN_TEXT_PUTS(ctx, ctx->impl->output.buf, name);\ |
| 385 | break;\ |
| 386 | } |
| 387 | /* grn_ts_writer_output_header() outputs names and data types. */ |
| 388 | static grn_rc |
| 389 | (grn_ctx *ctx, grn_ts_writer *writer) |
| 390 | { |
| 391 | grn_rc rc; |
| 392 | GRN_OUTPUT_ARRAY_OPEN("COLUMNS" , writer->n_exprs); |
| 393 | for (size_t i = 0; i < writer->n_exprs; ++i) { |
| 394 | GRN_OUTPUT_ARRAY_OPEN("COLUMN" , 2); |
| 395 | rc = grn_text_esc(ctx, ctx->impl->output.buf, |
| 396 | writer->names[i].ptr, writer->names[i].size); |
| 397 | if (rc != GRN_SUCCESS) { |
| 398 | return rc; |
| 399 | } |
| 400 | GRN_TEXT_PUT(ctx, ctx->impl->output.buf, ",\"" , 2); |
| 401 | switch (writer->exprs[i]->data_type) { |
| 402 | case GRN_DB_VOID: { |
| 403 | if (writer->exprs[i]->data_kind == GRN_TS_GEO) { |
| 404 | GRN_TEXT_PUTS(ctx, ctx->impl->output.buf, "GeoPoint" ); |
| 405 | } else { |
| 406 | GRN_TEXT_PUTS(ctx, ctx->impl->output.buf, "Void" ); |
| 407 | } |
| 408 | break; |
| 409 | } |
| 410 | GRN_TS_WRITER_OUTPUT_HEADER_CASE(BOOL, "Bool" ) |
| 411 | GRN_TS_WRITER_OUTPUT_HEADER_CASE(INT8, "Int8" ) |
| 412 | GRN_TS_WRITER_OUTPUT_HEADER_CASE(INT16, "Int16" ) |
| 413 | GRN_TS_WRITER_OUTPUT_HEADER_CASE(INT32, "Int32" ) |
| 414 | GRN_TS_WRITER_OUTPUT_HEADER_CASE(INT64, "Int64" ) |
| 415 | GRN_TS_WRITER_OUTPUT_HEADER_CASE(UINT8, "UInt8" ) |
| 416 | GRN_TS_WRITER_OUTPUT_HEADER_CASE(UINT16, "UInt16" ) |
| 417 | GRN_TS_WRITER_OUTPUT_HEADER_CASE(UINT32, "UInt32" ) |
| 418 | GRN_TS_WRITER_OUTPUT_HEADER_CASE(UINT64, "UInt64" ) |
| 419 | GRN_TS_WRITER_OUTPUT_HEADER_CASE(FLOAT, "Float" ) |
| 420 | GRN_TS_WRITER_OUTPUT_HEADER_CASE(TIME, "Time" ) |
| 421 | GRN_TS_WRITER_OUTPUT_HEADER_CASE(SHORT_TEXT, "ShortText" ) |
| 422 | GRN_TS_WRITER_OUTPUT_HEADER_CASE(TEXT, "Text" ) |
| 423 | GRN_TS_WRITER_OUTPUT_HEADER_CASE(LONG_TEXT, "LongText" ) |
| 424 | GRN_TS_WRITER_OUTPUT_HEADER_CASE(TOKYO_GEO_POINT, "TokyoGeoPoint" ) |
| 425 | GRN_TS_WRITER_OUTPUT_HEADER_CASE(WGS84_GEO_POINT, "WGS84GeoPoint" ) |
| 426 | default: { |
| 427 | char name_buf[GRN_TABLE_MAX_KEY_SIZE]; |
| 428 | size_t name_size; |
| 429 | grn_obj *obj = grn_ctx_at(ctx, writer->exprs[i]->data_type); |
| 430 | if (!obj) { |
| 431 | GRN_TS_ERR_RETURN(GRN_UNKNOWN_ERROR, "grn_ctx_at failed: %d" , |
| 432 | writer->exprs[i]->data_type); |
| 433 | } |
| 434 | if (!grn_ts_obj_is_table(ctx, obj)) { |
| 435 | grn_obj_unlink(ctx, obj); |
| 436 | GRN_TS_ERR_RETURN(GRN_UNKNOWN_ERROR, "not table: %d" , |
| 437 | writer->exprs[i]->data_type); |
| 438 | } |
| 439 | name_size = grn_obj_name(ctx, obj, name_buf, sizeof(name_buf)); |
| 440 | GRN_TEXT_PUT(ctx, ctx->impl->output.buf, name_buf, name_size); |
| 441 | grn_obj_unlink(ctx, obj); |
| 442 | break; |
| 443 | } |
| 444 | } |
| 445 | GRN_TEXT_PUTC(ctx, ctx->impl->output.buf, '"'); |
| 446 | GRN_OUTPUT_ARRAY_CLOSE(); |
| 447 | } |
| 448 | GRN_OUTPUT_ARRAY_CLOSE(); /* COLUMNS. */ |
| 449 | return GRN_SUCCESS; |
| 450 | } |
| 451 | #undef GRN_TS_WRITER_OUTPUT_HEADER_CASE |
| 452 | |
| 453 | #define GRN_TS_WRITER_OUTPUT_BODY_CASE(KIND, kind)\ |
| 454 | case GRN_TS_ ## KIND: {\ |
| 455 | grn_ts_ ## kind *value = (grn_ts_ ## kind *)writer->bufs[j].ptr;\ |
| 456 | grn_ts_ ## kind ## _output(ctx, value[i]);\ |
| 457 | break;\ |
| 458 | } |
| 459 | #define GRN_TS_WRITER_OUTPUT_BODY_VECTOR_CASE(KIND, kind)\ |
| 460 | GRN_TS_WRITER_OUTPUT_BODY_CASE(KIND ## _VECTOR, kind ## _vector) |
| 461 | /* |
| 462 | * grn_ts_writer_output_body() evaluates expressions and outputs the results. |
| 463 | */ |
| 464 | static grn_rc |
| 465 | grn_ts_writer_output_body(grn_ctx *ctx, grn_ts_writer *writer, |
| 466 | const grn_ts_record *in, size_t n_in) |
| 467 | { |
| 468 | size_t i, j, count = 0; |
| 469 | writer->bufs = GRN_MALLOCN(grn_ts_buf, writer->n_exprs); |
| 470 | if (!writer->bufs) { |
| 471 | GRN_TS_ERR_RETURN(GRN_NO_MEMORY_AVAILABLE, |
| 472 | "GRN_MALLOCN failed: %" GRN_FMT_SIZE " x %" GRN_FMT_SIZE, |
| 473 | sizeof(grn_ts_buf), writer->n_exprs); |
| 474 | } |
| 475 | for (i = 0; i < writer->n_exprs; i++) { |
| 476 | grn_ts_buf_init(ctx, &writer->bufs[i]); |
| 477 | } |
| 478 | while (count < n_in) { |
| 479 | size_t batch_size = GRN_TS_BATCH_SIZE; |
| 480 | if (batch_size > (n_in - count)) { |
| 481 | batch_size = n_in - count; |
| 482 | } |
| 483 | for (i = 0; i < writer->n_exprs; ++i) { |
| 484 | grn_rc rc = grn_ts_expr_evaluate_to_buf(ctx, writer->exprs[i], in + count, |
| 485 | batch_size, &writer->bufs[i]); |
| 486 | if (rc != GRN_SUCCESS) { |
| 487 | return rc; |
| 488 | } |
| 489 | } |
| 490 | for (i = 0; i < batch_size; ++i) { |
| 491 | GRN_OUTPUT_ARRAY_OPEN("HIT" , writer->n_exprs); |
| 492 | for (j = 0; j < writer->n_exprs; ++j) { |
| 493 | if (j) { |
| 494 | GRN_TEXT_PUTC(ctx, ctx->impl->output.buf, ','); |
| 495 | } |
| 496 | switch (writer->exprs[j]->data_kind) { |
| 497 | GRN_TS_WRITER_OUTPUT_BODY_CASE(BOOL, bool); |
| 498 | GRN_TS_WRITER_OUTPUT_BODY_CASE(INT, int); |
| 499 | GRN_TS_WRITER_OUTPUT_BODY_CASE(FLOAT, float); |
| 500 | GRN_TS_WRITER_OUTPUT_BODY_CASE(TIME, time); |
| 501 | GRN_TS_WRITER_OUTPUT_BODY_CASE(TEXT, text); |
| 502 | GRN_TS_WRITER_OUTPUT_BODY_CASE(GEO, geo); |
| 503 | GRN_TS_WRITER_OUTPUT_BODY_VECTOR_CASE(BOOL, bool); |
| 504 | GRN_TS_WRITER_OUTPUT_BODY_VECTOR_CASE(INT, int); |
| 505 | GRN_TS_WRITER_OUTPUT_BODY_VECTOR_CASE(FLOAT, float); |
| 506 | GRN_TS_WRITER_OUTPUT_BODY_VECTOR_CASE(TIME, time); |
| 507 | GRN_TS_WRITER_OUTPUT_BODY_VECTOR_CASE(TEXT, text); |
| 508 | GRN_TS_WRITER_OUTPUT_BODY_VECTOR_CASE(GEO, geo); |
| 509 | default: { |
| 510 | break; |
| 511 | } |
| 512 | } |
| 513 | } |
| 514 | GRN_OUTPUT_ARRAY_CLOSE(); /* HITS. */ |
| 515 | } |
| 516 | count += batch_size; |
| 517 | } |
| 518 | return GRN_SUCCESS; |
| 519 | } |
| 520 | #undef GRN_TS_WRITER_OUTPUT_BODY_VECTOR_CASE |
| 521 | #undef GRN_TS_WRITER_OUTPUT_BODY_CASE |
| 522 | |
| 523 | /* grn_ts_writer_output() outputs search results into the output buffer. */ |
| 524 | static grn_rc |
| 525 | grn_ts_writer_output(grn_ctx *ctx, grn_ts_writer *writer, |
| 526 | const grn_ts_record *in, size_t n_in, size_t n_hits) |
| 527 | { |
| 528 | grn_rc rc; |
| 529 | GRN_OUTPUT_ARRAY_OPEN("RESULT" , 1); |
| 530 | GRN_OUTPUT_ARRAY_OPEN("RESULTSET" , 2 + n_in); |
| 531 | GRN_OUTPUT_ARRAY_OPEN("NHITS" , 1); |
| 532 | rc = grn_text_ulltoa(ctx, ctx->impl->output.buf, n_hits); |
| 533 | if (rc != GRN_SUCCESS) { |
| 534 | return rc; |
| 535 | } |
| 536 | GRN_OUTPUT_ARRAY_CLOSE(); /* NHITS. */ |
| 537 | rc = grn_ts_writer_output_header(ctx, writer); |
| 538 | if (rc != GRN_SUCCESS) { |
| 539 | return rc; |
| 540 | } |
| 541 | rc = grn_ts_writer_output_body(ctx, writer, in, n_in); |
| 542 | if (rc != GRN_SUCCESS) { |
| 543 | return rc; |
| 544 | } |
| 545 | GRN_OUTPUT_ARRAY_CLOSE(); /* RESULTSET. */ |
| 546 | GRN_OUTPUT_ARRAY_CLOSE(); /* RESET. */ |
| 547 | return GRN_SUCCESS; |
| 548 | } |
| 549 | |
| 550 | /* grn_ts_select_filter() applies a filter to all the records of a table. */ |
| 551 | static grn_rc |
| 552 | grn_ts_select_filter(grn_ctx *ctx, grn_obj *table, grn_ts_str str, |
| 553 | size_t offset, size_t limit, |
| 554 | grn_ts_record **out, size_t *n_out, size_t *n_hits) |
| 555 | { |
| 556 | grn_rc rc; |
| 557 | grn_table_cursor *cursor_obj; |
| 558 | grn_ts_cursor *cursor; |
| 559 | grn_ts_expr *expr = NULL; |
| 560 | grn_ts_record *buf = NULL; |
| 561 | size_t buf_size = 0; |
| 562 | |
| 563 | *out = NULL; |
| 564 | *n_out = 0; |
| 565 | *n_hits = 0; |
| 566 | |
| 567 | cursor_obj = grn_table_cursor_open(ctx, table, NULL, 0, NULL, 0, 0, -1, |
| 568 | GRN_CURSOR_ASCENDING | GRN_CURSOR_BY_ID); |
| 569 | if (!cursor_obj) { |
| 570 | return (ctx->rc != GRN_SUCCESS) ? ctx->rc : GRN_UNKNOWN_ERROR; |
| 571 | } |
| 572 | rc = grn_ts_obj_cursor_open(ctx, cursor_obj, &cursor); |
| 573 | if (rc != GRN_SUCCESS) { |
| 574 | grn_obj_close(ctx, cursor_obj); |
| 575 | return rc; |
| 576 | } |
| 577 | |
| 578 | if (str.size) { |
| 579 | rc = grn_ts_expr_parse(ctx, table, str, &expr); |
| 580 | } |
| 581 | if (rc == GRN_SUCCESS) { |
| 582 | for ( ; ; ) { |
| 583 | size_t batch_size; |
| 584 | grn_ts_record *batch; |
| 585 | |
| 586 | /* Extend the record buffer. */ |
| 587 | if (buf_size < (*n_out + GRN_TS_BATCH_SIZE)) { |
| 588 | size_t new_size = buf_size ? (buf_size * 2) : GRN_TS_BATCH_SIZE; |
| 589 | size_t n_bytes = sizeof(grn_ts_record) * new_size; |
| 590 | grn_ts_record *new_buf = (grn_ts_record *)GRN_REALLOC(buf, n_bytes); |
| 591 | if (!new_buf) { |
| 592 | GRN_TS_ERR(GRN_NO_MEMORY_AVAILABLE, |
| 593 | "GRN_REALLOC failed: %" GRN_FMT_SIZE, |
| 594 | n_bytes); |
| 595 | rc = ctx->rc; |
| 596 | break; |
| 597 | } |
| 598 | buf = new_buf; |
| 599 | buf_size = new_size; |
| 600 | } |
| 601 | |
| 602 | /* Read records from the cursor. */ |
| 603 | batch = buf + *n_out; |
| 604 | rc = grn_ts_cursor_read(ctx, cursor, batch, GRN_TS_BATCH_SIZE, |
| 605 | &batch_size); |
| 606 | if ((rc != GRN_SUCCESS) || !batch_size) { |
| 607 | break; |
| 608 | } |
| 609 | |
| 610 | /* Apply the filter. */ |
| 611 | if (expr) { |
| 612 | rc = grn_ts_expr_filter(ctx, expr, batch, batch_size, |
| 613 | batch, &batch_size); |
| 614 | if (rc != GRN_SUCCESS) { |
| 615 | break; |
| 616 | } |
| 617 | } |
| 618 | *n_hits += batch_size; |
| 619 | |
| 620 | /* Apply the offset and the limit. */ |
| 621 | if (offset) { |
| 622 | if (batch_size <= offset) { |
| 623 | offset -= batch_size; |
| 624 | batch_size = 0; |
| 625 | } else { |
| 626 | size_t n_bytes = sizeof(grn_ts_record) * (batch_size - offset); |
| 627 | grn_memmove(batch, batch + offset, n_bytes); |
| 628 | batch_size -= offset; |
| 629 | offset = 0; |
| 630 | } |
| 631 | } |
| 632 | if (batch_size <= limit) { |
| 633 | limit -= batch_size; |
| 634 | } else { |
| 635 | batch_size = limit; |
| 636 | limit = 0; |
| 637 | } |
| 638 | *n_out += batch_size; |
| 639 | } |
| 640 | /* Ignore a failure of destruction. */ |
| 641 | if (expr) { |
| 642 | grn_ts_expr_close(ctx, expr); |
| 643 | } |
| 644 | } |
| 645 | /* Ignore a failure of destruction. */ |
| 646 | grn_ts_cursor_close(ctx, cursor); |
| 647 | |
| 648 | if (rc != GRN_SUCCESS) { |
| 649 | if (buf) { |
| 650 | GRN_FREE(buf); |
| 651 | } |
| 652 | *n_out = 0; |
| 653 | *n_hits = 0; |
| 654 | return rc; |
| 655 | } |
| 656 | *out = buf; |
| 657 | return GRN_SUCCESS; |
| 658 | } |
| 659 | |
| 660 | /* grn_ts_select_scorer() adjust scores. */ |
| 661 | static grn_rc |
| 662 | grn_ts_select_scorer(grn_ctx *ctx, grn_obj *table, grn_ts_str str, |
| 663 | grn_ts_record *records, size_t n_records) |
| 664 | { |
| 665 | grn_rc rc; |
| 666 | grn_ts_str rest; |
| 667 | grn_ts_expr *expr; |
| 668 | rest = grn_ts_str_trim_score_assignment(str); |
| 669 | if (!rest.size) { |
| 670 | return GRN_SUCCESS; |
| 671 | } |
| 672 | rc = grn_ts_expr_parse(ctx, table, rest, &expr); |
| 673 | if (rc != GRN_SUCCESS) { |
| 674 | return rc; |
| 675 | } |
| 676 | rc = grn_ts_expr_adjust(ctx, expr, records, n_records); |
| 677 | grn_ts_expr_close(ctx, expr); |
| 678 | return rc; |
| 679 | } |
| 680 | |
| 681 | /* grn_ts_select_output() outputs the results. */ |
| 682 | static grn_rc |
| 683 | grn_ts_select_output(grn_ctx *ctx, grn_obj *table, grn_ts_str str, |
| 684 | const grn_ts_record *in, size_t n_in, size_t n_hits) |
| 685 | { |
| 686 | grn_ts_writer *writer; |
| 687 | grn_rc rc = grn_ts_writer_open(ctx, table, str, &writer); |
| 688 | if (rc != GRN_SUCCESS) { |
| 689 | return rc; |
| 690 | } |
| 691 | rc = grn_ts_writer_output(ctx, writer, in, n_in, n_hits); |
| 692 | grn_ts_writer_close(ctx, writer); |
| 693 | return rc; |
| 694 | } |
| 695 | |
| 696 | /* grn_ts_select_with_sortby() executes a select command with --sortby. */ |
| 697 | static grn_rc |
| 698 | grn_ts_select_with_sortby(grn_ctx *ctx, grn_obj *table, |
| 699 | grn_ts_str filter, grn_ts_str scorer, |
| 700 | grn_ts_str sortby, grn_ts_str output_columns, |
| 701 | size_t offset, size_t limit) |
| 702 | { |
| 703 | grn_rc rc; |
| 704 | grn_ts_record *recs = NULL; |
| 705 | size_t n_recs = 0, max_n_recs = 0, n_hits = 0; |
| 706 | grn_table_cursor *cursor_obj; |
| 707 | grn_ts_cursor *cursor = NULL; |
| 708 | grn_ts_expr *filter_expr = NULL; |
| 709 | grn_ts_expr *scorer_expr = NULL; |
| 710 | grn_ts_sorter *sorter = NULL; |
| 711 | cursor_obj = grn_table_cursor_open(ctx, table, NULL, 0, NULL, 0, 0, -1, |
| 712 | GRN_CURSOR_ASCENDING | GRN_CURSOR_BY_ID); |
| 713 | if (!cursor_obj) { |
| 714 | GRN_TS_ERR_RETURN(GRN_UNKNOWN_ERROR, "grn_table_cursor_open failed" ); |
| 715 | } |
| 716 | rc = grn_ts_obj_cursor_open(ctx, cursor_obj, &cursor); |
| 717 | if (rc != GRN_SUCCESS) { |
| 718 | grn_obj_close(ctx, cursor_obj); |
| 719 | return rc; |
| 720 | } |
| 721 | if (filter.size) { |
| 722 | rc = grn_ts_expr_parse(ctx, table, filter, &filter_expr); |
| 723 | } |
| 724 | if (rc == GRN_SUCCESS) { |
| 725 | scorer = grn_ts_str_trim_score_assignment(scorer); |
| 726 | if (scorer.size) { |
| 727 | rc = grn_ts_expr_parse(ctx, table, scorer, &scorer_expr); |
| 728 | } |
| 729 | if (rc == GRN_SUCCESS) { |
| 730 | rc = grn_ts_sorter_parse(ctx, table, sortby, offset, limit, &sorter); |
| 731 | } |
| 732 | } |
| 733 | if (rc == GRN_SUCCESS) { |
| 734 | size_t n_pending_recs = 0; |
| 735 | for ( ; ; ) { |
| 736 | size_t batch_size; |
| 737 | grn_ts_record *batch; |
| 738 | /* Extend a buffer for records. */ |
| 739 | if (max_n_recs < (n_recs + GRN_TS_BATCH_SIZE)) { |
| 740 | size_t n_bytes, new_max_n_recs = max_n_recs * 2; |
| 741 | grn_ts_record *new_recs; |
| 742 | if (!new_max_n_recs) { |
| 743 | new_max_n_recs = GRN_TS_BATCH_SIZE; |
| 744 | } |
| 745 | n_bytes = sizeof(grn_ts_record) * new_max_n_recs; |
| 746 | new_recs = (grn_ts_record *)GRN_REALLOC(recs, n_bytes); |
| 747 | if (!new_recs) { |
| 748 | GRN_TS_ERR(GRN_NO_MEMORY_AVAILABLE, |
| 749 | "GRN_REALLOC failed: %" GRN_FMT_SIZE, |
| 750 | n_bytes); |
| 751 | rc = ctx->rc; |
| 752 | break; |
| 753 | } |
| 754 | recs = new_recs; |
| 755 | max_n_recs = new_max_n_recs; |
| 756 | } |
| 757 | /* Read records from a cursor. */ |
| 758 | batch = recs + n_recs; |
| 759 | rc = grn_ts_cursor_read(ctx, cursor, batch, GRN_TS_BATCH_SIZE, |
| 760 | &batch_size); |
| 761 | if (rc != GRN_SUCCESS) { |
| 762 | break; |
| 763 | } else if (!batch_size) { |
| 764 | /* Apply a scorer and complete sorting. */ |
| 765 | if (scorer_expr) { |
| 766 | rc = grn_ts_expr_adjust(ctx, scorer_expr, |
| 767 | recs + n_recs - n_pending_recs, |
| 768 | n_pending_recs); |
| 769 | if (rc != GRN_SUCCESS) { |
| 770 | break; |
| 771 | } |
| 772 | } |
| 773 | if (n_pending_recs) { |
| 774 | rc = grn_ts_sorter_progress(ctx, sorter, recs, n_recs, &n_recs); |
| 775 | if (rc != GRN_SUCCESS) { |
| 776 | break; |
| 777 | } |
| 778 | } |
| 779 | rc = grn_ts_sorter_complete(ctx, sorter, recs, n_recs, &n_recs); |
| 780 | break; |
| 781 | } |
| 782 | /* Apply a filter. */ |
| 783 | if (filter_expr) { |
| 784 | rc = grn_ts_expr_filter(ctx, filter_expr, batch, batch_size, |
| 785 | batch, &batch_size); |
| 786 | if (rc != GRN_SUCCESS) { |
| 787 | break; |
| 788 | } |
| 789 | } |
| 790 | n_hits += batch_size; |
| 791 | n_recs += batch_size; |
| 792 | n_pending_recs += batch_size; |
| 793 | /* |
| 794 | * Apply a scorer and progress sorting if there are enough pending |
| 795 | * records. |
| 796 | */ |
| 797 | if (n_pending_recs >= GRN_TS_BATCH_SIZE) { |
| 798 | if (scorer_expr) { |
| 799 | rc = grn_ts_expr_adjust(ctx, scorer_expr, |
| 800 | recs + n_recs - n_pending_recs, |
| 801 | n_pending_recs); |
| 802 | if (rc != GRN_SUCCESS) { |
| 803 | break; |
| 804 | } |
| 805 | } |
| 806 | rc = grn_ts_sorter_progress(ctx, sorter, recs, n_recs, &n_recs); |
| 807 | if (rc != GRN_SUCCESS) { |
| 808 | break; |
| 809 | } |
| 810 | n_pending_recs = 0; |
| 811 | } |
| 812 | } |
| 813 | } |
| 814 | if (rc == GRN_SUCCESS) { |
| 815 | rc = grn_ts_select_output(ctx, table, output_columns, |
| 816 | recs, n_recs, n_hits); |
| 817 | } |
| 818 | if (cursor) { |
| 819 | grn_ts_cursor_close(ctx, cursor); |
| 820 | } |
| 821 | if (recs) { |
| 822 | GRN_FREE(recs); |
| 823 | } |
| 824 | if (sorter) { |
| 825 | grn_ts_sorter_close(ctx, sorter); |
| 826 | } |
| 827 | if (scorer_expr) { |
| 828 | grn_ts_expr_close(ctx, scorer_expr); |
| 829 | } |
| 830 | if (filter_expr) { |
| 831 | grn_ts_expr_close(ctx, filter_expr); |
| 832 | } |
| 833 | return rc; |
| 834 | } |
| 835 | |
| 836 | /* |
| 837 | * grn_ts_select_without_sortby() executes a select command without --sortby. |
| 838 | */ |
| 839 | static grn_rc |
| 840 | grn_ts_select_without_sortby(grn_ctx *ctx, grn_obj *table, |
| 841 | grn_ts_str filter, grn_ts_str scorer, |
| 842 | grn_ts_str output_columns, |
| 843 | size_t offset, size_t limit) |
| 844 | { |
| 845 | grn_rc rc; |
| 846 | grn_ts_record *records = NULL; |
| 847 | size_t n_records, n_hits; |
| 848 | rc = grn_ts_select_filter(ctx, table, filter, offset, limit, |
| 849 | &records, &n_records, &n_hits); |
| 850 | if (rc == GRN_SUCCESS) { |
| 851 | rc = grn_ts_select_scorer(ctx, table, scorer, records, n_records); |
| 852 | if (rc == GRN_SUCCESS) { |
| 853 | rc = grn_ts_select_output(ctx, table, output_columns, |
| 854 | records, n_records, n_hits); |
| 855 | } |
| 856 | } |
| 857 | if (records) { |
| 858 | GRN_FREE(records); |
| 859 | } |
| 860 | return rc; |
| 861 | } |
| 862 | |
| 863 | /*------------------------------------------------------------- |
| 864 | * API. |
| 865 | */ |
| 866 | |
| 867 | grn_rc |
| 868 | grn_ts_select(grn_ctx *ctx, grn_obj *table, |
| 869 | const char *filter_ptr, size_t filter_len, |
| 870 | const char *scorer_ptr, size_t scorer_len, |
| 871 | const char *sortby_ptr, size_t sortby_len, |
| 872 | const char *output_columns_ptr, size_t output_columns_len, |
| 873 | size_t offset, size_t limit) |
| 874 | { |
| 875 | grn_rc rc; |
| 876 | grn_ts_str filter = { filter_ptr, filter_len }; |
| 877 | grn_ts_str scorer = { scorer_ptr, scorer_len }; |
| 878 | grn_ts_str sortby = { sortby_ptr, sortby_len }; |
| 879 | grn_ts_str output_columns = { output_columns_ptr, output_columns_len }; |
| 880 | if (!ctx) { |
| 881 | return GRN_INVALID_ARGUMENT; |
| 882 | } |
| 883 | if (!table || !grn_ts_obj_is_table(ctx, table) || |
| 884 | (!filter_ptr && filter_len) || (!scorer_ptr && scorer_len) || |
| 885 | (!sortby_ptr && sortby_len) || |
| 886 | (!output_columns_ptr && output_columns_len)) { |
| 887 | GRN_TS_ERR_RETURN(GRN_INVALID_ARGUMENT, "invalid argument" ); |
| 888 | } |
| 889 | filter = grn_ts_str_trim_left(filter); |
| 890 | if (sortby_len) { |
| 891 | rc = grn_ts_select_with_sortby(ctx, table, filter, scorer, sortby, |
| 892 | output_columns, offset, limit); |
| 893 | } else { |
| 894 | rc = grn_ts_select_without_sortby(ctx, table, filter, scorer, |
| 895 | output_columns, offset, limit); |
| 896 | } |
| 897 | if (rc != GRN_SUCCESS) { |
| 898 | GRN_BULK_REWIND(ctx->impl->output.buf); |
| 899 | if ((ctx->rc == GRN_SUCCESS) || !ctx->errbuf[0]) { |
| 900 | ERR(rc, "error message is missing" ); |
| 901 | } else if (ctx->errlvl < GRN_LOG_ERROR) { |
| 902 | ctx->errlvl = GRN_LOG_ERROR; |
| 903 | } |
| 904 | } |
| 905 | return rc; |
| 906 | } |
| 907 | |