1 | /* -*- c-basic-offset: 2 -*- */ |
2 | /* |
3 | Copyright(C) 2015 Brazil |
4 | |
5 | This library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License version 2.1 as published by the Free Software Foundation. |
8 | |
9 | This library is distributed in the hope that it will be useful, |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | Lesser General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU Lesser General Public |
15 | License along with this library; if not, write to the Free Software |
16 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
17 | */ |
18 | |
19 | /* TS is an acronym for "Turbo Selector". */ |
20 | |
21 | #include "grn_ts.h" |
22 | |
23 | #include "grn_output.h" |
24 | #include "grn_str.h" |
25 | |
26 | #include "ts/ts_buf.h" |
27 | #include "ts/ts_cursor.h" |
28 | #include "ts/ts_expr.h" |
29 | #include "ts/ts_expr_parser.h" |
30 | #include "ts/ts_log.h" |
31 | #include "ts/ts_sorter.h" |
32 | #include "ts/ts_str.h" |
33 | #include "ts/ts_types.h" |
34 | #include "ts/ts_util.h" |
35 | |
36 | #include <string.h> |
37 | |
38 | /*------------------------------------------------------------- |
39 | * Miscellaneous. |
40 | */ |
41 | |
/*
 * GRN_TS_BATCH_SIZE is the maximum number of records that are read,
 * evaluated and written per batch in this file.
 */
enum {
  GRN_TS_BATCH_SIZE = 1024
};
43 | |
44 | /* grn_ts_bool_output() outputs a value. */ |
45 | static grn_rc |
46 | grn_ts_bool_output(grn_ctx *ctx, grn_ts_bool value) |
47 | { |
48 | if (value) { |
49 | return grn_bulk_write(ctx, ctx->impl->output.buf, "true" , 4); |
50 | } else { |
51 | return grn_bulk_write(ctx, ctx->impl->output.buf, "false" , 5); |
52 | } |
53 | } |
54 | |
55 | /* grn_ts_int_output() outputs a value. */ |
56 | static grn_rc |
57 | grn_ts_int_output(grn_ctx *ctx, grn_ts_int value) |
58 | { |
59 | return grn_text_lltoa(ctx, ctx->impl->output.buf, value); |
60 | } |
61 | |
62 | /* grn_ts_float_output() outputs a value. */ |
63 | static grn_rc |
64 | grn_ts_float_output(grn_ctx *ctx, grn_ts_float value) |
65 | { |
66 | return grn_text_ftoa(ctx, ctx->impl->output.buf, value); |
67 | } |
68 | |
69 | /* grn_ts_time_output() outputs a value. */ |
70 | static grn_rc |
71 | grn_ts_time_output(grn_ctx *ctx, grn_ts_time value) |
72 | { |
73 | return grn_text_ftoa(ctx, ctx->impl->output.buf, value * 0.000001); |
74 | } |
75 | |
76 | /* grn_ts_text_output() outputs a value. */ |
77 | static grn_rc |
78 | grn_ts_text_output(grn_ctx *ctx, grn_ts_text value) |
79 | { |
80 | return grn_text_esc(ctx, ctx->impl->output.buf, value.ptr, value.size); |
81 | } |
82 | |
83 | /* grn_ts_geo_output() outputs a value. */ |
84 | static grn_rc |
85 | grn_ts_geo_output(grn_ctx *ctx, grn_ts_geo value) |
86 | { |
87 | grn_rc rc = grn_bulk_write(ctx, ctx->impl->output.buf, "\"" , 1); |
88 | if (rc != GRN_SUCCESS) { |
89 | return rc; |
90 | } |
91 | rc = grn_text_itoa(ctx, ctx->impl->output.buf, value.latitude); |
92 | if (rc != GRN_SUCCESS) { |
93 | return rc; |
94 | } |
95 | rc = grn_bulk_write(ctx, ctx->impl->output.buf, "x" , 1); |
96 | if (rc != GRN_SUCCESS) { |
97 | return rc; |
98 | } |
99 | rc = grn_text_itoa(ctx, ctx->impl->output.buf, value.longitude); |
100 | if (rc != GRN_SUCCESS) { |
101 | return rc; |
102 | } |
103 | return grn_bulk_write(ctx, ctx->impl->output.buf, "\"" , 1); |
104 | } |
105 | |
106 | #define GRN_TS_VECTOR_OUTPUT(kind)\ |
107 | size_t i;\ |
108 | grn_rc rc = grn_bulk_write(ctx, ctx->impl->output.buf, "[", 1);\ |
109 | if (rc != GRN_SUCCESS) {\ |
110 | return rc;\ |
111 | }\ |
112 | for (i = 0; i < value.size; ++i) {\ |
113 | if (i) {\ |
114 | rc = grn_bulk_write(ctx, ctx->impl->output.buf, ",", 1);\ |
115 | if (rc != GRN_SUCCESS) {\ |
116 | return rc;\ |
117 | }\ |
118 | }\ |
119 | rc = grn_ts_ ## kind ## _output(ctx, value.ptr[i]);\ |
120 | if (rc != GRN_SUCCESS) {\ |
121 | return rc;\ |
122 | }\ |
123 | }\ |
124 | return grn_bulk_write(ctx, ctx->impl->output.buf, "]", 1); |
125 | /* grn_ts_bool_vector_output() outputs a value. */ |
126 | static grn_rc |
127 | grn_ts_bool_vector_output(grn_ctx *ctx, grn_ts_bool_vector value) |
128 | { |
129 | GRN_TS_VECTOR_OUTPUT(bool) |
130 | } |
131 | |
132 | /* grn_ts_int_vector_output() outputs a value. */ |
133 | static grn_rc |
134 | grn_ts_int_vector_output(grn_ctx *ctx, grn_ts_int_vector value) |
135 | { |
136 | GRN_TS_VECTOR_OUTPUT(int) |
137 | } |
138 | |
139 | /* grn_ts_float_vector_output() outputs a value. */ |
140 | static grn_rc |
141 | grn_ts_float_vector_output(grn_ctx *ctx, grn_ts_float_vector value) |
142 | { |
143 | GRN_TS_VECTOR_OUTPUT(float) |
144 | } |
145 | |
146 | /* grn_ts_time_vector_output() outputs a value. */ |
147 | static grn_rc |
148 | grn_ts_time_vector_output(grn_ctx *ctx, grn_ts_time_vector value) |
149 | { |
150 | GRN_TS_VECTOR_OUTPUT(time) |
151 | } |
152 | |
153 | /* grn_ts_text_vector_output() outputs a value. */ |
154 | static grn_rc |
155 | grn_ts_text_vector_output(grn_ctx *ctx, grn_ts_text_vector value) |
156 | { |
157 | GRN_TS_VECTOR_OUTPUT(text) |
158 | } |
159 | |
160 | /* grn_ts_geo_vector_output() outputs a value. */ |
161 | static grn_rc |
162 | grn_ts_geo_vector_output(grn_ctx *ctx, grn_ts_geo_vector value) |
163 | { |
164 | GRN_TS_VECTOR_OUTPUT(geo) |
165 | } |
166 | #undef GRN_TS_VECTOR_OUTPUT |
167 | |
168 | /*------------------------------------------------------------- |
169 | * grn_ts_writer. |
170 | */ |
171 | |
/*
 * grn_ts_writer keeps the output expressions of a select command together
 * with their names and the buffers used to evaluate them.
 */
typedef struct {
  grn_ts_expr_parser *parser; /* Opened by grn_ts_writer_parse(). */
  grn_ts_expr **exprs;        /* Allocated and filled by grn_ts_writer_build(). */
  size_t n_exprs;             /* Number of valid entries in exprs. */
  size_t max_n_exprs;         /* Not referenced in this file. */
  grn_obj name_buf;           /* Text vector that stores the expression names. */
  grn_ts_str *names;          /* Views that point into the elements of name_buf. */
  grn_ts_buf *bufs;           /* Allocated by grn_ts_writer_output_body(). */
} grn_ts_writer;
181 | |
182 | /* grn_ts_writer_init() initializes a writer. */ |
183 | static void |
184 | grn_ts_writer_init(grn_ctx *ctx, grn_ts_writer *writer) |
185 | { |
186 | memset(writer, 0, sizeof(*writer)); |
187 | writer->parser = NULL; |
188 | writer->exprs = NULL; |
189 | GRN_TEXT_INIT(&writer->name_buf, GRN_OBJ_VECTOR); |
190 | writer->names = NULL; |
191 | writer->bufs = NULL; |
192 | } |
193 | |
194 | /* grn_ts_writer_fin() finalizes a writer. */ |
195 | static void |
196 | grn_ts_writer_fin(grn_ctx *ctx, grn_ts_writer *writer) |
197 | { |
198 | size_t i; |
199 | if (writer->bufs) { |
200 | for (i = 0; i < writer->n_exprs; i++) { |
201 | grn_ts_buf_fin(ctx, &writer->bufs[i]); |
202 | } |
203 | GRN_FREE(writer->bufs); |
204 | } |
205 | if (writer->names) { |
206 | GRN_FREE(writer->names); |
207 | } |
208 | GRN_OBJ_FIN(ctx, &writer->name_buf); |
209 | if (writer->exprs) { |
210 | for (i = 0; i < writer->n_exprs; i++) { |
211 | grn_ts_expr_close(ctx, writer->exprs[i]); |
212 | } |
213 | GRN_FREE(writer->exprs); |
214 | } |
215 | if (writer->parser) { |
216 | grn_ts_expr_parser_close(ctx, writer->parser); |
217 | } |
218 | } |
219 | |
220 | /* grn_ts_writer_expand() expands a wildcard. */ |
221 | static grn_rc |
222 | grn_ts_writer_expand(grn_ctx *ctx, grn_ts_writer *writer, |
223 | grn_obj *table, grn_ts_str str) |
224 | { |
225 | grn_rc rc = GRN_SUCCESS; |
226 | grn_hash_cursor *cursor; |
227 | grn_hash *hash = grn_hash_create(ctx, NULL, sizeof(grn_ts_id), 0, |
228 | GRN_OBJ_TABLE_HASH_KEY | GRN_HASH_TINY); |
229 | if (!hash) { |
230 | return GRN_INVALID_ARGUMENT; |
231 | } |
232 | grn_table_columns(ctx, table, str.ptr, str.size - 1, (grn_obj *)hash); |
233 | if (ctx->rc != GRN_SUCCESS) { |
234 | return ctx->rc; |
235 | } |
236 | cursor = grn_hash_cursor_open(ctx, hash, NULL, 0, NULL, 0, 0, -1, 0); |
237 | if (!cursor) { |
238 | rc = GRN_INVALID_ARGUMENT; |
239 | } else { |
240 | while (grn_hash_cursor_next(ctx, cursor) != GRN_ID_NIL) { |
241 | char name_buf[GRN_TABLE_MAX_KEY_SIZE]; |
242 | size_t name_size; |
243 | grn_obj *column; |
244 | grn_ts_id *column_id; |
245 | if (!grn_hash_cursor_get_key(ctx, cursor, (void **)&column_id)) { |
246 | rc = GRN_INVALID_ARGUMENT; |
247 | break; |
248 | } |
249 | column = grn_ctx_at(ctx, *column_id); |
250 | if (!column) { |
251 | rc = GRN_INVALID_ARGUMENT; |
252 | break; |
253 | } |
254 | name_size = grn_column_name(ctx, column, name_buf, sizeof(name_buf)); |
255 | grn_obj_unlink(ctx, column); |
256 | rc = grn_vector_add_element(ctx, &writer->name_buf, |
257 | name_buf, name_size, 0, GRN_DB_TEXT); |
258 | if (rc != GRN_SUCCESS) { |
259 | break; |
260 | } |
261 | } |
262 | grn_hash_cursor_close(ctx, cursor); |
263 | } |
264 | grn_hash_close(ctx, hash); |
265 | return rc; |
266 | } |
267 | |
268 | /* grn_ts_writer_parse() parses output expressions. */ |
269 | static grn_rc |
270 | grn_ts_writer_parse(grn_ctx *ctx, grn_ts_writer *writer, |
271 | grn_obj *table, grn_ts_str str) |
272 | { |
273 | grn_rc rc; |
274 | grn_ts_str rest = str; |
275 | rc = grn_ts_expr_parser_open(ctx, table, &writer->parser); |
276 | for ( ; ; ) { |
277 | grn_ts_str first = { NULL, 0 }; |
278 | rc = grn_ts_expr_parser_split(ctx, writer->parser, rest, &first, &rest); |
279 | if (rc != GRN_SUCCESS) { |
280 | return (rc == GRN_END_OF_DATA) ? GRN_SUCCESS : rc; |
281 | } |
282 | if ((first.ptr[first.size - 1] == '*') && |
283 | grn_ts_str_is_name_prefix((grn_ts_str){ first.ptr, first.size - 1 })) { |
284 | rc = grn_ts_writer_expand(ctx, writer, table, first); |
285 | if (rc != GRN_SUCCESS) { |
286 | return rc; |
287 | } |
288 | } else if (grn_ts_str_is_key_name(first) && |
289 | !grn_ts_table_has_key(ctx, table)) { |
290 | /* |
291 | * Skip _key if the table has no _key, because the default output_columns |
292 | * option contains _key. |
293 | */ |
294 | GRN_TS_DEBUG("skip \"_key\" because the table has no _key" ); |
295 | } else { |
296 | rc = grn_vector_add_element(ctx, &writer->name_buf, |
297 | first.ptr, first.size, 0, GRN_DB_TEXT); |
298 | if (rc != GRN_SUCCESS) { |
299 | return rc; |
300 | } |
301 | } |
302 | } |
303 | return GRN_SUCCESS; |
304 | } |
305 | |
306 | /* grn_ts_writer_build() builds output expresions. */ |
307 | static grn_rc |
308 | grn_ts_writer_build(grn_ctx *ctx, grn_ts_writer *writer, grn_obj *table) |
309 | { |
310 | size_t i, n_names = grn_vector_size(ctx, &writer->name_buf); |
311 | if (!n_names) { |
312 | return GRN_SUCCESS; |
313 | } |
314 | writer->names = GRN_MALLOCN(grn_ts_str, n_names); |
315 | if (!writer->names) { |
316 | GRN_TS_ERR_RETURN(GRN_NO_MEMORY_AVAILABLE, |
317 | "GRN_MALLOCN failed: %" GRN_FMT_SIZE " x %" GRN_FMT_SIZE, |
318 | sizeof(grn_ts_str), n_names); |
319 | } |
320 | writer->exprs = GRN_MALLOCN(grn_ts_expr *, n_names); |
321 | if (!writer->exprs) { |
322 | GRN_TS_ERR_RETURN(GRN_NO_MEMORY_AVAILABLE, |
323 | "GRN_MALLOCN failed: %" GRN_FMT_SIZE " x %" GRN_FMT_SIZE, |
324 | sizeof(grn_ts_expr *), n_names); |
325 | } |
326 | for (i = 0; i < n_names; i++) { |
327 | grn_rc rc; |
328 | grn_ts_expr *new_expr; |
329 | const char *name_ptr; |
330 | size_t name_size = grn_vector_get_element(ctx, &writer->name_buf, i, |
331 | &name_ptr, NULL, NULL); |
332 | rc = grn_ts_expr_parser_parse(ctx, writer->parser, |
333 | (grn_ts_str){ name_ptr, name_size }, |
334 | &new_expr); |
335 | if (rc != GRN_SUCCESS) { |
336 | return rc; |
337 | } |
338 | writer->names[i].ptr = name_ptr; |
339 | writer->names[i].size = name_size; |
340 | writer->exprs[i] = new_expr; |
341 | writer->n_exprs++; |
342 | } |
343 | return GRN_SUCCESS; |
344 | } |
345 | |
346 | /* grn_ts_writer_open() creates a writer. */ |
347 | static grn_rc |
348 | grn_ts_writer_open(grn_ctx *ctx, grn_obj *table, grn_ts_str str, |
349 | grn_ts_writer **writer) |
350 | { |
351 | grn_rc rc; |
352 | grn_ts_writer *new_writer = GRN_MALLOCN(grn_ts_writer, 1); |
353 | if (!new_writer) { |
354 | GRN_TS_ERR_RETURN(GRN_NO_MEMORY_AVAILABLE, |
355 | "GRN_MALLOCN failed: %" GRN_FMT_SIZE " x 1" , |
356 | sizeof(grn_ts_writer)); |
357 | } |
358 | grn_ts_writer_init(ctx, new_writer); |
359 | rc = grn_ts_writer_parse(ctx, new_writer, table, str); |
360 | if (rc == GRN_SUCCESS) { |
361 | rc = grn_ts_writer_build(ctx, new_writer, table); |
362 | } |
363 | if (rc != GRN_SUCCESS) { |
364 | grn_ts_writer_fin(ctx, new_writer); |
365 | GRN_FREE(new_writer); |
366 | return rc; |
367 | } |
368 | *writer = new_writer; |
369 | return GRN_SUCCESS; |
370 | } |
371 | |
372 | /* grn_ts_writer_close() destroys a writer. */ |
373 | static void |
374 | grn_ts_writer_close(grn_ctx *ctx, grn_ts_writer *writer) |
375 | { |
376 | grn_ts_writer_fin(ctx, writer); |
377 | GRN_FREE(writer); |
378 | } |
379 | |
380 | /* TODO: Errors of output macros, such as GRN_TEXT_*(), are ignored. */ |
381 | |
382 | #define (TYPE, name)\ |
383 | case GRN_DB_ ## TYPE: {\ |
384 | GRN_TEXT_PUTS(ctx, ctx->impl->output.buf, name);\ |
385 | break;\ |
386 | } |
387 | /* grn_ts_writer_output_header() outputs names and data types. */ |
388 | static grn_rc |
389 | (grn_ctx *ctx, grn_ts_writer *writer) |
390 | { |
391 | grn_rc rc; |
392 | GRN_OUTPUT_ARRAY_OPEN("COLUMNS" , writer->n_exprs); |
393 | for (size_t i = 0; i < writer->n_exprs; ++i) { |
394 | GRN_OUTPUT_ARRAY_OPEN("COLUMN" , 2); |
395 | rc = grn_text_esc(ctx, ctx->impl->output.buf, |
396 | writer->names[i].ptr, writer->names[i].size); |
397 | if (rc != GRN_SUCCESS) { |
398 | return rc; |
399 | } |
400 | GRN_TEXT_PUT(ctx, ctx->impl->output.buf, ",\"" , 2); |
401 | switch (writer->exprs[i]->data_type) { |
402 | case GRN_DB_VOID: { |
403 | if (writer->exprs[i]->data_kind == GRN_TS_GEO) { |
404 | GRN_TEXT_PUTS(ctx, ctx->impl->output.buf, "GeoPoint" ); |
405 | } else { |
406 | GRN_TEXT_PUTS(ctx, ctx->impl->output.buf, "Void" ); |
407 | } |
408 | break; |
409 | } |
410 | GRN_TS_WRITER_OUTPUT_HEADER_CASE(BOOL, "Bool" ) |
411 | GRN_TS_WRITER_OUTPUT_HEADER_CASE(INT8, "Int8" ) |
412 | GRN_TS_WRITER_OUTPUT_HEADER_CASE(INT16, "Int16" ) |
413 | GRN_TS_WRITER_OUTPUT_HEADER_CASE(INT32, "Int32" ) |
414 | GRN_TS_WRITER_OUTPUT_HEADER_CASE(INT64, "Int64" ) |
415 | GRN_TS_WRITER_OUTPUT_HEADER_CASE(UINT8, "UInt8" ) |
416 | GRN_TS_WRITER_OUTPUT_HEADER_CASE(UINT16, "UInt16" ) |
417 | GRN_TS_WRITER_OUTPUT_HEADER_CASE(UINT32, "UInt32" ) |
418 | GRN_TS_WRITER_OUTPUT_HEADER_CASE(UINT64, "UInt64" ) |
419 | GRN_TS_WRITER_OUTPUT_HEADER_CASE(FLOAT, "Float" ) |
420 | GRN_TS_WRITER_OUTPUT_HEADER_CASE(TIME, "Time" ) |
421 | GRN_TS_WRITER_OUTPUT_HEADER_CASE(SHORT_TEXT, "ShortText" ) |
422 | GRN_TS_WRITER_OUTPUT_HEADER_CASE(TEXT, "Text" ) |
423 | GRN_TS_WRITER_OUTPUT_HEADER_CASE(LONG_TEXT, "LongText" ) |
424 | GRN_TS_WRITER_OUTPUT_HEADER_CASE(TOKYO_GEO_POINT, "TokyoGeoPoint" ) |
425 | GRN_TS_WRITER_OUTPUT_HEADER_CASE(WGS84_GEO_POINT, "WGS84GeoPoint" ) |
426 | default: { |
427 | char name_buf[GRN_TABLE_MAX_KEY_SIZE]; |
428 | size_t name_size; |
429 | grn_obj *obj = grn_ctx_at(ctx, writer->exprs[i]->data_type); |
430 | if (!obj) { |
431 | GRN_TS_ERR_RETURN(GRN_UNKNOWN_ERROR, "grn_ctx_at failed: %d" , |
432 | writer->exprs[i]->data_type); |
433 | } |
434 | if (!grn_ts_obj_is_table(ctx, obj)) { |
435 | grn_obj_unlink(ctx, obj); |
436 | GRN_TS_ERR_RETURN(GRN_UNKNOWN_ERROR, "not table: %d" , |
437 | writer->exprs[i]->data_type); |
438 | } |
439 | name_size = grn_obj_name(ctx, obj, name_buf, sizeof(name_buf)); |
440 | GRN_TEXT_PUT(ctx, ctx->impl->output.buf, name_buf, name_size); |
441 | grn_obj_unlink(ctx, obj); |
442 | break; |
443 | } |
444 | } |
445 | GRN_TEXT_PUTC(ctx, ctx->impl->output.buf, '"'); |
446 | GRN_OUTPUT_ARRAY_CLOSE(); |
447 | } |
448 | GRN_OUTPUT_ARRAY_CLOSE(); /* COLUMNS. */ |
449 | return GRN_SUCCESS; |
450 | } |
451 | #undef GRN_TS_WRITER_OUTPUT_HEADER_CASE |
452 | |
453 | #define GRN_TS_WRITER_OUTPUT_BODY_CASE(KIND, kind)\ |
454 | case GRN_TS_ ## KIND: {\ |
455 | grn_ts_ ## kind *value = (grn_ts_ ## kind *)writer->bufs[j].ptr;\ |
456 | grn_ts_ ## kind ## _output(ctx, value[i]);\ |
457 | break;\ |
458 | } |
459 | #define GRN_TS_WRITER_OUTPUT_BODY_VECTOR_CASE(KIND, kind)\ |
460 | GRN_TS_WRITER_OUTPUT_BODY_CASE(KIND ## _VECTOR, kind ## _vector) |
461 | /* |
462 | * grn_ts_writer_output_body() evaluates expressions and outputs the results. |
463 | */ |
464 | static grn_rc |
465 | grn_ts_writer_output_body(grn_ctx *ctx, grn_ts_writer *writer, |
466 | const grn_ts_record *in, size_t n_in) |
467 | { |
468 | size_t i, j, count = 0; |
469 | writer->bufs = GRN_MALLOCN(grn_ts_buf, writer->n_exprs); |
470 | if (!writer->bufs) { |
471 | GRN_TS_ERR_RETURN(GRN_NO_MEMORY_AVAILABLE, |
472 | "GRN_MALLOCN failed: %" GRN_FMT_SIZE " x %" GRN_FMT_SIZE, |
473 | sizeof(grn_ts_buf), writer->n_exprs); |
474 | } |
475 | for (i = 0; i < writer->n_exprs; i++) { |
476 | grn_ts_buf_init(ctx, &writer->bufs[i]); |
477 | } |
478 | while (count < n_in) { |
479 | size_t batch_size = GRN_TS_BATCH_SIZE; |
480 | if (batch_size > (n_in - count)) { |
481 | batch_size = n_in - count; |
482 | } |
483 | for (i = 0; i < writer->n_exprs; ++i) { |
484 | grn_rc rc = grn_ts_expr_evaluate_to_buf(ctx, writer->exprs[i], in + count, |
485 | batch_size, &writer->bufs[i]); |
486 | if (rc != GRN_SUCCESS) { |
487 | return rc; |
488 | } |
489 | } |
490 | for (i = 0; i < batch_size; ++i) { |
491 | GRN_OUTPUT_ARRAY_OPEN("HIT" , writer->n_exprs); |
492 | for (j = 0; j < writer->n_exprs; ++j) { |
493 | if (j) { |
494 | GRN_TEXT_PUTC(ctx, ctx->impl->output.buf, ','); |
495 | } |
496 | switch (writer->exprs[j]->data_kind) { |
497 | GRN_TS_WRITER_OUTPUT_BODY_CASE(BOOL, bool); |
498 | GRN_TS_WRITER_OUTPUT_BODY_CASE(INT, int); |
499 | GRN_TS_WRITER_OUTPUT_BODY_CASE(FLOAT, float); |
500 | GRN_TS_WRITER_OUTPUT_BODY_CASE(TIME, time); |
501 | GRN_TS_WRITER_OUTPUT_BODY_CASE(TEXT, text); |
502 | GRN_TS_WRITER_OUTPUT_BODY_CASE(GEO, geo); |
503 | GRN_TS_WRITER_OUTPUT_BODY_VECTOR_CASE(BOOL, bool); |
504 | GRN_TS_WRITER_OUTPUT_BODY_VECTOR_CASE(INT, int); |
505 | GRN_TS_WRITER_OUTPUT_BODY_VECTOR_CASE(FLOAT, float); |
506 | GRN_TS_WRITER_OUTPUT_BODY_VECTOR_CASE(TIME, time); |
507 | GRN_TS_WRITER_OUTPUT_BODY_VECTOR_CASE(TEXT, text); |
508 | GRN_TS_WRITER_OUTPUT_BODY_VECTOR_CASE(GEO, geo); |
509 | default: { |
510 | break; |
511 | } |
512 | } |
513 | } |
514 | GRN_OUTPUT_ARRAY_CLOSE(); /* HITS. */ |
515 | } |
516 | count += batch_size; |
517 | } |
518 | return GRN_SUCCESS; |
519 | } |
520 | #undef GRN_TS_WRITER_OUTPUT_BODY_VECTOR_CASE |
521 | #undef GRN_TS_WRITER_OUTPUT_BODY_CASE |
522 | |
523 | /* grn_ts_writer_output() outputs search results into the output buffer. */ |
524 | static grn_rc |
525 | grn_ts_writer_output(grn_ctx *ctx, grn_ts_writer *writer, |
526 | const grn_ts_record *in, size_t n_in, size_t n_hits) |
527 | { |
528 | grn_rc rc; |
529 | GRN_OUTPUT_ARRAY_OPEN("RESULT" , 1); |
530 | GRN_OUTPUT_ARRAY_OPEN("RESULTSET" , 2 + n_in); |
531 | GRN_OUTPUT_ARRAY_OPEN("NHITS" , 1); |
532 | rc = grn_text_ulltoa(ctx, ctx->impl->output.buf, n_hits); |
533 | if (rc != GRN_SUCCESS) { |
534 | return rc; |
535 | } |
536 | GRN_OUTPUT_ARRAY_CLOSE(); /* NHITS. */ |
537 | rc = grn_ts_writer_output_header(ctx, writer); |
538 | if (rc != GRN_SUCCESS) { |
539 | return rc; |
540 | } |
541 | rc = grn_ts_writer_output_body(ctx, writer, in, n_in); |
542 | if (rc != GRN_SUCCESS) { |
543 | return rc; |
544 | } |
545 | GRN_OUTPUT_ARRAY_CLOSE(); /* RESULTSET. */ |
546 | GRN_OUTPUT_ARRAY_CLOSE(); /* RESET. */ |
547 | return GRN_SUCCESS; |
548 | } |
549 | |
550 | /* grn_ts_select_filter() applies a filter to all the records of a table. */ |
551 | static grn_rc |
552 | grn_ts_select_filter(grn_ctx *ctx, grn_obj *table, grn_ts_str str, |
553 | size_t offset, size_t limit, |
554 | grn_ts_record **out, size_t *n_out, size_t *n_hits) |
555 | { |
556 | grn_rc rc; |
557 | grn_table_cursor *cursor_obj; |
558 | grn_ts_cursor *cursor; |
559 | grn_ts_expr *expr = NULL; |
560 | grn_ts_record *buf = NULL; |
561 | size_t buf_size = 0; |
562 | |
563 | *out = NULL; |
564 | *n_out = 0; |
565 | *n_hits = 0; |
566 | |
567 | cursor_obj = grn_table_cursor_open(ctx, table, NULL, 0, NULL, 0, 0, -1, |
568 | GRN_CURSOR_ASCENDING | GRN_CURSOR_BY_ID); |
569 | if (!cursor_obj) { |
570 | return (ctx->rc != GRN_SUCCESS) ? ctx->rc : GRN_UNKNOWN_ERROR; |
571 | } |
572 | rc = grn_ts_obj_cursor_open(ctx, cursor_obj, &cursor); |
573 | if (rc != GRN_SUCCESS) { |
574 | grn_obj_close(ctx, cursor_obj); |
575 | return rc; |
576 | } |
577 | |
578 | if (str.size) { |
579 | rc = grn_ts_expr_parse(ctx, table, str, &expr); |
580 | } |
581 | if (rc == GRN_SUCCESS) { |
582 | for ( ; ; ) { |
583 | size_t batch_size; |
584 | grn_ts_record *batch; |
585 | |
586 | /* Extend the record buffer. */ |
587 | if (buf_size < (*n_out + GRN_TS_BATCH_SIZE)) { |
588 | size_t new_size = buf_size ? (buf_size * 2) : GRN_TS_BATCH_SIZE; |
589 | size_t n_bytes = sizeof(grn_ts_record) * new_size; |
590 | grn_ts_record *new_buf = (grn_ts_record *)GRN_REALLOC(buf, n_bytes); |
591 | if (!new_buf) { |
592 | GRN_TS_ERR(GRN_NO_MEMORY_AVAILABLE, |
593 | "GRN_REALLOC failed: %" GRN_FMT_SIZE, |
594 | n_bytes); |
595 | rc = ctx->rc; |
596 | break; |
597 | } |
598 | buf = new_buf; |
599 | buf_size = new_size; |
600 | } |
601 | |
602 | /* Read records from the cursor. */ |
603 | batch = buf + *n_out; |
604 | rc = grn_ts_cursor_read(ctx, cursor, batch, GRN_TS_BATCH_SIZE, |
605 | &batch_size); |
606 | if ((rc != GRN_SUCCESS) || !batch_size) { |
607 | break; |
608 | } |
609 | |
610 | /* Apply the filter. */ |
611 | if (expr) { |
612 | rc = grn_ts_expr_filter(ctx, expr, batch, batch_size, |
613 | batch, &batch_size); |
614 | if (rc != GRN_SUCCESS) { |
615 | break; |
616 | } |
617 | } |
618 | *n_hits += batch_size; |
619 | |
620 | /* Apply the offset and the limit. */ |
621 | if (offset) { |
622 | if (batch_size <= offset) { |
623 | offset -= batch_size; |
624 | batch_size = 0; |
625 | } else { |
626 | size_t n_bytes = sizeof(grn_ts_record) * (batch_size - offset); |
627 | grn_memmove(batch, batch + offset, n_bytes); |
628 | batch_size -= offset; |
629 | offset = 0; |
630 | } |
631 | } |
632 | if (batch_size <= limit) { |
633 | limit -= batch_size; |
634 | } else { |
635 | batch_size = limit; |
636 | limit = 0; |
637 | } |
638 | *n_out += batch_size; |
639 | } |
640 | /* Ignore a failure of destruction. */ |
641 | if (expr) { |
642 | grn_ts_expr_close(ctx, expr); |
643 | } |
644 | } |
645 | /* Ignore a failure of destruction. */ |
646 | grn_ts_cursor_close(ctx, cursor); |
647 | |
648 | if (rc != GRN_SUCCESS) { |
649 | if (buf) { |
650 | GRN_FREE(buf); |
651 | } |
652 | *n_out = 0; |
653 | *n_hits = 0; |
654 | return rc; |
655 | } |
656 | *out = buf; |
657 | return GRN_SUCCESS; |
658 | } |
659 | |
660 | /* grn_ts_select_scorer() adjust scores. */ |
661 | static grn_rc |
662 | grn_ts_select_scorer(grn_ctx *ctx, grn_obj *table, grn_ts_str str, |
663 | grn_ts_record *records, size_t n_records) |
664 | { |
665 | grn_rc rc; |
666 | grn_ts_str rest; |
667 | grn_ts_expr *expr; |
668 | rest = grn_ts_str_trim_score_assignment(str); |
669 | if (!rest.size) { |
670 | return GRN_SUCCESS; |
671 | } |
672 | rc = grn_ts_expr_parse(ctx, table, rest, &expr); |
673 | if (rc != GRN_SUCCESS) { |
674 | return rc; |
675 | } |
676 | rc = grn_ts_expr_adjust(ctx, expr, records, n_records); |
677 | grn_ts_expr_close(ctx, expr); |
678 | return rc; |
679 | } |
680 | |
681 | /* grn_ts_select_output() outputs the results. */ |
682 | static grn_rc |
683 | grn_ts_select_output(grn_ctx *ctx, grn_obj *table, grn_ts_str str, |
684 | const grn_ts_record *in, size_t n_in, size_t n_hits) |
685 | { |
686 | grn_ts_writer *writer; |
687 | grn_rc rc = grn_ts_writer_open(ctx, table, str, &writer); |
688 | if (rc != GRN_SUCCESS) { |
689 | return rc; |
690 | } |
691 | rc = grn_ts_writer_output(ctx, writer, in, n_in, n_hits); |
692 | grn_ts_writer_close(ctx, writer); |
693 | return rc; |
694 | } |
695 | |
/*
 * grn_ts_select_with_sortby() executes a select command with --sortby.
 *
 * Records are read in ID order in batches of GRN_TS_BATCH_SIZE, filtered by
 * `filter` if it is not empty, scored by `scorer` if something is left after
 * grn_ts_str_trim_score_assignment(), and passed to a sorter created from
 * `sortby`, `offset` and `limit`. The sorted records are then written with
 * the expressions in `output_columns`.
 *
 * Returns GRN_SUCCESS on success, or the first error otherwise. All the
 * resources acquired here are released before returning.
 */
static grn_rc
grn_ts_select_with_sortby(grn_ctx *ctx, grn_obj *table,
                          grn_ts_str filter, grn_ts_str scorer,
                          grn_ts_str sortby, grn_ts_str output_columns,
                          size_t offset, size_t limit)
{
  grn_rc rc;
  grn_ts_record *recs = NULL;
  size_t n_recs = 0, max_n_recs = 0, n_hits = 0;
  grn_table_cursor *cursor_obj;
  grn_ts_cursor *cursor = NULL;
  grn_ts_expr *filter_expr = NULL;
  grn_ts_expr *scorer_expr = NULL;
  grn_ts_sorter *sorter = NULL;
  cursor_obj = grn_table_cursor_open(ctx, table, NULL, 0, NULL, 0, 0, -1,
                                     GRN_CURSOR_ASCENDING | GRN_CURSOR_BY_ID);
  if (!cursor_obj) {
    GRN_TS_ERR_RETURN(GRN_UNKNOWN_ERROR, "grn_table_cursor_open failed" );
  }
  rc = grn_ts_obj_cursor_open(ctx, cursor_obj, &cursor);
  if (rc != GRN_SUCCESS) {
    /* The table cursor is closed here because wrapping it failed. */
    grn_obj_close(ctx, cursor_obj);
    return rc;
  }
  /* Parse the filter, the scorer and the sort keys, stopping at an error. */
  if (filter.size) {
    rc = grn_ts_expr_parse(ctx, table, filter, &filter_expr);
  }
  if (rc == GRN_SUCCESS) {
    scorer = grn_ts_str_trim_score_assignment(scorer);
    if (scorer.size) {
      rc = grn_ts_expr_parse(ctx, table, scorer, &scorer_expr);
    }
    if (rc == GRN_SUCCESS) {
      rc = grn_ts_sorter_parse(ctx, table, sortby, offset, limit, &sorter);
    }
  }
  if (rc == GRN_SUCCESS) {
    /* Records at the tail of recs that are not scored and sorted yet. */
    size_t n_pending_recs = 0;
    for ( ; ; ) {
      size_t batch_size;
      grn_ts_record *batch;
      /* Extend a buffer for records. */
      if (max_n_recs < (n_recs + GRN_TS_BATCH_SIZE)) {
        size_t n_bytes, new_max_n_recs = max_n_recs * 2;
        grn_ts_record *new_recs;
        if (!new_max_n_recs) {
          new_max_n_recs = GRN_TS_BATCH_SIZE;
        }
        n_bytes = sizeof(grn_ts_record) * new_max_n_recs;
        new_recs = (grn_ts_record *)GRN_REALLOC(recs, n_bytes);
        if (!new_recs) {
          GRN_TS_ERR(GRN_NO_MEMORY_AVAILABLE,
                     "GRN_REALLOC failed: %" GRN_FMT_SIZE,
                     n_bytes);
          rc = ctx->rc;
          break;
        }
        recs = new_recs;
        max_n_recs = new_max_n_recs;
      }
      /* Read records from a cursor. */
      batch = recs + n_recs;
      rc = grn_ts_cursor_read(ctx, cursor, batch, GRN_TS_BATCH_SIZE,
                              &batch_size);
      if (rc != GRN_SUCCESS) {
        break;
      } else if (!batch_size) {
        /* Apply a scorer and complete sorting. */
        if (scorer_expr) {
          rc = grn_ts_expr_adjust(ctx, scorer_expr,
                                  recs + n_recs - n_pending_recs,
                                  n_pending_recs);
          if (rc != GRN_SUCCESS) {
            break;
          }
        }
        if (n_pending_recs) {
          rc = grn_ts_sorter_progress(ctx, sorter, recs, n_recs, &n_recs);
          if (rc != GRN_SUCCESS) {
            break;
          }
        }
        rc = grn_ts_sorter_complete(ctx, sorter, recs, n_recs, &n_recs);
        break;
      }
      /* Apply a filter. */
      if (filter_expr) {
        rc = grn_ts_expr_filter(ctx, filter_expr, batch, batch_size,
                                batch, &batch_size);
        if (rc != GRN_SUCCESS) {
          break;
        }
      }
      n_hits += batch_size;
      n_recs += batch_size;
      n_pending_recs += batch_size;
      /*
       * Apply a scorer and progress sorting if there are enough pending
       * records.
       */
      if (n_pending_recs >= GRN_TS_BATCH_SIZE) {
        if (scorer_expr) {
          rc = grn_ts_expr_adjust(ctx, scorer_expr,
                                  recs + n_recs - n_pending_recs,
                                  n_pending_recs);
          if (rc != GRN_SUCCESS) {
            break;
          }
        }
        /* The sorter reports the new number of records through n_recs. */
        rc = grn_ts_sorter_progress(ctx, sorter, recs, n_recs, &n_recs);
        if (rc != GRN_SUCCESS) {
          break;
        }
        n_pending_recs = 0;
      }
    }
  }
  if (rc == GRN_SUCCESS) {
    rc = grn_ts_select_output(ctx, table, output_columns,
                              recs, n_recs, n_hits);
  }
  /* Release everything that has been acquired, whether rc is an error or not. */
  if (cursor) {
    grn_ts_cursor_close(ctx, cursor);
  }
  if (recs) {
    GRN_FREE(recs);
  }
  if (sorter) {
    grn_ts_sorter_close(ctx, sorter);
  }
  if (scorer_expr) {
    grn_ts_expr_close(ctx, scorer_expr);
  }
  if (filter_expr) {
    grn_ts_expr_close(ctx, filter_expr);
  }
  return rc;
}
835 | |
836 | /* |
837 | * grn_ts_select_without_sortby() executes a select command without --sortby. |
838 | */ |
839 | static grn_rc |
840 | grn_ts_select_without_sortby(grn_ctx *ctx, grn_obj *table, |
841 | grn_ts_str filter, grn_ts_str scorer, |
842 | grn_ts_str output_columns, |
843 | size_t offset, size_t limit) |
844 | { |
845 | grn_rc rc; |
846 | grn_ts_record *records = NULL; |
847 | size_t n_records, n_hits; |
848 | rc = grn_ts_select_filter(ctx, table, filter, offset, limit, |
849 | &records, &n_records, &n_hits); |
850 | if (rc == GRN_SUCCESS) { |
851 | rc = grn_ts_select_scorer(ctx, table, scorer, records, n_records); |
852 | if (rc == GRN_SUCCESS) { |
853 | rc = grn_ts_select_output(ctx, table, output_columns, |
854 | records, n_records, n_hits); |
855 | } |
856 | } |
857 | if (records) { |
858 | GRN_FREE(records); |
859 | } |
860 | return rc; |
861 | } |
862 | |
863 | /*------------------------------------------------------------- |
864 | * API. |
865 | */ |
866 | |
867 | grn_rc |
868 | grn_ts_select(grn_ctx *ctx, grn_obj *table, |
869 | const char *filter_ptr, size_t filter_len, |
870 | const char *scorer_ptr, size_t scorer_len, |
871 | const char *sortby_ptr, size_t sortby_len, |
872 | const char *output_columns_ptr, size_t output_columns_len, |
873 | size_t offset, size_t limit) |
874 | { |
875 | grn_rc rc; |
876 | grn_ts_str filter = { filter_ptr, filter_len }; |
877 | grn_ts_str scorer = { scorer_ptr, scorer_len }; |
878 | grn_ts_str sortby = { sortby_ptr, sortby_len }; |
879 | grn_ts_str output_columns = { output_columns_ptr, output_columns_len }; |
880 | if (!ctx) { |
881 | return GRN_INVALID_ARGUMENT; |
882 | } |
883 | if (!table || !grn_ts_obj_is_table(ctx, table) || |
884 | (!filter_ptr && filter_len) || (!scorer_ptr && scorer_len) || |
885 | (!sortby_ptr && sortby_len) || |
886 | (!output_columns_ptr && output_columns_len)) { |
887 | GRN_TS_ERR_RETURN(GRN_INVALID_ARGUMENT, "invalid argument" ); |
888 | } |
889 | filter = grn_ts_str_trim_left(filter); |
890 | if (sortby_len) { |
891 | rc = grn_ts_select_with_sortby(ctx, table, filter, scorer, sortby, |
892 | output_columns, offset, limit); |
893 | } else { |
894 | rc = grn_ts_select_without_sortby(ctx, table, filter, scorer, |
895 | output_columns, offset, limit); |
896 | } |
897 | if (rc != GRN_SUCCESS) { |
898 | GRN_BULK_REWIND(ctx->impl->output.buf); |
899 | if ((ctx->rc == GRN_SUCCESS) || !ctx->errbuf[0]) { |
900 | ERR(rc, "error message is missing" ); |
901 | } else if (ctx->errlvl < GRN_LOG_ERROR) { |
902 | ctx->errlvl = GRN_LOG_ERROR; |
903 | } |
904 | } |
905 | return rc; |
906 | } |
907 | |