1/* -*- c-basic-offset: 2 -*- */
2/*
3 Copyright(C) 2009-2017 Brazil
4
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License version 2.1 as published by the Free Software Foundation.
8
9 This library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with this library; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17*/
18
19#include "../grn_proc.h"
20#include "../grn_ctx_impl.h"
21#include "../grn_db.h"
22#include "../grn_str.h"
23
24#include <groonga/plugin.h>
25
/* Output length at or above which dump_record() flushes the pending output
   through grn_ctx_output_flush(). */
static const size_t DUMP_FLUSH_THRESHOLD_SIZE = 256 * 1024;

/* State shared by the dumper_*() and dump_*() helpers during one run of the
   "dump" command.  It is filled in by command_dump(). */
typedef struct {
  /* Text buffer that all generated commands are appended to. */
  grn_obj *output;
  /* When true, the helpers bracket each grn_ctx_at() lookup with
     grn_ctx_push_temporary_open_space()/grn_ctx_pop_temporary_open_space().
     command_dump() enables it when grn_thread_get_limit() returns 1. */
  grn_bool is_close_opened_object_mode;
  /* Set by dumper_collect_statistics_table() when any reference column
     exists; dump_schema() skips its second pass when this is false. */
  grn_bool have_reference_column;
  /* Set by dumper_collect_statistics_table() when any index column exists;
     dump_indexes() does nothing when this is false. */
  grn_bool have_index_column;
  /* Value of the "sort_hash_table" argument; makes dump_records() output
     hash table records sorted by key. */
  grn_bool is_sort_hash_table;
  /* Scratch text buffer reused by dump_record() and dump_records() to hold
     one column name at a time. */
  grn_obj column_name_buffer;
} grn_dumper;
36
37static void
38dumper_collect_statistics_table(grn_ctx *ctx,
39 grn_dumper *dumper,
40 grn_obj *table)
41{
42 grn_hash *columns;
43
44 columns = grn_hash_create(ctx, NULL, sizeof(grn_id), 0,
45 GRN_OBJ_TABLE_HASH_KEY|GRN_HASH_TINY);
46 if (!columns) {
47 return;
48 }
49
50 grn_table_columns(ctx, table, NULL, 0, (grn_obj *)columns);
51 GRN_HASH_EACH_BEGIN(ctx, columns, cursor, id) {
52 void *key;
53 grn_id column_id;
54 grn_obj *column;
55
56 grn_hash_cursor_get_key(ctx, cursor, &key);
57 column_id = *((grn_id *)key);
58
59 if (dumper->is_close_opened_object_mode) {
60 grn_ctx_push_temporary_open_space(ctx);
61 }
62
63 column = grn_ctx_at(ctx, column_id);
64 if (!column) {
65 GRN_PLUGIN_CLEAR_ERROR(ctx);
66 goto next_loop;
67 }
68
69 if (grn_obj_is_index_column(ctx, column)) {
70 dumper->have_index_column = GRN_TRUE;
71 } else if (grn_obj_is_reference_column(ctx, column)) {
72 dumper->have_reference_column = GRN_TRUE;
73 }
74
75 next_loop :
76 if (dumper->is_close_opened_object_mode) {
77 grn_ctx_pop_temporary_open_space(ctx);
78 }
79 } GRN_HASH_EACH_END(ctx, cursor);
80 grn_hash_close(ctx, columns);
81}
82
83static void
84dumper_collect_statistics(grn_ctx *ctx, grn_dumper *dumper)
85{
86 GRN_DB_EACH_BEGIN_BY_ID(ctx, cursor, id) {
87 void *name;
88 int name_size;
89 grn_obj *object;
90
91 if (grn_id_is_builtin(ctx, id)) {
92 continue;
93 }
94
95 name_size = grn_table_cursor_get_key(ctx, cursor, &name);
96 if (grn_obj_name_is_column(ctx, name, name_size)) {
97 continue;
98 }
99
100 if (dumper->is_close_opened_object_mode) {
101 grn_ctx_push_temporary_open_space(ctx);
102 }
103
104 object = grn_ctx_at(ctx, id);
105 if (!object) {
106 /* XXX: this clause is executed when MeCab tokenizer is enabled in
107 database but the groonga isn't supported MeCab.
108 We should return error mesage about it and error exit status
109 but it's too difficult for this architecture. :< */
110 GRN_PLUGIN_CLEAR_ERROR(ctx);
111 goto next_loop;
112 }
113
114 if (!grn_obj_is_table(ctx, object)) {
115 goto next_loop;
116 }
117
118 dumper_collect_statistics_table(ctx, dumper, object);
119
120next_loop :
121 if (dumper->is_close_opened_object_mode) {
122 grn_ctx_pop_temporary_open_space(ctx);
123 }
124 } GRN_DB_EACH_END(ctx, cursor);
125}
126
127static void
128dump_value_raw(grn_ctx *ctx, grn_obj *output, const char *value, int value_len)
129{
130 grn_obj escaped_value;
131 GRN_TEXT_INIT(&escaped_value, 0);
132 grn_text_esc(ctx, &escaped_value, value, value_len);
133 /* is no character escaped? */
134 /* TODO false positive with spaces inside values */
135 if (GRN_TEXT_LEN(&escaped_value) == value_len + 2) {
136 GRN_TEXT_PUT(ctx, output, value, value_len);
137 } else {
138 GRN_TEXT_PUT(ctx, output,
139 GRN_TEXT_VALUE(&escaped_value), GRN_TEXT_LEN(&escaped_value));
140 }
141 grn_obj_close(ctx, &escaped_value);
142}
143
144static void
145dump_value(grn_ctx *ctx, grn_dumper *dumper, const char *value, int value_len)
146{
147 dump_value_raw(ctx, dumper->output, value, value_len);
148}
149
150static void
151dump_configs(grn_ctx *ctx, grn_dumper *dumper)
152{
153 grn_obj *config_cursor;
154
155 config_cursor = grn_config_cursor_open(ctx);
156 if (!config_cursor)
157 return;
158
159 while (grn_config_cursor_next(ctx, config_cursor)) {
160 const char *key;
161 uint32_t key_size;
162 const char *value;
163 uint32_t value_size;
164
165 key_size = grn_config_cursor_get_key(ctx, config_cursor, &key);
166 value_size = grn_config_cursor_get_value(ctx, config_cursor, &value);
167
168 GRN_TEXT_PUTS(ctx, dumper->output, "config_set ");
169 dump_value(ctx, dumper, key, key_size);
170 GRN_TEXT_PUTS(ctx, dumper->output, " ");
171 dump_value(ctx, dumper, value, value_size);
172 GRN_TEXT_PUTC(ctx, dumper->output, '\n');
173 }
174 grn_obj_close(ctx, config_cursor);
175}
176
177static void
178dump_plugins(grn_ctx *ctx, grn_dumper *dumper)
179{
180 grn_obj plugin_names;
181 unsigned int i, n;
182
183 GRN_TEXT_INIT(&plugin_names, GRN_OBJ_VECTOR);
184
185 grn_plugin_get_names(ctx, &plugin_names);
186
187 n = grn_vector_size(ctx, &plugin_names);
188 if (n == 0) {
189 GRN_OBJ_FIN(ctx, &plugin_names);
190 return;
191 }
192
193 if (GRN_TEXT_LEN(dumper->output) > 0) {
194 GRN_TEXT_PUTC(ctx, dumper->output, '\n');
195 grn_ctx_output_flush(ctx, 0);
196 }
197 for (i = 0; i < n; i++) {
198 const char *name;
199 unsigned int name_size;
200
201 name_size = grn_vector_get_element(ctx, &plugin_names, i, &name, NULL, NULL);
202 grn_text_printf(ctx, dumper->output, "plugin_register %.*s\n",
203 (int)name_size, name);
204 }
205
206 GRN_OBJ_FIN(ctx, &plugin_names);
207}
208
209static void
210dump_obj_name_raw(grn_ctx *ctx, grn_obj *output, grn_obj *obj)
211{
212 char name[GRN_TABLE_MAX_KEY_SIZE];
213 int name_len;
214 name_len = grn_obj_name(ctx, obj, name, GRN_TABLE_MAX_KEY_SIZE);
215 dump_value_raw(ctx, output, name, name_len);
216}
217
218static void
219dump_obj_name(grn_ctx *ctx, grn_dumper *dumper, grn_obj *obj)
220{
221 dump_obj_name_raw(ctx, dumper->output, obj);
222}
223
224static void
225dump_column_name(grn_ctx *ctx, grn_dumper *dumper, grn_obj *column)
226{
227 char name[GRN_TABLE_MAX_KEY_SIZE];
228 int name_len;
229 name_len = grn_column_name(ctx, column, name, GRN_TABLE_MAX_KEY_SIZE);
230 dump_value(ctx, dumper, name, name_len);
231}
232
233static void
234dump_index_column_sources(grn_ctx *ctx, grn_dumper *dumper, grn_obj *column)
235{
236 grn_obj sources;
237 grn_id *source_ids;
238 int i, n;
239
240 GRN_OBJ_INIT(&sources, GRN_BULK, 0, GRN_ID_NIL);
241 grn_obj_get_info(ctx, column, GRN_INFO_SOURCE, &sources);
242
243 n = GRN_BULK_VSIZE(&sources) / sizeof(grn_id);
244 source_ids = (grn_id *)GRN_BULK_HEAD(&sources);
245 if (n > 0) {
246 GRN_TEXT_PUTC(ctx, dumper->output, ' ');
247 }
248 for (i = 0; i < n; i++) {
249 grn_id source_id;
250 grn_obj *source;
251
252 source_id = *source_ids;
253 source_ids++;
254
255 if (dumper->is_close_opened_object_mode) {
256 grn_ctx_push_temporary_open_space(ctx);
257 }
258
259 source = grn_ctx_at(ctx, source_id);
260 if (!source) {
261 goto next_loop;
262 }
263
264 if (i) { GRN_TEXT_PUTC(ctx, dumper->output, ','); }
265 switch (source->header.type) {
266 case GRN_TABLE_PAT_KEY:
267 case GRN_TABLE_DAT_KEY:
268 case GRN_TABLE_HASH_KEY:
269 GRN_TEXT_PUT(ctx,
270 dumper->output,
271 GRN_COLUMN_NAME_KEY,
272 GRN_COLUMN_NAME_KEY_LEN);
273 break;
274 default:
275 dump_column_name(ctx, dumper, source);
276 break;
277 }
278
279 next_loop :
280 if (dumper->is_close_opened_object_mode) {
281 grn_ctx_pop_temporary_open_space(ctx);
282 }
283 }
284 grn_obj_close(ctx, &sources);
285}
286
287static void
288dump_column(grn_ctx *ctx, grn_dumper *dumper, grn_obj *table, grn_obj *column)
289{
290 grn_id type_id;
291 grn_obj *type;
292 grn_column_flags flags;
293 grn_column_flags default_flags = GRN_OBJ_PERSISTENT;
294
295 type_id = grn_obj_get_range(ctx, column);
296 if (dumper->is_close_opened_object_mode) {
297 grn_ctx_push_temporary_open_space(ctx);
298 }
299 type = grn_ctx_at(ctx, type_id);
300 if (!type) {
301 /* ERR(GRN_RANGE_ERROR, "couldn't get column's type object"); */
302 goto exit;
303 }
304
305 GRN_TEXT_PUTS(ctx, dumper->output, "column_create ");
306 dump_obj_name(ctx, dumper, table);
307 GRN_TEXT_PUTC(ctx, dumper->output, ' ');
308 dump_column_name(ctx, dumper, column);
309 GRN_TEXT_PUTC(ctx, dumper->output, ' ');
310 if (type->header.type == GRN_TYPE) {
311 default_flags |= type->header.flags;
312 }
313 flags = grn_column_get_flags(ctx, column);
314 grn_dump_column_create_flags(ctx,
315 flags & ~default_flags,
316 dumper->output);
317 GRN_TEXT_PUTC(ctx, dumper->output, ' ');
318 dump_obj_name(ctx, dumper, type);
319 if (column->header.flags & GRN_OBJ_COLUMN_INDEX) {
320 dump_index_column_sources(ctx, dumper, column);
321 }
322 GRN_TEXT_PUTC(ctx, dumper->output, '\n');
323
324exit :
325 if (dumper->is_close_opened_object_mode) {
326 grn_ctx_pop_temporary_open_space(ctx);
327 }
328}
329
330static void
331dump_columns(grn_ctx *ctx, grn_dumper *dumper, grn_obj *table,
332 grn_bool dump_data_column,
333 grn_bool dump_reference_column,
334 grn_bool dump_index_column)
335{
336 grn_hash *columns;
337 columns = grn_hash_create(ctx, NULL, sizeof(grn_id), 0,
338 GRN_OBJ_TABLE_HASH_KEY|GRN_HASH_TINY);
339 if (!columns) {
340 GRN_PLUGIN_ERROR(ctx,
341 GRN_NO_MEMORY_AVAILABLE,
342 "couldn't create a hash to hold columns");
343 return;
344 }
345
346 if (grn_table_columns(ctx, table, NULL, 0, (grn_obj *)columns) >= 0) {
347 GRN_HASH_EACH_BEGIN(ctx, columns, cursor, id) {
348 void *key;
349 grn_id column_id;
350 grn_obj *column;
351
352 grn_hash_cursor_get_key(ctx, cursor, &key);
353 column_id = *((grn_id *)key);
354
355 if (dumper->is_close_opened_object_mode) {
356 grn_ctx_push_temporary_open_space(ctx);
357 }
358
359 column = grn_ctx_at(ctx, column_id);
360 if (!column) {
361 GRN_PLUGIN_CLEAR_ERROR(ctx);
362 goto next_loop;
363 }
364
365 if (grn_obj_is_index_column(ctx, column)) {
366 if (dump_index_column) {
367 dump_column(ctx, dumper, table, column);
368 GRN_PLUGIN_CLEAR_ERROR(ctx);
369 }
370 } else if (grn_obj_is_reference_column(ctx, column)) {
371 if (dump_reference_column) {
372 dump_column(ctx, dumper, table, column);
373 GRN_PLUGIN_CLEAR_ERROR(ctx);
374 }
375 } else {
376 if (dump_data_column) {
377 dump_column(ctx, dumper, table, column);
378 GRN_PLUGIN_CLEAR_ERROR(ctx);
379 }
380 }
381
382 next_loop :
383 if (dumper->is_close_opened_object_mode) {
384 grn_ctx_pop_temporary_open_space(ctx);
385 }
386 } GRN_HASH_EACH_END(ctx, cursor);
387 }
388 grn_hash_close(ctx, columns);
389}
390
391static void
392dump_record_column_vector(grn_ctx *ctx, grn_dumper *dumper, grn_id id,
393 grn_obj *column, grn_id range_id, grn_obj *buf)
394{
395 grn_obj *range;
396 grn_obj_format *format_argument = NULL;
397 grn_obj_format format;
398
399 range = grn_ctx_at(ctx, range_id);
400 if (column->header.flags & GRN_OBJ_WITH_WEIGHT) {
401 format.flags = GRN_OBJ_FORMAT_WITH_WEIGHT;
402 format_argument = &format;
403 }
404
405 if (grn_obj_is_table(ctx, range) ||
406 (range->header.flags & GRN_OBJ_KEY_VAR_SIZE) == 0) {
407 GRN_OBJ_INIT(buf, GRN_UVECTOR, 0, range_id);
408 grn_obj_get_value(ctx, column, id, buf);
409 grn_text_otoj(ctx, dumper->output, buf, format_argument);
410 } else {
411 GRN_OBJ_INIT(buf, GRN_VECTOR, 0, range_id);
412 grn_obj_get_value(ctx, column, id, buf);
413 grn_text_otoj(ctx, dumper->output, buf, format_argument);
414 }
415
416 grn_obj_unlink(ctx, range);
417 grn_obj_unlink(ctx, buf);
418}
419
420static void
421dump_record(grn_ctx *ctx, grn_dumper *dumper,
422 grn_obj *table,
423 grn_id id,
424 grn_obj *columns, int n_columns)
425{
426 int j;
427 grn_obj buf;
428 grn_obj *column_name = &(dumper->column_name_buffer);
429
430 GRN_TEXT_PUTC(ctx, dumper->output, '[');
431 for (j = 0; j < n_columns; j++) {
432 grn_bool is_value_column;
433 grn_id range;
434 grn_obj *column;
435 column = GRN_PTR_VALUE_AT(columns, j);
436 /* TODO: use grn_obj_is_value_accessor() */
437 GRN_BULK_REWIND(column_name);
438 grn_column_name_(ctx, column, column_name);
439 if (GRN_TEXT_LEN(column_name) == GRN_COLUMN_NAME_VALUE_LEN &&
440 !memcmp(GRN_TEXT_VALUE(column_name),
441 GRN_COLUMN_NAME_VALUE,
442 GRN_COLUMN_NAME_VALUE_LEN)) {
443 is_value_column = GRN_TRUE;
444 } else {
445 is_value_column = GRN_FALSE;
446 }
447 range = grn_obj_get_range(ctx, column);
448
449 if (j) { GRN_TEXT_PUTC(ctx, dumper->output, ','); }
450 switch (column->header.type) {
451 case GRN_COLUMN_VAR_SIZE:
452 case GRN_COLUMN_FIX_SIZE:
453 switch (column->header.flags & GRN_OBJ_COLUMN_TYPE_MASK) {
454 case GRN_OBJ_COLUMN_VECTOR:
455 dump_record_column_vector(ctx, dumper, id, column, range, &buf);
456 break;
457 case GRN_OBJ_COLUMN_SCALAR:
458 {
459 GRN_OBJ_INIT(&buf, GRN_BULK, 0, range);
460 grn_obj_get_value(ctx, column, id, &buf);
461 grn_text_otoj(ctx, dumper->output, &buf, NULL);
462 grn_obj_unlink(ctx, &buf);
463 }
464 break;
465 default:
466 GRN_PLUGIN_ERROR(ctx,
467 GRN_OPERATION_NOT_SUPPORTED,
468 "unsupported column type: %#x",
469 column->header.type);
470 break;
471 }
472 break;
473 case GRN_COLUMN_INDEX:
474 break;
475 case GRN_ACCESSOR:
476 {
477 GRN_OBJ_INIT(&buf, GRN_BULK, 0, range);
478 grn_obj_get_value(ctx, column, id, &buf);
479 /* XXX maybe, grn_obj_get_range() should not unconditionally return
480 GRN_DB_INT32 when column is GRN_ACCESSOR and
481 GRN_ACCESSOR_GET_VALUE */
482 if (is_value_column) {
483 buf.header.domain = grn_obj_get_range(ctx, table);
484 }
485 grn_text_otoj(ctx, dumper->output, &buf, NULL);
486 grn_obj_unlink(ctx, &buf);
487 }
488 break;
489 default:
490 GRN_PLUGIN_ERROR(ctx,
491 GRN_OPERATION_NOT_SUPPORTED,
492 "unsupported header type %#x",
493 column->header.type);
494 break;
495 }
496 }
497 GRN_TEXT_PUTC(ctx, dumper->output, ']');
498 if (GRN_TEXT_LEN(dumper->output) >= DUMP_FLUSH_THRESHOLD_SIZE) {
499 grn_ctx_output_flush(ctx, 0);
500 }
501}
502
503static void
504dump_records(grn_ctx *ctx, grn_dumper *dumper, grn_obj *table)
505{
506 grn_table_cursor *cursor;
507 int i, n_columns;
508 grn_obj columns;
509 grn_bool have_index_column = GRN_FALSE;
510 grn_bool have_data_column = GRN_FALSE;
511
512 if (grn_table_size(ctx, table) == 0) {
513 return;
514 }
515
516 if (dumper->is_close_opened_object_mode) {
517 grn_ctx_push_temporary_open_space(ctx);
518 }
519
520 GRN_PTR_INIT(&columns, GRN_OBJ_VECTOR, GRN_ID_NIL);
521
522 if (table->header.type == GRN_TABLE_NO_KEY) {
523 grn_obj *id_accessor;
524 id_accessor = grn_obj_column(ctx,
525 table,
526 GRN_COLUMN_NAME_ID,
527 GRN_COLUMN_NAME_ID_LEN);
528 GRN_PTR_PUT(ctx, &columns, id_accessor);
529 } else if (table->header.domain != GRN_ID_NIL) {
530 grn_obj *key_accessor;
531 key_accessor = grn_obj_column(ctx,
532 table,
533 GRN_COLUMN_NAME_KEY,
534 GRN_COLUMN_NAME_KEY_LEN);
535 GRN_PTR_PUT(ctx, &columns, key_accessor);
536 }
537
538 if (grn_obj_get_range(ctx, table) != GRN_ID_NIL) {
539 grn_obj *value_accessor;
540 value_accessor = grn_obj_column(ctx,
541 table,
542 GRN_COLUMN_NAME_VALUE,
543 GRN_COLUMN_NAME_VALUE_LEN);
544 GRN_PTR_PUT(ctx, &columns, value_accessor);
545 }
546
547 {
548 grn_hash *real_columns;
549
550 real_columns = grn_hash_create(ctx, NULL, sizeof(grn_id), 0,
551 GRN_OBJ_TABLE_HASH_KEY|GRN_HASH_TINY);
552 grn_table_columns(ctx, table, NULL, 0, (grn_obj *)real_columns);
553 GRN_HASH_EACH_BEGIN(ctx, real_columns, cursor, id) {
554 void *key;
555 grn_id column_id;
556 grn_obj *column;
557
558 if (dumper->is_close_opened_object_mode) {
559 grn_ctx_push_temporary_open_space(ctx);
560 }
561
562 grn_hash_cursor_get_key(ctx, cursor, &key);
563 column_id = *((grn_id *)key);
564
565 column = grn_ctx_at(ctx, column_id);
566 if (column) {
567 if (grn_obj_is_index_column(ctx, column)) {
568 have_index_column = GRN_TRUE;
569 if (dumper->is_close_opened_object_mode) {
570 grn_ctx_pop_temporary_open_space(ctx);
571 }
572 } else {
573 have_data_column = GRN_TRUE;
574 GRN_PTR_PUT(ctx, &columns, column);
575 if (dumper->is_close_opened_object_mode) {
576 grn_ctx_merge_temporary_open_space(ctx);
577 }
578 }
579 } else {
580 GRN_PLUGIN_CLEAR_ERROR(ctx);
581 if (dumper->is_close_opened_object_mode) {
582 grn_ctx_pop_temporary_open_space(ctx);
583 }
584 }
585 } GRN_HASH_EACH_END(ctx, cursor);
586 grn_hash_close(ctx, real_columns);
587 }
588
589 n_columns = GRN_BULK_VSIZE(&columns) / sizeof(grn_obj *);
590
591 if (have_index_column && !have_data_column) {
592 goto exit;
593 }
594
595 if (GRN_TEXT_LEN(dumper->output) > 0) {
596 GRN_TEXT_PUTC(ctx, dumper->output, '\n');
597 }
598
599 GRN_TEXT_PUTS(ctx, dumper->output, "load --table ");
600 dump_obj_name(ctx, dumper, table);
601 GRN_TEXT_PUTS(ctx, dumper->output, "\n[\n");
602
603 GRN_TEXT_PUTC(ctx, dumper->output, '[');
604 for (i = 0; i < n_columns; i++) {
605 grn_obj *column;
606 grn_obj *column_name = &(dumper->column_name_buffer);
607
608 column = GRN_PTR_VALUE_AT(&columns, i);
609 if (i) { GRN_TEXT_PUTC(ctx, dumper->output, ','); }
610 GRN_BULK_REWIND(column_name);
611 grn_column_name_(ctx, column, column_name);
612 grn_text_otoj(ctx, dumper->output, column_name, NULL);
613 }
614 GRN_TEXT_PUTS(ctx, dumper->output, "],\n");
615
616 if (table->header.type == GRN_TABLE_HASH_KEY && dumper->is_sort_hash_table) {
617 grn_obj *sorted;
618 grn_table_sort_key sort_keys[1];
619 uint32_t n_sort_keys = 1;
620 grn_bool is_first_record = GRN_TRUE;
621
622 sort_keys[0].key = grn_obj_column(ctx, table,
623 GRN_COLUMN_NAME_KEY,
624 GRN_COLUMN_NAME_KEY_LEN);
625 sort_keys[0].flags = GRN_TABLE_SORT_ASC;
626 sort_keys[0].offset = 0;
627 sorted = grn_table_create(ctx,
628 NULL, 0, NULL,
629 GRN_TABLE_NO_KEY,
630 NULL,
631 table);
632 grn_table_sort(ctx,
633 table, 0, -1,
634 sorted,
635 sort_keys, n_sort_keys);
636 cursor = grn_table_cursor_open(ctx,
637 sorted,
638 NULL, 0, NULL, 0,
639 0, -1,
640 0);
641 while (grn_table_cursor_next(ctx, cursor) != GRN_ID_NIL) {
642 void *value_raw;
643 grn_id id;
644
645 grn_table_cursor_get_value(ctx, cursor, &value_raw);
646 id = *((grn_id *)value_raw);
647
648 if (is_first_record) {
649 is_first_record = GRN_FALSE;
650 } else {
651 GRN_TEXT_PUTS(ctx, dumper->output, ",\n");
652 }
653 dump_record(ctx, dumper, table, id, &columns, n_columns);
654 }
655 GRN_TEXT_PUTS(ctx, dumper->output, "\n]\n");
656 grn_obj_close(ctx, sorted);
657 grn_obj_unlink(ctx, sort_keys[0].key);
658 } else {
659 grn_obj delete_commands;
660 grn_id old_id = GRN_ID_NIL;
661 grn_id id;
662
663 GRN_TEXT_INIT(&delete_commands, 0);
664 cursor = grn_table_cursor_open(ctx, table, NULL, 0, NULL, 0, 0, -1,
665 GRN_CURSOR_BY_KEY);
666 while ((id = grn_table_cursor_next(ctx, cursor)) != GRN_ID_NIL) {
667 if (old_id != GRN_ID_NIL) { GRN_TEXT_PUTS(ctx, dumper->output, ",\n"); }
668 if (table->header.type == GRN_TABLE_NO_KEY && old_id + 1 < id) {
669 grn_id current_id;
670 for (current_id = old_id + 1; current_id < id; current_id++) {
671 GRN_TEXT_PUTS(ctx, dumper->output, "[],\n");
672 GRN_TEXT_PUTS(ctx, &delete_commands, "delete --table ");
673 dump_obj_name_raw(ctx, &delete_commands, table);
674 GRN_TEXT_PUTS(ctx, &delete_commands, " --id ");
675 grn_text_lltoa(ctx, &delete_commands, current_id);
676 GRN_TEXT_PUTC(ctx, &delete_commands, '\n');
677 }
678 }
679 dump_record(ctx, dumper, table, id, &columns, n_columns);
680
681 old_id = id;
682 }
683 grn_table_cursor_close(ctx, cursor);
684 GRN_TEXT_PUTS(ctx, dumper->output, "\n]\n");
685 GRN_TEXT_PUT(ctx, dumper->output,
686 GRN_TEXT_VALUE(&delete_commands),
687 GRN_TEXT_LEN(&delete_commands));
688 GRN_OBJ_FIN(ctx, &delete_commands);
689 }
690exit :
691 for (i = 0; i < n_columns; i++) {
692 grn_obj *column;
693
694 column = GRN_PTR_VALUE_AT(&columns, i);
695 if (column->header.type == GRN_ACCESSOR) {
696 grn_obj_close(ctx, column);
697 }
698 }
699 GRN_OBJ_FIN(ctx, &columns);
700
701 if (dumper->is_close_opened_object_mode) {
702 grn_ctx_pop_temporary_open_space(ctx);
703 }
704}
705
706static void
707dump_table(grn_ctx *ctx, grn_dumper *dumper, grn_obj *table)
708{
709 grn_obj *domain = NULL;
710 grn_id range_id;
711 grn_obj *range = NULL;
712 grn_table_flags flags;
713 grn_table_flags default_flags = GRN_OBJ_PERSISTENT;
714 grn_obj *default_tokenizer;
715 grn_obj *normalizer;
716 grn_obj *token_filters;
717
718 switch (table->header.type) {
719 case GRN_TABLE_HASH_KEY:
720 case GRN_TABLE_PAT_KEY:
721 case GRN_TABLE_DAT_KEY:
722 domain = grn_ctx_at(ctx, table->header.domain);
723 break;
724 default:
725 break;
726 }
727
728 if (GRN_TEXT_LEN(dumper->output) > 0) {
729 GRN_TEXT_PUTC(ctx, dumper->output, '\n');
730 grn_ctx_output_flush(ctx, 0);
731 }
732
733 grn_table_get_info(ctx, table,
734 &flags,
735 NULL,
736 &default_tokenizer,
737 &normalizer,
738 &token_filters);
739
740 GRN_TEXT_PUTS(ctx, dumper->output, "table_create ");
741 dump_obj_name(ctx, dumper, table);
742 GRN_TEXT_PUTC(ctx, dumper->output, ' ');
743 grn_dump_table_create_flags(ctx,
744 flags & ~default_flags,
745 dumper->output);
746 if (domain) {
747 GRN_TEXT_PUTC(ctx, dumper->output, ' ');
748 dump_obj_name(ctx, dumper, domain);
749 }
750 range_id = grn_obj_get_range(ctx, table);
751 if (range_id != GRN_ID_NIL) {
752 range = grn_ctx_at(ctx, range_id);
753 if (!range) {
754 // ERR(GRN_RANGE_ERROR, "couldn't get table's value_type object");
755 return;
756 }
757 if (table->header.type != GRN_TABLE_NO_KEY) {
758 GRN_TEXT_PUTC(ctx, dumper->output, ' ');
759 } else {
760 GRN_TEXT_PUTS(ctx, dumper->output, " --value_type ");
761 }
762 dump_obj_name(ctx, dumper, range);
763 grn_obj_unlink(ctx, range);
764 }
765 if (default_tokenizer) {
766 GRN_TEXT_PUTS(ctx, dumper->output, " --default_tokenizer ");
767 dump_obj_name(ctx, dumper, default_tokenizer);
768 }
769 if (normalizer) {
770 GRN_TEXT_PUTS(ctx, dumper->output, " --normalizer ");
771 dump_obj_name(ctx, dumper, normalizer);
772 }
773 if (table->header.type != GRN_TABLE_NO_KEY) {
774 int n_token_filters;
775
776 n_token_filters = GRN_BULK_VSIZE(token_filters) / sizeof(grn_obj *);
777 if (n_token_filters > 0) {
778 int i;
779 GRN_TEXT_PUTS(ctx, dumper->output, " --token_filters ");
780 for (i = 0; i < n_token_filters; i++) {
781 grn_obj *token_filter = GRN_PTR_VALUE_AT(token_filters, i);
782 if (i > 0) {
783 GRN_TEXT_PUTC(ctx, dumper->output, ',');
784 }
785 dump_obj_name(ctx, dumper, token_filter);
786 }
787 }
788 }
789
790 GRN_TEXT_PUTC(ctx, dumper->output, '\n');
791
792 dump_columns(ctx, dumper, table, GRN_TRUE, GRN_FALSE, GRN_FALSE);
793}
794
795static void
796dump_schema(grn_ctx *ctx, grn_dumper *dumper)
797{
798 GRN_DB_EACH_BEGIN_BY_KEY(ctx, cursor, id) {
799 void *name;
800 int name_size;
801 grn_obj *object;
802
803 if (grn_id_is_builtin(ctx, id)) {
804 continue;
805 }
806
807 name_size = grn_table_cursor_get_key(ctx, cursor, &name);
808 if (grn_obj_name_is_column(ctx, name, name_size)) {
809 continue;
810 }
811
812 if (dumper->is_close_opened_object_mode) {
813 grn_ctx_push_temporary_open_space(ctx);
814 }
815
816 if ((object = grn_ctx_at(ctx, id))) {
817 switch (object->header.type) {
818 case GRN_TABLE_HASH_KEY:
819 case GRN_TABLE_PAT_KEY:
820 case GRN_TABLE_DAT_KEY:
821 case GRN_TABLE_NO_KEY:
822 dump_table(ctx, dumper, object);
823 break;
824 default:
825 break;
826 }
827 } else {
828 /* XXX: this clause is executed when MeCab tokenizer is enabled in
829 database but the groonga isn't supported MeCab.
830 We should return error mesage about it and error exit status
831 but it's too difficult for this architecture. :< */
832 GRN_PLUGIN_CLEAR_ERROR(ctx);
833 }
834
835 if (dumper->is_close_opened_object_mode) {
836 grn_ctx_pop_temporary_open_space(ctx);
837 }
838 } GRN_DB_EACH_END(ctx, cursor);
839
840 if (!dumper->have_reference_column) {
841 return;
842 }
843
844 GRN_TEXT_PUTC(ctx, dumper->output, '\n');
845 grn_ctx_output_flush(ctx, 0);
846
847 GRN_DB_EACH_BEGIN_BY_KEY(ctx, cursor, id) {
848 void *name;
849 int name_size;
850 grn_obj *object;
851
852 if (grn_id_is_builtin(ctx, id)) {
853 continue;
854 }
855
856 name_size = grn_table_cursor_get_key(ctx, cursor, &name);
857 if (grn_obj_name_is_column(ctx, name, name_size)) {
858 continue;
859 }
860
861 if (dumper->is_close_opened_object_mode) {
862 grn_ctx_push_temporary_open_space(ctx);
863 }
864
865 if ((object = grn_ctx_at(ctx, id))) {
866 switch (object->header.type) {
867 case GRN_TABLE_HASH_KEY:
868 case GRN_TABLE_PAT_KEY:
869 case GRN_TABLE_DAT_KEY:
870 case GRN_TABLE_NO_KEY:
871 dump_columns(ctx, dumper, object, GRN_FALSE, GRN_TRUE, GRN_FALSE);
872 break;
873 default:
874 break;
875 }
876 } else {
877 /* XXX: this clause is executed when MeCab tokenizer is enabled in
878 database but the groonga isn't supported MeCab.
879 We should return error mesage about it and error exit status
880 but it's too difficult for this architecture. :< */
881 GRN_PLUGIN_CLEAR_ERROR(ctx);
882 }
883
884 if (dumper->is_close_opened_object_mode) {
885 grn_ctx_pop_temporary_open_space(ctx);
886 }
887 } GRN_DB_EACH_END(ctx, cursor);
888}
889
890static void
891dump_selected_tables_records(grn_ctx *ctx, grn_dumper *dumper, grn_obj *tables)
892{
893 const char *p, *e;
894
895 p = GRN_TEXT_VALUE(tables);
896 e = p + GRN_TEXT_LEN(tables);
897 while (p < e) {
898 int len;
899 grn_obj *table;
900 const char *token, *token_e;
901
902 if ((len = grn_isspace(p, ctx->encoding))) {
903 p += len;
904 continue;
905 }
906
907 token = p;
908 if (!(('a' <= *p && *p <= 'z') ||
909 ('A' <= *p && *p <= 'Z') ||
910 (*p == '_'))) {
911 while (p < e && !grn_isspace(p, ctx->encoding)) {
912 p++;
913 }
914 GRN_LOG(ctx, GRN_LOG_WARNING, "invalid table name is ignored: <%.*s>\n",
915 (int)(p - token), token);
916 continue;
917 }
918 while (p < e &&
919 (('a' <= *p && *p <= 'z') ||
920 ('A' <= *p && *p <= 'Z') ||
921 ('0' <= *p && *p <= '9') ||
922 (*p == '_'))) {
923 p++;
924 }
925 token_e = p;
926 while (p < e && (len = grn_isspace(p, ctx->encoding))) {
927 p += len;
928 continue;
929 }
930 if (p < e && *p == ',') {
931 p++;
932 }
933
934 table = grn_ctx_get(ctx, token, token_e - token);
935 if (!table) {
936 GRN_LOG(ctx, GRN_LOG_WARNING,
937 "nonexistent table name is ignored: <%.*s>\n",
938 (int)(token_e - token), token);
939 continue;
940 }
941
942 if (grn_obj_is_table(ctx, table)) {
943 dump_records(ctx, dumper, table);
944 }
945 grn_obj_unlink(ctx, table);
946 }
947}
948
949static void
950dump_all_records(grn_ctx *ctx, grn_dumper *dumper)
951{
952 GRN_DB_EACH_BEGIN_BY_KEY(ctx, cursor, id) {
953 void *name;
954 int name_size;
955 grn_obj *table;
956
957 if (grn_id_is_builtin(ctx, id)) {
958 continue;
959 }
960
961 name_size = grn_table_cursor_get_key(ctx, cursor, &name);
962 if (grn_obj_name_is_column(ctx, name, name_size)) {
963 continue;
964 }
965
966 if (dumper->is_close_opened_object_mode) {
967 grn_ctx_push_temporary_open_space(ctx);
968 }
969
970 table = grn_ctx_at(ctx, id);
971 if (!table) {
972 /* XXX: this clause is executed when MeCab tokenizer is enabled in
973 database but the groonga isn't supported MeCab.
974 We should return error mesage about it and error exit status
975 but it's too difficult for this architecture. :< */
976 GRN_PLUGIN_CLEAR_ERROR(ctx);
977 goto next_loop;
978 }
979
980 if (grn_obj_is_table(ctx, table)) {
981 dump_records(ctx, dumper, table);
982 }
983
984 next_loop :
985 if (dumper->is_close_opened_object_mode) {
986 grn_ctx_pop_temporary_open_space(ctx);
987 }
988 } GRN_DB_EACH_END(ctx, cursor);
989}
990
991static void
992dump_indexes(grn_ctx *ctx, grn_dumper *dumper)
993{
994 if (!dumper->have_index_column) {
995 return;
996 }
997
998 if (GRN_TEXT_LEN(dumper->output) > 0) {
999 GRN_TEXT_PUTC(ctx, dumper->output, '\n');
1000 }
1001
1002 GRN_DB_EACH_BEGIN_BY_KEY(ctx, cursor, id) {
1003 void *name;
1004 int name_size;
1005 grn_obj *object;
1006
1007 if (grn_id_is_builtin(ctx, id)) {
1008 continue;
1009 }
1010
1011 name_size = grn_table_cursor_get_key(ctx, cursor, &name);
1012 if (grn_obj_name_is_column(ctx, name, name_size)) {
1013 continue;
1014 }
1015
1016 if (dumper->is_close_opened_object_mode) {
1017 grn_ctx_push_temporary_open_space(ctx);
1018 }
1019
1020 object = grn_ctx_at(ctx, id);
1021 if (!object) {
1022 /* XXX: this clause is executed when MeCab tokenizer is enabled in
1023 database but the groonga isn't supported MeCab.
1024 We should return error mesage about it and error exit status
1025 but it's too difficult for this architecture. :< */
1026 GRN_PLUGIN_CLEAR_ERROR(ctx);
1027 goto next_loop;
1028 }
1029
1030 if (grn_obj_is_table(ctx, object)) {
1031 dump_columns(ctx, dumper, object, GRN_FALSE, GRN_FALSE, GRN_TRUE);
1032 }
1033
1034 next_loop :
1035 if (dumper->is_close_opened_object_mode) {
1036 grn_ctx_pop_temporary_open_space(ctx);
1037 }
1038 } GRN_DB_EACH_END(ctx, cursor);
1039}
1040
1041static grn_obj *
1042command_dump(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
1043{
1044 grn_dumper dumper;
1045 grn_obj *tables;
1046 grn_bool is_dump_plugins;
1047 grn_bool is_dump_schema;
1048 grn_bool is_dump_records;
1049 grn_bool is_dump_indexes;
1050 grn_bool is_dump_configs;
1051
1052 dumper.output = ctx->impl->output.buf;
1053 if (grn_thread_get_limit() == 1) {
1054 dumper.is_close_opened_object_mode = GRN_TRUE;
1055 } else {
1056 dumper.is_close_opened_object_mode = GRN_FALSE;
1057 }
1058 dumper.have_reference_column = GRN_FALSE;
1059 dumper.have_index_column = GRN_FALSE;
1060
1061 tables = grn_plugin_proc_get_var(ctx, user_data, "tables", -1);
1062 is_dump_plugins = grn_plugin_proc_get_var_bool(ctx, user_data,
1063 "dump_plugins", -1,
1064 GRN_TRUE);
1065 is_dump_schema = grn_plugin_proc_get_var_bool(ctx, user_data,
1066 "dump_schema", -1,
1067 GRN_TRUE);
1068 is_dump_records = grn_plugin_proc_get_var_bool(ctx, user_data,
1069 "dump_records", -1,
1070 GRN_TRUE);
1071 is_dump_indexes = grn_plugin_proc_get_var_bool(ctx, user_data,
1072 "dump_indexes", -1,
1073 GRN_TRUE);
1074 is_dump_configs = grn_plugin_proc_get_var_bool(ctx, user_data,
1075 "dump_configs", -1,
1076 GRN_TRUE);
1077 dumper.is_sort_hash_table =
1078 grn_plugin_proc_get_var_bool(ctx, user_data,
1079 "sort_hash_table", -1,
1080 GRN_FALSE);
1081 GRN_TEXT_INIT(&(dumper.column_name_buffer), 0);
1082
1083 grn_ctx_set_output_type(ctx, GRN_CONTENT_GROONGA_COMMAND_LIST);
1084
1085 dumper_collect_statistics(ctx, &dumper);
1086
1087 if (is_dump_configs) {
1088 dump_configs(ctx, &dumper);
1089 }
1090 if (is_dump_plugins) {
1091 dump_plugins(ctx, &dumper);
1092 }
1093 if (is_dump_schema) {
1094 dump_schema(ctx, &dumper);
1095 }
1096 if (is_dump_records) {
1097 /* To update index columns correctly, we first create the whole schema, then
1098 load non-derivative records, while skipping records of index columns. That
1099 way, Groonga will silently do the job of updating index columns for us. */
1100 if (GRN_TEXT_LEN(tables) > 0) {
1101 dump_selected_tables_records(ctx, &dumper, tables);
1102 } else {
1103 dump_all_records(ctx, &dumper);
1104 }
1105 }
1106 if (is_dump_indexes) {
1107 dump_indexes(ctx, &dumper);
1108 }
1109 /* remove the last newline because another one will be added by the caller.
1110 maybe, the caller of proc functions currently doesn't consider the
1111 possibility of multiple-line output from proc functions. */
1112 if (GRN_BULK_VSIZE(dumper.output) > 0) {
1113 grn_bulk_truncate(ctx, dumper.output, GRN_BULK_VSIZE(dumper.output) - 1);
1114 }
1115
1116 GRN_OBJ_FIN(ctx, &(dumper.column_name_buffer));
1117
1118 return NULL;
1119}
1120
1121void
1122grn_proc_init_dump(grn_ctx *ctx)
1123{
1124 grn_expr_var vars[7];
1125
1126 grn_plugin_expr_var_init(ctx, &(vars[0]), "tables", -1);
1127 grn_plugin_expr_var_init(ctx, &(vars[1]), "dump_plugins", -1);
1128 grn_plugin_expr_var_init(ctx, &(vars[2]), "dump_schema", -1);
1129 grn_plugin_expr_var_init(ctx, &(vars[3]), "dump_records", -1);
1130 grn_plugin_expr_var_init(ctx, &(vars[4]), "dump_indexes", -1);
1131 grn_plugin_expr_var_init(ctx, &(vars[5]), "dump_configs", -1);
1132 grn_plugin_expr_var_init(ctx, &(vars[6]), "sort_hash_table", -1);
1133 grn_plugin_command_create(ctx,
1134 "dump", -1,
1135 command_dump,
1136 sizeof(vars) / sizeof(vars[0]),
1137 vars);
1138}
1139