1 | /* -*- c-basic-offset: 2 -*- */ |
2 | /* |
3 | Copyright(C) 2009-2017 Brazil |
4 | |
5 | This library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License version 2.1 as published by the Free Software Foundation. |
8 | |
9 | This library is distributed in the hope that it will be useful, |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | Lesser General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU Lesser General Public |
15 | License along with this library; if not, write to the Free Software |
16 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
17 | */ |
18 | |
19 | #include "../grn_proc.h" |
20 | #include "../grn_ctx_impl.h" |
21 | #include "../grn_db.h" |
22 | #include "../grn_str.h" |
23 | |
24 | #include <groonga/plugin.h> |
25 | |
/* Once the pending dump output holds at least this many bytes (256 KiB),
   dump_record() flushes it with grn_ctx_output_flush(). */
static const size_t DUMP_FLUSH_THRESHOLD_SIZE = 256 * 1024;
27 | |
/* State shared by the dump helpers while one dump is being produced. */
typedef struct {
  /* Text buffer that receives the generated commands. */
  grn_obj *output;
  /* When true, the helpers wrap object lookups in
     grn_ctx_push_temporary_open_space() /
     grn_ctx_pop_temporary_open_space() pairs. */
  grn_bool is_close_opened_object_mode;
  /* Set by dumper_collect_statistics_table() when a reference column
     is found. */
  grn_bool have_reference_column;
  /* Set by dumper_collect_statistics_table() when an index column
     is found. */
  grn_bool have_index_column;
  /* When true, dump_records() dumps GRN_TABLE_HASH_KEY tables through
     a table sorted by _key in ascending order. */
  grn_bool is_sort_hash_table;
  /* Scratch buffer reused for column names by dump_record() and
     dump_records(). */
  grn_obj column_name_buffer;
} grn_dumper;
36 | |
37 | static void |
38 | dumper_collect_statistics_table(grn_ctx *ctx, |
39 | grn_dumper *dumper, |
40 | grn_obj *table) |
41 | { |
42 | grn_hash *columns; |
43 | |
44 | columns = grn_hash_create(ctx, NULL, sizeof(grn_id), 0, |
45 | GRN_OBJ_TABLE_HASH_KEY|GRN_HASH_TINY); |
46 | if (!columns) { |
47 | return; |
48 | } |
49 | |
50 | grn_table_columns(ctx, table, NULL, 0, (grn_obj *)columns); |
51 | GRN_HASH_EACH_BEGIN(ctx, columns, cursor, id) { |
52 | void *key; |
53 | grn_id column_id; |
54 | grn_obj *column; |
55 | |
56 | grn_hash_cursor_get_key(ctx, cursor, &key); |
57 | column_id = *((grn_id *)key); |
58 | |
59 | if (dumper->is_close_opened_object_mode) { |
60 | grn_ctx_push_temporary_open_space(ctx); |
61 | } |
62 | |
63 | column = grn_ctx_at(ctx, column_id); |
64 | if (!column) { |
65 | GRN_PLUGIN_CLEAR_ERROR(ctx); |
66 | goto next_loop; |
67 | } |
68 | |
69 | if (grn_obj_is_index_column(ctx, column)) { |
70 | dumper->have_index_column = GRN_TRUE; |
71 | } else if (grn_obj_is_reference_column(ctx, column)) { |
72 | dumper->have_reference_column = GRN_TRUE; |
73 | } |
74 | |
75 | next_loop : |
76 | if (dumper->is_close_opened_object_mode) { |
77 | grn_ctx_pop_temporary_open_space(ctx); |
78 | } |
79 | } GRN_HASH_EACH_END(ctx, cursor); |
80 | grn_hash_close(ctx, columns); |
81 | } |
82 | |
83 | static void |
84 | dumper_collect_statistics(grn_ctx *ctx, grn_dumper *dumper) |
85 | { |
86 | GRN_DB_EACH_BEGIN_BY_ID(ctx, cursor, id) { |
87 | void *name; |
88 | int name_size; |
89 | grn_obj *object; |
90 | |
91 | if (grn_id_is_builtin(ctx, id)) { |
92 | continue; |
93 | } |
94 | |
95 | name_size = grn_table_cursor_get_key(ctx, cursor, &name); |
96 | if (grn_obj_name_is_column(ctx, name, name_size)) { |
97 | continue; |
98 | } |
99 | |
100 | if (dumper->is_close_opened_object_mode) { |
101 | grn_ctx_push_temporary_open_space(ctx); |
102 | } |
103 | |
104 | object = grn_ctx_at(ctx, id); |
105 | if (!object) { |
106 | /* XXX: this clause is executed when MeCab tokenizer is enabled in |
107 | database but the groonga isn't supported MeCab. |
108 | We should return error mesage about it and error exit status |
109 | but it's too difficult for this architecture. :< */ |
110 | GRN_PLUGIN_CLEAR_ERROR(ctx); |
111 | goto next_loop; |
112 | } |
113 | |
114 | if (!grn_obj_is_table(ctx, object)) { |
115 | goto next_loop; |
116 | } |
117 | |
118 | dumper_collect_statistics_table(ctx, dumper, object); |
119 | |
120 | next_loop : |
121 | if (dumper->is_close_opened_object_mode) { |
122 | grn_ctx_pop_temporary_open_space(ctx); |
123 | } |
124 | } GRN_DB_EACH_END(ctx, cursor); |
125 | } |
126 | |
127 | static void |
128 | dump_value_raw(grn_ctx *ctx, grn_obj *output, const char *value, int value_len) |
129 | { |
130 | grn_obj escaped_value; |
131 | GRN_TEXT_INIT(&escaped_value, 0); |
132 | grn_text_esc(ctx, &escaped_value, value, value_len); |
133 | /* is no character escaped? */ |
134 | /* TODO false positive with spaces inside values */ |
135 | if (GRN_TEXT_LEN(&escaped_value) == value_len + 2) { |
136 | GRN_TEXT_PUT(ctx, output, value, value_len); |
137 | } else { |
138 | GRN_TEXT_PUT(ctx, output, |
139 | GRN_TEXT_VALUE(&escaped_value), GRN_TEXT_LEN(&escaped_value)); |
140 | } |
141 | grn_obj_close(ctx, &escaped_value); |
142 | } |
143 | |
144 | static void |
145 | dump_value(grn_ctx *ctx, grn_dumper *dumper, const char *value, int value_len) |
146 | { |
147 | dump_value_raw(ctx, dumper->output, value, value_len); |
148 | } |
149 | |
150 | static void |
151 | dump_configs(grn_ctx *ctx, grn_dumper *dumper) |
152 | { |
153 | grn_obj *config_cursor; |
154 | |
155 | config_cursor = grn_config_cursor_open(ctx); |
156 | if (!config_cursor) |
157 | return; |
158 | |
159 | while (grn_config_cursor_next(ctx, config_cursor)) { |
160 | const char *key; |
161 | uint32_t key_size; |
162 | const char *value; |
163 | uint32_t value_size; |
164 | |
165 | key_size = grn_config_cursor_get_key(ctx, config_cursor, &key); |
166 | value_size = grn_config_cursor_get_value(ctx, config_cursor, &value); |
167 | |
168 | GRN_TEXT_PUTS(ctx, dumper->output, "config_set " ); |
169 | dump_value(ctx, dumper, key, key_size); |
170 | GRN_TEXT_PUTS(ctx, dumper->output, " " ); |
171 | dump_value(ctx, dumper, value, value_size); |
172 | GRN_TEXT_PUTC(ctx, dumper->output, '\n'); |
173 | } |
174 | grn_obj_close(ctx, config_cursor); |
175 | } |
176 | |
177 | static void |
178 | dump_plugins(grn_ctx *ctx, grn_dumper *dumper) |
179 | { |
180 | grn_obj plugin_names; |
181 | unsigned int i, n; |
182 | |
183 | GRN_TEXT_INIT(&plugin_names, GRN_OBJ_VECTOR); |
184 | |
185 | grn_plugin_get_names(ctx, &plugin_names); |
186 | |
187 | n = grn_vector_size(ctx, &plugin_names); |
188 | if (n == 0) { |
189 | GRN_OBJ_FIN(ctx, &plugin_names); |
190 | return; |
191 | } |
192 | |
193 | if (GRN_TEXT_LEN(dumper->output) > 0) { |
194 | GRN_TEXT_PUTC(ctx, dumper->output, '\n'); |
195 | grn_ctx_output_flush(ctx, 0); |
196 | } |
197 | for (i = 0; i < n; i++) { |
198 | const char *name; |
199 | unsigned int name_size; |
200 | |
201 | name_size = grn_vector_get_element(ctx, &plugin_names, i, &name, NULL, NULL); |
202 | grn_text_printf(ctx, dumper->output, "plugin_register %.*s\n" , |
203 | (int)name_size, name); |
204 | } |
205 | |
206 | GRN_OBJ_FIN(ctx, &plugin_names); |
207 | } |
208 | |
209 | static void |
210 | dump_obj_name_raw(grn_ctx *ctx, grn_obj *output, grn_obj *obj) |
211 | { |
212 | char name[GRN_TABLE_MAX_KEY_SIZE]; |
213 | int name_len; |
214 | name_len = grn_obj_name(ctx, obj, name, GRN_TABLE_MAX_KEY_SIZE); |
215 | dump_value_raw(ctx, output, name, name_len); |
216 | } |
217 | |
218 | static void |
219 | dump_obj_name(grn_ctx *ctx, grn_dumper *dumper, grn_obj *obj) |
220 | { |
221 | dump_obj_name_raw(ctx, dumper->output, obj); |
222 | } |
223 | |
224 | static void |
225 | dump_column_name(grn_ctx *ctx, grn_dumper *dumper, grn_obj *column) |
226 | { |
227 | char name[GRN_TABLE_MAX_KEY_SIZE]; |
228 | int name_len; |
229 | name_len = grn_column_name(ctx, column, name, GRN_TABLE_MAX_KEY_SIZE); |
230 | dump_value(ctx, dumper, name, name_len); |
231 | } |
232 | |
233 | static void |
234 | dump_index_column_sources(grn_ctx *ctx, grn_dumper *dumper, grn_obj *column) |
235 | { |
236 | grn_obj sources; |
237 | grn_id *source_ids; |
238 | int i, n; |
239 | |
240 | GRN_OBJ_INIT(&sources, GRN_BULK, 0, GRN_ID_NIL); |
241 | grn_obj_get_info(ctx, column, GRN_INFO_SOURCE, &sources); |
242 | |
243 | n = GRN_BULK_VSIZE(&sources) / sizeof(grn_id); |
244 | source_ids = (grn_id *)GRN_BULK_HEAD(&sources); |
245 | if (n > 0) { |
246 | GRN_TEXT_PUTC(ctx, dumper->output, ' '); |
247 | } |
248 | for (i = 0; i < n; i++) { |
249 | grn_id source_id; |
250 | grn_obj *source; |
251 | |
252 | source_id = *source_ids; |
253 | source_ids++; |
254 | |
255 | if (dumper->is_close_opened_object_mode) { |
256 | grn_ctx_push_temporary_open_space(ctx); |
257 | } |
258 | |
259 | source = grn_ctx_at(ctx, source_id); |
260 | if (!source) { |
261 | goto next_loop; |
262 | } |
263 | |
264 | if (i) { GRN_TEXT_PUTC(ctx, dumper->output, ','); } |
265 | switch (source->header.type) { |
266 | case GRN_TABLE_PAT_KEY: |
267 | case GRN_TABLE_DAT_KEY: |
268 | case GRN_TABLE_HASH_KEY: |
269 | GRN_TEXT_PUT(ctx, |
270 | dumper->output, |
271 | GRN_COLUMN_NAME_KEY, |
272 | GRN_COLUMN_NAME_KEY_LEN); |
273 | break; |
274 | default: |
275 | dump_column_name(ctx, dumper, source); |
276 | break; |
277 | } |
278 | |
279 | next_loop : |
280 | if (dumper->is_close_opened_object_mode) { |
281 | grn_ctx_pop_temporary_open_space(ctx); |
282 | } |
283 | } |
284 | grn_obj_close(ctx, &sources); |
285 | } |
286 | |
287 | static void |
288 | dump_column(grn_ctx *ctx, grn_dumper *dumper, grn_obj *table, grn_obj *column) |
289 | { |
290 | grn_id type_id; |
291 | grn_obj *type; |
292 | grn_column_flags flags; |
293 | grn_column_flags default_flags = GRN_OBJ_PERSISTENT; |
294 | |
295 | type_id = grn_obj_get_range(ctx, column); |
296 | if (dumper->is_close_opened_object_mode) { |
297 | grn_ctx_push_temporary_open_space(ctx); |
298 | } |
299 | type = grn_ctx_at(ctx, type_id); |
300 | if (!type) { |
301 | /* ERR(GRN_RANGE_ERROR, "couldn't get column's type object"); */ |
302 | goto exit; |
303 | } |
304 | |
305 | GRN_TEXT_PUTS(ctx, dumper->output, "column_create " ); |
306 | dump_obj_name(ctx, dumper, table); |
307 | GRN_TEXT_PUTC(ctx, dumper->output, ' '); |
308 | dump_column_name(ctx, dumper, column); |
309 | GRN_TEXT_PUTC(ctx, dumper->output, ' '); |
310 | if (type->header.type == GRN_TYPE) { |
311 | default_flags |= type->header.flags; |
312 | } |
313 | flags = grn_column_get_flags(ctx, column); |
314 | grn_dump_column_create_flags(ctx, |
315 | flags & ~default_flags, |
316 | dumper->output); |
317 | GRN_TEXT_PUTC(ctx, dumper->output, ' '); |
318 | dump_obj_name(ctx, dumper, type); |
319 | if (column->header.flags & GRN_OBJ_COLUMN_INDEX) { |
320 | dump_index_column_sources(ctx, dumper, column); |
321 | } |
322 | GRN_TEXT_PUTC(ctx, dumper->output, '\n'); |
323 | |
324 | exit : |
325 | if (dumper->is_close_opened_object_mode) { |
326 | grn_ctx_pop_temporary_open_space(ctx); |
327 | } |
328 | } |
329 | |
330 | static void |
331 | dump_columns(grn_ctx *ctx, grn_dumper *dumper, grn_obj *table, |
332 | grn_bool dump_data_column, |
333 | grn_bool dump_reference_column, |
334 | grn_bool dump_index_column) |
335 | { |
336 | grn_hash *columns; |
337 | columns = grn_hash_create(ctx, NULL, sizeof(grn_id), 0, |
338 | GRN_OBJ_TABLE_HASH_KEY|GRN_HASH_TINY); |
339 | if (!columns) { |
340 | GRN_PLUGIN_ERROR(ctx, |
341 | GRN_NO_MEMORY_AVAILABLE, |
342 | "couldn't create a hash to hold columns" ); |
343 | return; |
344 | } |
345 | |
346 | if (grn_table_columns(ctx, table, NULL, 0, (grn_obj *)columns) >= 0) { |
347 | GRN_HASH_EACH_BEGIN(ctx, columns, cursor, id) { |
348 | void *key; |
349 | grn_id column_id; |
350 | grn_obj *column; |
351 | |
352 | grn_hash_cursor_get_key(ctx, cursor, &key); |
353 | column_id = *((grn_id *)key); |
354 | |
355 | if (dumper->is_close_opened_object_mode) { |
356 | grn_ctx_push_temporary_open_space(ctx); |
357 | } |
358 | |
359 | column = grn_ctx_at(ctx, column_id); |
360 | if (!column) { |
361 | GRN_PLUGIN_CLEAR_ERROR(ctx); |
362 | goto next_loop; |
363 | } |
364 | |
365 | if (grn_obj_is_index_column(ctx, column)) { |
366 | if (dump_index_column) { |
367 | dump_column(ctx, dumper, table, column); |
368 | GRN_PLUGIN_CLEAR_ERROR(ctx); |
369 | } |
370 | } else if (grn_obj_is_reference_column(ctx, column)) { |
371 | if (dump_reference_column) { |
372 | dump_column(ctx, dumper, table, column); |
373 | GRN_PLUGIN_CLEAR_ERROR(ctx); |
374 | } |
375 | } else { |
376 | if (dump_data_column) { |
377 | dump_column(ctx, dumper, table, column); |
378 | GRN_PLUGIN_CLEAR_ERROR(ctx); |
379 | } |
380 | } |
381 | |
382 | next_loop : |
383 | if (dumper->is_close_opened_object_mode) { |
384 | grn_ctx_pop_temporary_open_space(ctx); |
385 | } |
386 | } GRN_HASH_EACH_END(ctx, cursor); |
387 | } |
388 | grn_hash_close(ctx, columns); |
389 | } |
390 | |
391 | static void |
392 | dump_record_column_vector(grn_ctx *ctx, grn_dumper *dumper, grn_id id, |
393 | grn_obj *column, grn_id range_id, grn_obj *buf) |
394 | { |
395 | grn_obj *range; |
396 | grn_obj_format *format_argument = NULL; |
397 | grn_obj_format format; |
398 | |
399 | range = grn_ctx_at(ctx, range_id); |
400 | if (column->header.flags & GRN_OBJ_WITH_WEIGHT) { |
401 | format.flags = GRN_OBJ_FORMAT_WITH_WEIGHT; |
402 | format_argument = &format; |
403 | } |
404 | |
405 | if (grn_obj_is_table(ctx, range) || |
406 | (range->header.flags & GRN_OBJ_KEY_VAR_SIZE) == 0) { |
407 | GRN_OBJ_INIT(buf, GRN_UVECTOR, 0, range_id); |
408 | grn_obj_get_value(ctx, column, id, buf); |
409 | grn_text_otoj(ctx, dumper->output, buf, format_argument); |
410 | } else { |
411 | GRN_OBJ_INIT(buf, GRN_VECTOR, 0, range_id); |
412 | grn_obj_get_value(ctx, column, id, buf); |
413 | grn_text_otoj(ctx, dumper->output, buf, format_argument); |
414 | } |
415 | |
416 | grn_obj_unlink(ctx, range); |
417 | grn_obj_unlink(ctx, buf); |
418 | } |
419 | |
420 | static void |
421 | dump_record(grn_ctx *ctx, grn_dumper *dumper, |
422 | grn_obj *table, |
423 | grn_id id, |
424 | grn_obj *columns, int n_columns) |
425 | { |
426 | int j; |
427 | grn_obj buf; |
428 | grn_obj *column_name = &(dumper->column_name_buffer); |
429 | |
430 | GRN_TEXT_PUTC(ctx, dumper->output, '['); |
431 | for (j = 0; j < n_columns; j++) { |
432 | grn_bool is_value_column; |
433 | grn_id range; |
434 | grn_obj *column; |
435 | column = GRN_PTR_VALUE_AT(columns, j); |
436 | /* TODO: use grn_obj_is_value_accessor() */ |
437 | GRN_BULK_REWIND(column_name); |
438 | grn_column_name_(ctx, column, column_name); |
439 | if (GRN_TEXT_LEN(column_name) == GRN_COLUMN_NAME_VALUE_LEN && |
440 | !memcmp(GRN_TEXT_VALUE(column_name), |
441 | GRN_COLUMN_NAME_VALUE, |
442 | GRN_COLUMN_NAME_VALUE_LEN)) { |
443 | is_value_column = GRN_TRUE; |
444 | } else { |
445 | is_value_column = GRN_FALSE; |
446 | } |
447 | range = grn_obj_get_range(ctx, column); |
448 | |
449 | if (j) { GRN_TEXT_PUTC(ctx, dumper->output, ','); } |
450 | switch (column->header.type) { |
451 | case GRN_COLUMN_VAR_SIZE: |
452 | case GRN_COLUMN_FIX_SIZE: |
453 | switch (column->header.flags & GRN_OBJ_COLUMN_TYPE_MASK) { |
454 | case GRN_OBJ_COLUMN_VECTOR: |
455 | dump_record_column_vector(ctx, dumper, id, column, range, &buf); |
456 | break; |
457 | case GRN_OBJ_COLUMN_SCALAR: |
458 | { |
459 | GRN_OBJ_INIT(&buf, GRN_BULK, 0, range); |
460 | grn_obj_get_value(ctx, column, id, &buf); |
461 | grn_text_otoj(ctx, dumper->output, &buf, NULL); |
462 | grn_obj_unlink(ctx, &buf); |
463 | } |
464 | break; |
465 | default: |
466 | GRN_PLUGIN_ERROR(ctx, |
467 | GRN_OPERATION_NOT_SUPPORTED, |
468 | "unsupported column type: %#x" , |
469 | column->header.type); |
470 | break; |
471 | } |
472 | break; |
473 | case GRN_COLUMN_INDEX: |
474 | break; |
475 | case GRN_ACCESSOR: |
476 | { |
477 | GRN_OBJ_INIT(&buf, GRN_BULK, 0, range); |
478 | grn_obj_get_value(ctx, column, id, &buf); |
479 | /* XXX maybe, grn_obj_get_range() should not unconditionally return |
480 | GRN_DB_INT32 when column is GRN_ACCESSOR and |
481 | GRN_ACCESSOR_GET_VALUE */ |
482 | if (is_value_column) { |
483 | buf.header.domain = grn_obj_get_range(ctx, table); |
484 | } |
485 | grn_text_otoj(ctx, dumper->output, &buf, NULL); |
486 | grn_obj_unlink(ctx, &buf); |
487 | } |
488 | break; |
489 | default: |
490 | GRN_PLUGIN_ERROR(ctx, |
491 | GRN_OPERATION_NOT_SUPPORTED, |
492 | "unsupported header type %#x" , |
493 | column->header.type); |
494 | break; |
495 | } |
496 | } |
497 | GRN_TEXT_PUTC(ctx, dumper->output, ']'); |
498 | if (GRN_TEXT_LEN(dumper->output) >= DUMP_FLUSH_THRESHOLD_SIZE) { |
499 | grn_ctx_output_flush(ctx, 0); |
500 | } |
501 | } |
502 | |
503 | static void |
504 | dump_records(grn_ctx *ctx, grn_dumper *dumper, grn_obj *table) |
505 | { |
506 | grn_table_cursor *cursor; |
507 | int i, n_columns; |
508 | grn_obj columns; |
509 | grn_bool have_index_column = GRN_FALSE; |
510 | grn_bool have_data_column = GRN_FALSE; |
511 | |
512 | if (grn_table_size(ctx, table) == 0) { |
513 | return; |
514 | } |
515 | |
516 | if (dumper->is_close_opened_object_mode) { |
517 | grn_ctx_push_temporary_open_space(ctx); |
518 | } |
519 | |
520 | GRN_PTR_INIT(&columns, GRN_OBJ_VECTOR, GRN_ID_NIL); |
521 | |
522 | if (table->header.type == GRN_TABLE_NO_KEY) { |
523 | grn_obj *id_accessor; |
524 | id_accessor = grn_obj_column(ctx, |
525 | table, |
526 | GRN_COLUMN_NAME_ID, |
527 | GRN_COLUMN_NAME_ID_LEN); |
528 | GRN_PTR_PUT(ctx, &columns, id_accessor); |
529 | } else if (table->header.domain != GRN_ID_NIL) { |
530 | grn_obj *key_accessor; |
531 | key_accessor = grn_obj_column(ctx, |
532 | table, |
533 | GRN_COLUMN_NAME_KEY, |
534 | GRN_COLUMN_NAME_KEY_LEN); |
535 | GRN_PTR_PUT(ctx, &columns, key_accessor); |
536 | } |
537 | |
538 | if (grn_obj_get_range(ctx, table) != GRN_ID_NIL) { |
539 | grn_obj *value_accessor; |
540 | value_accessor = grn_obj_column(ctx, |
541 | table, |
542 | GRN_COLUMN_NAME_VALUE, |
543 | GRN_COLUMN_NAME_VALUE_LEN); |
544 | GRN_PTR_PUT(ctx, &columns, value_accessor); |
545 | } |
546 | |
547 | { |
548 | grn_hash *real_columns; |
549 | |
550 | real_columns = grn_hash_create(ctx, NULL, sizeof(grn_id), 0, |
551 | GRN_OBJ_TABLE_HASH_KEY|GRN_HASH_TINY); |
552 | grn_table_columns(ctx, table, NULL, 0, (grn_obj *)real_columns); |
553 | GRN_HASH_EACH_BEGIN(ctx, real_columns, cursor, id) { |
554 | void *key; |
555 | grn_id column_id; |
556 | grn_obj *column; |
557 | |
558 | if (dumper->is_close_opened_object_mode) { |
559 | grn_ctx_push_temporary_open_space(ctx); |
560 | } |
561 | |
562 | grn_hash_cursor_get_key(ctx, cursor, &key); |
563 | column_id = *((grn_id *)key); |
564 | |
565 | column = grn_ctx_at(ctx, column_id); |
566 | if (column) { |
567 | if (grn_obj_is_index_column(ctx, column)) { |
568 | have_index_column = GRN_TRUE; |
569 | if (dumper->is_close_opened_object_mode) { |
570 | grn_ctx_pop_temporary_open_space(ctx); |
571 | } |
572 | } else { |
573 | have_data_column = GRN_TRUE; |
574 | GRN_PTR_PUT(ctx, &columns, column); |
575 | if (dumper->is_close_opened_object_mode) { |
576 | grn_ctx_merge_temporary_open_space(ctx); |
577 | } |
578 | } |
579 | } else { |
580 | GRN_PLUGIN_CLEAR_ERROR(ctx); |
581 | if (dumper->is_close_opened_object_mode) { |
582 | grn_ctx_pop_temporary_open_space(ctx); |
583 | } |
584 | } |
585 | } GRN_HASH_EACH_END(ctx, cursor); |
586 | grn_hash_close(ctx, real_columns); |
587 | } |
588 | |
589 | n_columns = GRN_BULK_VSIZE(&columns) / sizeof(grn_obj *); |
590 | |
591 | if (have_index_column && !have_data_column) { |
592 | goto exit; |
593 | } |
594 | |
595 | if (GRN_TEXT_LEN(dumper->output) > 0) { |
596 | GRN_TEXT_PUTC(ctx, dumper->output, '\n'); |
597 | } |
598 | |
599 | GRN_TEXT_PUTS(ctx, dumper->output, "load --table " ); |
600 | dump_obj_name(ctx, dumper, table); |
601 | GRN_TEXT_PUTS(ctx, dumper->output, "\n[\n" ); |
602 | |
603 | GRN_TEXT_PUTC(ctx, dumper->output, '['); |
604 | for (i = 0; i < n_columns; i++) { |
605 | grn_obj *column; |
606 | grn_obj *column_name = &(dumper->column_name_buffer); |
607 | |
608 | column = GRN_PTR_VALUE_AT(&columns, i); |
609 | if (i) { GRN_TEXT_PUTC(ctx, dumper->output, ','); } |
610 | GRN_BULK_REWIND(column_name); |
611 | grn_column_name_(ctx, column, column_name); |
612 | grn_text_otoj(ctx, dumper->output, column_name, NULL); |
613 | } |
614 | GRN_TEXT_PUTS(ctx, dumper->output, "],\n" ); |
615 | |
616 | if (table->header.type == GRN_TABLE_HASH_KEY && dumper->is_sort_hash_table) { |
617 | grn_obj *sorted; |
618 | grn_table_sort_key sort_keys[1]; |
619 | uint32_t n_sort_keys = 1; |
620 | grn_bool is_first_record = GRN_TRUE; |
621 | |
622 | sort_keys[0].key = grn_obj_column(ctx, table, |
623 | GRN_COLUMN_NAME_KEY, |
624 | GRN_COLUMN_NAME_KEY_LEN); |
625 | sort_keys[0].flags = GRN_TABLE_SORT_ASC; |
626 | sort_keys[0].offset = 0; |
627 | sorted = grn_table_create(ctx, |
628 | NULL, 0, NULL, |
629 | GRN_TABLE_NO_KEY, |
630 | NULL, |
631 | table); |
632 | grn_table_sort(ctx, |
633 | table, 0, -1, |
634 | sorted, |
635 | sort_keys, n_sort_keys); |
636 | cursor = grn_table_cursor_open(ctx, |
637 | sorted, |
638 | NULL, 0, NULL, 0, |
639 | 0, -1, |
640 | 0); |
641 | while (grn_table_cursor_next(ctx, cursor) != GRN_ID_NIL) { |
642 | void *value_raw; |
643 | grn_id id; |
644 | |
645 | grn_table_cursor_get_value(ctx, cursor, &value_raw); |
646 | id = *((grn_id *)value_raw); |
647 | |
648 | if (is_first_record) { |
649 | is_first_record = GRN_FALSE; |
650 | } else { |
651 | GRN_TEXT_PUTS(ctx, dumper->output, ",\n" ); |
652 | } |
653 | dump_record(ctx, dumper, table, id, &columns, n_columns); |
654 | } |
655 | GRN_TEXT_PUTS(ctx, dumper->output, "\n]\n" ); |
656 | grn_obj_close(ctx, sorted); |
657 | grn_obj_unlink(ctx, sort_keys[0].key); |
658 | } else { |
659 | grn_obj delete_commands; |
660 | grn_id old_id = GRN_ID_NIL; |
661 | grn_id id; |
662 | |
663 | GRN_TEXT_INIT(&delete_commands, 0); |
664 | cursor = grn_table_cursor_open(ctx, table, NULL, 0, NULL, 0, 0, -1, |
665 | GRN_CURSOR_BY_KEY); |
666 | while ((id = grn_table_cursor_next(ctx, cursor)) != GRN_ID_NIL) { |
667 | if (old_id != GRN_ID_NIL) { GRN_TEXT_PUTS(ctx, dumper->output, ",\n" ); } |
668 | if (table->header.type == GRN_TABLE_NO_KEY && old_id + 1 < id) { |
669 | grn_id current_id; |
670 | for (current_id = old_id + 1; current_id < id; current_id++) { |
671 | GRN_TEXT_PUTS(ctx, dumper->output, "[],\n" ); |
672 | GRN_TEXT_PUTS(ctx, &delete_commands, "delete --table " ); |
673 | dump_obj_name_raw(ctx, &delete_commands, table); |
674 | GRN_TEXT_PUTS(ctx, &delete_commands, " --id " ); |
675 | grn_text_lltoa(ctx, &delete_commands, current_id); |
676 | GRN_TEXT_PUTC(ctx, &delete_commands, '\n'); |
677 | } |
678 | } |
679 | dump_record(ctx, dumper, table, id, &columns, n_columns); |
680 | |
681 | old_id = id; |
682 | } |
683 | grn_table_cursor_close(ctx, cursor); |
684 | GRN_TEXT_PUTS(ctx, dumper->output, "\n]\n" ); |
685 | GRN_TEXT_PUT(ctx, dumper->output, |
686 | GRN_TEXT_VALUE(&delete_commands), |
687 | GRN_TEXT_LEN(&delete_commands)); |
688 | GRN_OBJ_FIN(ctx, &delete_commands); |
689 | } |
690 | exit : |
691 | for (i = 0; i < n_columns; i++) { |
692 | grn_obj *column; |
693 | |
694 | column = GRN_PTR_VALUE_AT(&columns, i); |
695 | if (column->header.type == GRN_ACCESSOR) { |
696 | grn_obj_close(ctx, column); |
697 | } |
698 | } |
699 | GRN_OBJ_FIN(ctx, &columns); |
700 | |
701 | if (dumper->is_close_opened_object_mode) { |
702 | grn_ctx_pop_temporary_open_space(ctx); |
703 | } |
704 | } |
705 | |
706 | static void |
707 | dump_table(grn_ctx *ctx, grn_dumper *dumper, grn_obj *table) |
708 | { |
709 | grn_obj *domain = NULL; |
710 | grn_id range_id; |
711 | grn_obj *range = NULL; |
712 | grn_table_flags flags; |
713 | grn_table_flags default_flags = GRN_OBJ_PERSISTENT; |
714 | grn_obj *default_tokenizer; |
715 | grn_obj *normalizer; |
716 | grn_obj *token_filters; |
717 | |
718 | switch (table->header.type) { |
719 | case GRN_TABLE_HASH_KEY: |
720 | case GRN_TABLE_PAT_KEY: |
721 | case GRN_TABLE_DAT_KEY: |
722 | domain = grn_ctx_at(ctx, table->header.domain); |
723 | break; |
724 | default: |
725 | break; |
726 | } |
727 | |
728 | if (GRN_TEXT_LEN(dumper->output) > 0) { |
729 | GRN_TEXT_PUTC(ctx, dumper->output, '\n'); |
730 | grn_ctx_output_flush(ctx, 0); |
731 | } |
732 | |
733 | grn_table_get_info(ctx, table, |
734 | &flags, |
735 | NULL, |
736 | &default_tokenizer, |
737 | &normalizer, |
738 | &token_filters); |
739 | |
740 | GRN_TEXT_PUTS(ctx, dumper->output, "table_create " ); |
741 | dump_obj_name(ctx, dumper, table); |
742 | GRN_TEXT_PUTC(ctx, dumper->output, ' '); |
743 | grn_dump_table_create_flags(ctx, |
744 | flags & ~default_flags, |
745 | dumper->output); |
746 | if (domain) { |
747 | GRN_TEXT_PUTC(ctx, dumper->output, ' '); |
748 | dump_obj_name(ctx, dumper, domain); |
749 | } |
750 | range_id = grn_obj_get_range(ctx, table); |
751 | if (range_id != GRN_ID_NIL) { |
752 | range = grn_ctx_at(ctx, range_id); |
753 | if (!range) { |
754 | // ERR(GRN_RANGE_ERROR, "couldn't get table's value_type object"); |
755 | return; |
756 | } |
757 | if (table->header.type != GRN_TABLE_NO_KEY) { |
758 | GRN_TEXT_PUTC(ctx, dumper->output, ' '); |
759 | } else { |
760 | GRN_TEXT_PUTS(ctx, dumper->output, " --value_type " ); |
761 | } |
762 | dump_obj_name(ctx, dumper, range); |
763 | grn_obj_unlink(ctx, range); |
764 | } |
765 | if (default_tokenizer) { |
766 | GRN_TEXT_PUTS(ctx, dumper->output, " --default_tokenizer " ); |
767 | dump_obj_name(ctx, dumper, default_tokenizer); |
768 | } |
769 | if (normalizer) { |
770 | GRN_TEXT_PUTS(ctx, dumper->output, " --normalizer " ); |
771 | dump_obj_name(ctx, dumper, normalizer); |
772 | } |
773 | if (table->header.type != GRN_TABLE_NO_KEY) { |
774 | int n_token_filters; |
775 | |
776 | n_token_filters = GRN_BULK_VSIZE(token_filters) / sizeof(grn_obj *); |
777 | if (n_token_filters > 0) { |
778 | int i; |
779 | GRN_TEXT_PUTS(ctx, dumper->output, " --token_filters " ); |
780 | for (i = 0; i < n_token_filters; i++) { |
781 | grn_obj *token_filter = GRN_PTR_VALUE_AT(token_filters, i); |
782 | if (i > 0) { |
783 | GRN_TEXT_PUTC(ctx, dumper->output, ','); |
784 | } |
785 | dump_obj_name(ctx, dumper, token_filter); |
786 | } |
787 | } |
788 | } |
789 | |
790 | GRN_TEXT_PUTC(ctx, dumper->output, '\n'); |
791 | |
792 | dump_columns(ctx, dumper, table, GRN_TRUE, GRN_FALSE, GRN_FALSE); |
793 | } |
794 | |
795 | static void |
796 | dump_schema(grn_ctx *ctx, grn_dumper *dumper) |
797 | { |
798 | GRN_DB_EACH_BEGIN_BY_KEY(ctx, cursor, id) { |
799 | void *name; |
800 | int name_size; |
801 | grn_obj *object; |
802 | |
803 | if (grn_id_is_builtin(ctx, id)) { |
804 | continue; |
805 | } |
806 | |
807 | name_size = grn_table_cursor_get_key(ctx, cursor, &name); |
808 | if (grn_obj_name_is_column(ctx, name, name_size)) { |
809 | continue; |
810 | } |
811 | |
812 | if (dumper->is_close_opened_object_mode) { |
813 | grn_ctx_push_temporary_open_space(ctx); |
814 | } |
815 | |
816 | if ((object = grn_ctx_at(ctx, id))) { |
817 | switch (object->header.type) { |
818 | case GRN_TABLE_HASH_KEY: |
819 | case GRN_TABLE_PAT_KEY: |
820 | case GRN_TABLE_DAT_KEY: |
821 | case GRN_TABLE_NO_KEY: |
822 | dump_table(ctx, dumper, object); |
823 | break; |
824 | default: |
825 | break; |
826 | } |
827 | } else { |
828 | /* XXX: this clause is executed when MeCab tokenizer is enabled in |
829 | database but the groonga isn't supported MeCab. |
830 | We should return error mesage about it and error exit status |
831 | but it's too difficult for this architecture. :< */ |
832 | GRN_PLUGIN_CLEAR_ERROR(ctx); |
833 | } |
834 | |
835 | if (dumper->is_close_opened_object_mode) { |
836 | grn_ctx_pop_temporary_open_space(ctx); |
837 | } |
838 | } GRN_DB_EACH_END(ctx, cursor); |
839 | |
840 | if (!dumper->have_reference_column) { |
841 | return; |
842 | } |
843 | |
844 | GRN_TEXT_PUTC(ctx, dumper->output, '\n'); |
845 | grn_ctx_output_flush(ctx, 0); |
846 | |
847 | GRN_DB_EACH_BEGIN_BY_KEY(ctx, cursor, id) { |
848 | void *name; |
849 | int name_size; |
850 | grn_obj *object; |
851 | |
852 | if (grn_id_is_builtin(ctx, id)) { |
853 | continue; |
854 | } |
855 | |
856 | name_size = grn_table_cursor_get_key(ctx, cursor, &name); |
857 | if (grn_obj_name_is_column(ctx, name, name_size)) { |
858 | continue; |
859 | } |
860 | |
861 | if (dumper->is_close_opened_object_mode) { |
862 | grn_ctx_push_temporary_open_space(ctx); |
863 | } |
864 | |
865 | if ((object = grn_ctx_at(ctx, id))) { |
866 | switch (object->header.type) { |
867 | case GRN_TABLE_HASH_KEY: |
868 | case GRN_TABLE_PAT_KEY: |
869 | case GRN_TABLE_DAT_KEY: |
870 | case GRN_TABLE_NO_KEY: |
871 | dump_columns(ctx, dumper, object, GRN_FALSE, GRN_TRUE, GRN_FALSE); |
872 | break; |
873 | default: |
874 | break; |
875 | } |
876 | } else { |
877 | /* XXX: this clause is executed when MeCab tokenizer is enabled in |
878 | database but the groonga isn't supported MeCab. |
879 | We should return error mesage about it and error exit status |
880 | but it's too difficult for this architecture. :< */ |
881 | GRN_PLUGIN_CLEAR_ERROR(ctx); |
882 | } |
883 | |
884 | if (dumper->is_close_opened_object_mode) { |
885 | grn_ctx_pop_temporary_open_space(ctx); |
886 | } |
887 | } GRN_DB_EACH_END(ctx, cursor); |
888 | } |
889 | |
890 | static void |
891 | dump_selected_tables_records(grn_ctx *ctx, grn_dumper *dumper, grn_obj *tables) |
892 | { |
893 | const char *p, *e; |
894 | |
895 | p = GRN_TEXT_VALUE(tables); |
896 | e = p + GRN_TEXT_LEN(tables); |
897 | while (p < e) { |
898 | int len; |
899 | grn_obj *table; |
900 | const char *token, *token_e; |
901 | |
902 | if ((len = grn_isspace(p, ctx->encoding))) { |
903 | p += len; |
904 | continue; |
905 | } |
906 | |
907 | token = p; |
908 | if (!(('a' <= *p && *p <= 'z') || |
909 | ('A' <= *p && *p <= 'Z') || |
910 | (*p == '_'))) { |
911 | while (p < e && !grn_isspace(p, ctx->encoding)) { |
912 | p++; |
913 | } |
914 | GRN_LOG(ctx, GRN_LOG_WARNING, "invalid table name is ignored: <%.*s>\n" , |
915 | (int)(p - token), token); |
916 | continue; |
917 | } |
918 | while (p < e && |
919 | (('a' <= *p && *p <= 'z') || |
920 | ('A' <= *p && *p <= 'Z') || |
921 | ('0' <= *p && *p <= '9') || |
922 | (*p == '_'))) { |
923 | p++; |
924 | } |
925 | token_e = p; |
926 | while (p < e && (len = grn_isspace(p, ctx->encoding))) { |
927 | p += len; |
928 | continue; |
929 | } |
930 | if (p < e && *p == ',') { |
931 | p++; |
932 | } |
933 | |
934 | table = grn_ctx_get(ctx, token, token_e - token); |
935 | if (!table) { |
936 | GRN_LOG(ctx, GRN_LOG_WARNING, |
937 | "nonexistent table name is ignored: <%.*s>\n" , |
938 | (int)(token_e - token), token); |
939 | continue; |
940 | } |
941 | |
942 | if (grn_obj_is_table(ctx, table)) { |
943 | dump_records(ctx, dumper, table); |
944 | } |
945 | grn_obj_unlink(ctx, table); |
946 | } |
947 | } |
948 | |
949 | static void |
950 | dump_all_records(grn_ctx *ctx, grn_dumper *dumper) |
951 | { |
952 | GRN_DB_EACH_BEGIN_BY_KEY(ctx, cursor, id) { |
953 | void *name; |
954 | int name_size; |
955 | grn_obj *table; |
956 | |
957 | if (grn_id_is_builtin(ctx, id)) { |
958 | continue; |
959 | } |
960 | |
961 | name_size = grn_table_cursor_get_key(ctx, cursor, &name); |
962 | if (grn_obj_name_is_column(ctx, name, name_size)) { |
963 | continue; |
964 | } |
965 | |
966 | if (dumper->is_close_opened_object_mode) { |
967 | grn_ctx_push_temporary_open_space(ctx); |
968 | } |
969 | |
970 | table = grn_ctx_at(ctx, id); |
971 | if (!table) { |
972 | /* XXX: this clause is executed when MeCab tokenizer is enabled in |
973 | database but the groonga isn't supported MeCab. |
974 | We should return error mesage about it and error exit status |
975 | but it's too difficult for this architecture. :< */ |
976 | GRN_PLUGIN_CLEAR_ERROR(ctx); |
977 | goto next_loop; |
978 | } |
979 | |
980 | if (grn_obj_is_table(ctx, table)) { |
981 | dump_records(ctx, dumper, table); |
982 | } |
983 | |
984 | next_loop : |
985 | if (dumper->is_close_opened_object_mode) { |
986 | grn_ctx_pop_temporary_open_space(ctx); |
987 | } |
988 | } GRN_DB_EACH_END(ctx, cursor); |
989 | } |
990 | |
991 | static void |
992 | dump_indexes(grn_ctx *ctx, grn_dumper *dumper) |
993 | { |
994 | if (!dumper->have_index_column) { |
995 | return; |
996 | } |
997 | |
998 | if (GRN_TEXT_LEN(dumper->output) > 0) { |
999 | GRN_TEXT_PUTC(ctx, dumper->output, '\n'); |
1000 | } |
1001 | |
1002 | GRN_DB_EACH_BEGIN_BY_KEY(ctx, cursor, id) { |
1003 | void *name; |
1004 | int name_size; |
1005 | grn_obj *object; |
1006 | |
1007 | if (grn_id_is_builtin(ctx, id)) { |
1008 | continue; |
1009 | } |
1010 | |
1011 | name_size = grn_table_cursor_get_key(ctx, cursor, &name); |
1012 | if (grn_obj_name_is_column(ctx, name, name_size)) { |
1013 | continue; |
1014 | } |
1015 | |
1016 | if (dumper->is_close_opened_object_mode) { |
1017 | grn_ctx_push_temporary_open_space(ctx); |
1018 | } |
1019 | |
1020 | object = grn_ctx_at(ctx, id); |
1021 | if (!object) { |
1022 | /* XXX: this clause is executed when MeCab tokenizer is enabled in |
1023 | database but the groonga isn't supported MeCab. |
1024 | We should return error mesage about it and error exit status |
1025 | but it's too difficult for this architecture. :< */ |
1026 | GRN_PLUGIN_CLEAR_ERROR(ctx); |
1027 | goto next_loop; |
1028 | } |
1029 | |
1030 | if (grn_obj_is_table(ctx, object)) { |
1031 | dump_columns(ctx, dumper, object, GRN_FALSE, GRN_FALSE, GRN_TRUE); |
1032 | } |
1033 | |
1034 | next_loop : |
1035 | if (dumper->is_close_opened_object_mode) { |
1036 | grn_ctx_pop_temporary_open_space(ctx); |
1037 | } |
1038 | } GRN_DB_EACH_END(ctx, cursor); |
1039 | } |
1040 | |
1041 | static grn_obj * |
1042 | command_dump(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) |
1043 | { |
1044 | grn_dumper dumper; |
1045 | grn_obj *tables; |
1046 | grn_bool is_dump_plugins; |
1047 | grn_bool is_dump_schema; |
1048 | grn_bool is_dump_records; |
1049 | grn_bool is_dump_indexes; |
1050 | grn_bool is_dump_configs; |
1051 | |
1052 | dumper.output = ctx->impl->output.buf; |
1053 | if (grn_thread_get_limit() == 1) { |
1054 | dumper.is_close_opened_object_mode = GRN_TRUE; |
1055 | } else { |
1056 | dumper.is_close_opened_object_mode = GRN_FALSE; |
1057 | } |
1058 | dumper.have_reference_column = GRN_FALSE; |
1059 | dumper.have_index_column = GRN_FALSE; |
1060 | |
1061 | tables = grn_plugin_proc_get_var(ctx, user_data, "tables" , -1); |
1062 | is_dump_plugins = grn_plugin_proc_get_var_bool(ctx, user_data, |
1063 | "dump_plugins" , -1, |
1064 | GRN_TRUE); |
1065 | is_dump_schema = grn_plugin_proc_get_var_bool(ctx, user_data, |
1066 | "dump_schema" , -1, |
1067 | GRN_TRUE); |
1068 | is_dump_records = grn_plugin_proc_get_var_bool(ctx, user_data, |
1069 | "dump_records" , -1, |
1070 | GRN_TRUE); |
1071 | is_dump_indexes = grn_plugin_proc_get_var_bool(ctx, user_data, |
1072 | "dump_indexes" , -1, |
1073 | GRN_TRUE); |
1074 | is_dump_configs = grn_plugin_proc_get_var_bool(ctx, user_data, |
1075 | "dump_configs" , -1, |
1076 | GRN_TRUE); |
1077 | dumper.is_sort_hash_table = |
1078 | grn_plugin_proc_get_var_bool(ctx, user_data, |
1079 | "sort_hash_table" , -1, |
1080 | GRN_FALSE); |
1081 | GRN_TEXT_INIT(&(dumper.column_name_buffer), 0); |
1082 | |
1083 | grn_ctx_set_output_type(ctx, GRN_CONTENT_GROONGA_COMMAND_LIST); |
1084 | |
1085 | dumper_collect_statistics(ctx, &dumper); |
1086 | |
1087 | if (is_dump_configs) { |
1088 | dump_configs(ctx, &dumper); |
1089 | } |
1090 | if (is_dump_plugins) { |
1091 | dump_plugins(ctx, &dumper); |
1092 | } |
1093 | if (is_dump_schema) { |
1094 | dump_schema(ctx, &dumper); |
1095 | } |
1096 | if (is_dump_records) { |
1097 | /* To update index columns correctly, we first create the whole schema, then |
1098 | load non-derivative records, while skipping records of index columns. That |
1099 | way, Groonga will silently do the job of updating index columns for us. */ |
1100 | if (GRN_TEXT_LEN(tables) > 0) { |
1101 | dump_selected_tables_records(ctx, &dumper, tables); |
1102 | } else { |
1103 | dump_all_records(ctx, &dumper); |
1104 | } |
1105 | } |
1106 | if (is_dump_indexes) { |
1107 | dump_indexes(ctx, &dumper); |
1108 | } |
1109 | /* remove the last newline because another one will be added by the caller. |
1110 | maybe, the caller of proc functions currently doesn't consider the |
1111 | possibility of multiple-line output from proc functions. */ |
1112 | if (GRN_BULK_VSIZE(dumper.output) > 0) { |
1113 | grn_bulk_truncate(ctx, dumper.output, GRN_BULK_VSIZE(dumper.output) - 1); |
1114 | } |
1115 | |
1116 | GRN_OBJ_FIN(ctx, &(dumper.column_name_buffer)); |
1117 | |
1118 | return NULL; |
1119 | } |
1120 | |
1121 | void |
1122 | grn_proc_init_dump(grn_ctx *ctx) |
1123 | { |
1124 | grn_expr_var vars[7]; |
1125 | |
1126 | grn_plugin_expr_var_init(ctx, &(vars[0]), "tables" , -1); |
1127 | grn_plugin_expr_var_init(ctx, &(vars[1]), "dump_plugins" , -1); |
1128 | grn_plugin_expr_var_init(ctx, &(vars[2]), "dump_schema" , -1); |
1129 | grn_plugin_expr_var_init(ctx, &(vars[3]), "dump_records" , -1); |
1130 | grn_plugin_expr_var_init(ctx, &(vars[4]), "dump_indexes" , -1); |
1131 | grn_plugin_expr_var_init(ctx, &(vars[5]), "dump_configs" , -1); |
1132 | grn_plugin_expr_var_init(ctx, &(vars[6]), "sort_hash_table" , -1); |
1133 | grn_plugin_command_create(ctx, |
1134 | "dump" , -1, |
1135 | command_dump, |
1136 | sizeof(vars) / sizeof(vars[0]), |
1137 | vars); |
1138 | } |
1139 | |