1/* mdb_load.c - memory-mapped database load tool */
2/*
3 * Copyright 2011-2018 Howard Chu, Symas Corp.
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted only as authorized by the OpenLDAP
8 * Public License.
9 *
10 * A copy of this license is available in the file LICENSE in the
11 * top-level directory of the distribution or, alternatively, at
12 * <http://www.OpenLDAP.org/license.html>.
13 */
14#include <stdio.h>
15#include <stdlib.h>
16#include <errno.h>
17#include <string.h>
18#include <ctype.h>
19#include <unistd.h>
20#include "lmdb.h"
21
22#define PRINT 1
23#define NOHDR 2
24static int mode;
25
26static char *subname = NULL;
27
28static mdb_size_t lineno;
29static int version;
30
31static int flags;
32
33static char *prog;
34
35static int Eof;
36
37static MDB_envinfo info;
38
39static MDB_val kbuf, dbuf;
40static MDB_val k0buf;
41
42#define Yu MDB_PRIy(u)
43
44#define STRLENOF(s) (sizeof(s)-1)
45
46typedef struct flagbit {
47 int bit;
48 char *name;
49 int len;
50} flagbit;
51
52#define S(s) s, STRLENOF(s)
53
54flagbit dbflags[] = {
55 { MDB_REVERSEKEY, S("reversekey") },
56 { MDB_DUPSORT, S("dupsort") },
57 { MDB_INTEGERKEY, S("integerkey") },
58 { MDB_DUPFIXED, S("dupfixed") },
59 { MDB_INTEGERDUP, S("integerdup") },
60 { MDB_REVERSEDUP, S("reversedup") },
61 { 0, NULL, 0 }
62};
63
64static void readhdr(void)
65{
66 char *ptr;
67
68 flags = 0;
69 while (fgets(dbuf.mv_data, dbuf.mv_size, stdin) != NULL) {
70 lineno++;
71 if (!strncmp(dbuf.mv_data, "VERSION=", STRLENOF("VERSION="))) {
72 version=atoi((char *)dbuf.mv_data+STRLENOF("VERSION="));
73 if (version > 3) {
74 fprintf(stderr, "%s: line %"Yu": unsupported VERSION %d\n",
75 prog, lineno, version);
76 exit(EXIT_FAILURE);
77 }
78 } else if (!strncmp(dbuf.mv_data, "HEADER=END", STRLENOF("HEADER=END"))) {
79 break;
80 } else if (!strncmp(dbuf.mv_data, "format=", STRLENOF("format="))) {
81 if (!strncmp((char *)dbuf.mv_data+STRLENOF("FORMAT="), "print", STRLENOF("print")))
82 mode |= PRINT;
83 else if (strncmp((char *)dbuf.mv_data+STRLENOF("FORMAT="), "bytevalue", STRLENOF("bytevalue"))) {
84 fprintf(stderr, "%s: line %"Yu": unsupported FORMAT %s\n",
85 prog, lineno, (char *)dbuf.mv_data+STRLENOF("FORMAT="));
86 exit(EXIT_FAILURE);
87 }
88 } else if (!strncmp(dbuf.mv_data, "database=", STRLENOF("database="))) {
89 ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size);
90 if (ptr) *ptr = '\0';
91 if (subname) free(subname);
92 subname = strdup((char *)dbuf.mv_data+STRLENOF("database="));
93 } else if (!strncmp(dbuf.mv_data, "type=", STRLENOF("type="))) {
94 if (strncmp((char *)dbuf.mv_data+STRLENOF("type="), "btree", STRLENOF("btree"))) {
95 fprintf(stderr, "%s: line %"Yu": unsupported type %s\n",
96 prog, lineno, (char *)dbuf.mv_data+STRLENOF("type="));
97 exit(EXIT_FAILURE);
98 }
99 } else if (!strncmp(dbuf.mv_data, "mapaddr=", STRLENOF("mapaddr="))) {
100 int i;
101 ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size);
102 if (ptr) *ptr = '\0';
103 i = sscanf((char *)dbuf.mv_data+STRLENOF("mapaddr="), "%p", &info.me_mapaddr);
104 if (i != 1) {
105 fprintf(stderr, "%s: line %"Yu": invalid mapaddr %s\n",
106 prog, lineno, (char *)dbuf.mv_data+STRLENOF("mapaddr="));
107 exit(EXIT_FAILURE);
108 }
109 } else if (!strncmp(dbuf.mv_data, "mapsize=", STRLENOF("mapsize="))) {
110 int i;
111 ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size);
112 if (ptr) *ptr = '\0';
113 i = sscanf((char *)dbuf.mv_data+STRLENOF("mapsize="),
114 "%" MDB_SCNy(u), &info.me_mapsize);
115 if (i != 1) {
116 fprintf(stderr, "%s: line %"Yu": invalid mapsize %s\n",
117 prog, lineno, (char *)dbuf.mv_data+STRLENOF("mapsize="));
118 exit(EXIT_FAILURE);
119 }
120 } else if (!strncmp(dbuf.mv_data, "maxreaders=", STRLENOF("maxreaders="))) {
121 int i;
122 ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size);
123 if (ptr) *ptr = '\0';
124 i = sscanf((char *)dbuf.mv_data+STRLENOF("maxreaders="), "%u", &info.me_maxreaders);
125 if (i != 1) {
126 fprintf(stderr, "%s: line %"Yu": invalid maxreaders %s\n",
127 prog, lineno, (char *)dbuf.mv_data+STRLENOF("maxreaders="));
128 exit(EXIT_FAILURE);
129 }
130 } else {
131 int i;
132 for (i=0; dbflags[i].bit; i++) {
133 if (!strncmp(dbuf.mv_data, dbflags[i].name, dbflags[i].len) &&
134 ((char *)dbuf.mv_data)[dbflags[i].len] == '=') {
135 flags |= dbflags[i].bit;
136 break;
137 }
138 }
139 if (!dbflags[i].bit) {
140 ptr = memchr(dbuf.mv_data, '=', dbuf.mv_size);
141 if (!ptr) {
142 fprintf(stderr, "%s: line %"Yu": unexpected format\n",
143 prog, lineno);
144 exit(EXIT_FAILURE);
145 } else {
146 *ptr = '\0';
147 fprintf(stderr, "%s: line %"Yu": unrecognized keyword ignored: %s\n",
148 prog, lineno, (char *)dbuf.mv_data);
149 }
150 }
151 }
152 }
153}
154
155static void badend(void)
156{
157 fprintf(stderr, "%s: line %"Yu": unexpected end of input\n",
158 prog, lineno);
159}
160
/*
 * Value of one hex digit character.  Masking with 0x4f leaves '0'-'9'
 * as 0-9 and folds 'a'-'f' onto 'A'-'F' (0x41-0x46); bit 0x40 then
 * marks a letter, and subtracting 55 ('A' - 10) yields 10-15.
 * The caller is expected to have validated the digit already.
 */
static int hex_nibble(unsigned char ch)
{
	int v = ch & 0x4f;

	return (v & 0x40) ? v - 55 : v;
}

/*
 * Decode the two hex digits at c2[0] and c2[1] into a byte value:
 * c2[0] supplies the high nibble, c2[1] the low one.
 */
static int unhex(unsigned char *c2)
{
	int hi = hex_nibble(c2[0]);
	int lo = hex_nibble(c2[1]);

	return (hi << 4) | lo;
}
174
175static int readline(MDB_val *out, MDB_val *buf)
176{
177 unsigned char *c1, *c2, *end;
178 size_t len, l2;
179 int c;
180
181 if (!(mode & NOHDR)) {
182 c = fgetc(stdin);
183 if (c == EOF) {
184 Eof = 1;
185 return EOF;
186 }
187 if (c != ' ') {
188 lineno++;
189 if (fgets(buf->mv_data, buf->mv_size, stdin) == NULL) {
190badend:
191 Eof = 1;
192 badend();
193 return EOF;
194 }
195 if (c == 'D' && !strncmp(buf->mv_data, "ATA=END", STRLENOF("ATA=END")))
196 return EOF;
197 goto badend;
198 }
199 }
200 if (fgets(buf->mv_data, buf->mv_size, stdin) == NULL) {
201 Eof = 1;
202 return EOF;
203 }
204 lineno++;
205
206 c1 = buf->mv_data;
207 len = strlen((char *)c1);
208 l2 = len;
209
210 /* Is buffer too short? */
211 while (c1[len-1] != '\n') {
212 buf->mv_data = realloc(buf->mv_data, buf->mv_size*2);
213 if (!buf->mv_data) {
214 Eof = 1;
215 fprintf(stderr, "%s: line %"Yu": out of memory, line too long\n",
216 prog, lineno);
217 return EOF;
218 }
219 c1 = buf->mv_data;
220 c1 += l2;
221 if (fgets((char *)c1, buf->mv_size+1, stdin) == NULL) {
222 Eof = 1;
223 badend();
224 return EOF;
225 }
226 buf->mv_size *= 2;
227 len = strlen((char *)c1);
228 l2 += len;
229 }
230 c1 = c2 = buf->mv_data;
231 len = l2;
232 c1[--len] = '\0';
233 end = c1 + len;
234
235 if (mode & PRINT) {
236 while (c2 < end) {
237 if (*c2 == '\\') {
238 if (c2[1] == '\\') {
239 c1++; c2 += 2;
240 } else {
241 if (c2+3 > end || !isxdigit(c2[1]) || !isxdigit(c2[2])) {
242 Eof = 1;
243 badend();
244 return EOF;
245 }
246 *c1++ = unhex(++c2);
247 c2 += 2;
248 }
249 } else {
250 /* copies are redundant when no escapes were used */
251 *c1++ = *c2++;
252 }
253 }
254 } else {
255 /* odd length not allowed */
256 if (len & 1) {
257 Eof = 1;
258 badend();
259 return EOF;
260 }
261 while (c2 < end) {
262 if (!isxdigit(*c2) || !isxdigit(c2[1])) {
263 Eof = 1;
264 badend();
265 return EOF;
266 }
267 *c1++ = unhex(c2);
268 c2 += 2;
269 }
270 }
271 c2 = out->mv_data = buf->mv_data;
272 out->mv_size = c1 - c2;
273
274 return 0;
275}
276
277static void usage(void)
278{
279 fprintf(stderr, "usage: %s [-V] [-a] [-f input] [-n] [-s name] [-N] [-T] dbpath\n", prog);
280 exit(EXIT_FAILURE);
281}
282
283static int greater(const MDB_val *a, const MDB_val *b)
284{
285 return 1;
286}
287
288int main(int argc, char *argv[])
289{
290 int i, rc;
291 MDB_env *env;
292 MDB_txn *txn;
293 MDB_cursor *mc;
294 MDB_dbi dbi;
295 char *envname;
296 int envflags = MDB_NOSYNC, putflags = 0;
297 int dohdr = 0, append = 0;
298 MDB_val prevk;
299
300 prog = argv[0];
301
302 if (argc < 2) {
303 usage();
304 }
305
306 /* -a: append records in input order
307 * -f: load file instead of stdin
308 * -n: use NOSUBDIR flag on env_open
309 * -s: load into named subDB
310 * -N: use NOOVERWRITE on puts
311 * -T: read plaintext
312 * -V: print version and exit
313 */
314 while ((i = getopt(argc, argv, "af:ns:NTV")) != EOF) {
315 switch(i) {
316 case 'V':
317 printf("%s\n", MDB_VERSION_STRING);
318 exit(0);
319 break;
320 case 'a':
321 append = 1;
322 break;
323 case 'f':
324 if (freopen(optarg, "r", stdin) == NULL) {
325 fprintf(stderr, "%s: %s: reopen: %s\n",
326 prog, optarg, strerror(errno));
327 exit(EXIT_FAILURE);
328 }
329 break;
330 case 'n':
331 envflags |= MDB_NOSUBDIR;
332 break;
333 case 's':
334 subname = strdup(optarg);
335 break;
336 case 'N':
337 putflags = MDB_NOOVERWRITE|MDB_NODUPDATA;
338 break;
339 case 'T':
340 mode |= NOHDR | PRINT;
341 break;
342 default:
343 usage();
344 }
345 }
346
347 if (optind != argc - 1)
348 usage();
349
350 dbuf.mv_size = 4096;
351 dbuf.mv_data = malloc(dbuf.mv_size);
352
353 if (!(mode & NOHDR))
354 readhdr();
355
356 envname = argv[optind];
357 rc = mdb_env_create(&env);
358 if (rc) {
359 fprintf(stderr, "mdb_env_create failed, error %d %s\n", rc, mdb_strerror(rc));
360 return EXIT_FAILURE;
361 }
362
363 mdb_env_set_maxdbs(env, 2);
364
365 if (info.me_maxreaders)
366 mdb_env_set_maxreaders(env, info.me_maxreaders);
367
368 if (info.me_mapsize)
369 mdb_env_set_mapsize(env, info.me_mapsize);
370
371 if (info.me_mapaddr)
372 envflags |= MDB_FIXEDMAP;
373
374 rc = mdb_env_open(env, envname, envflags, 0664);
375 if (rc) {
376 fprintf(stderr, "mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc));
377 goto env_close;
378 }
379
380 kbuf.mv_size = mdb_env_get_maxkeysize(env) * 2 + 2;
381 kbuf.mv_data = malloc(kbuf.mv_size * 2);
382 k0buf.mv_size = kbuf.mv_size;
383 k0buf.mv_data = (char *)kbuf.mv_data + kbuf.mv_size;
384 prevk.mv_data = k0buf.mv_data;
385
386 while(!Eof) {
387 MDB_val key, data;
388 int batch = 0;
389 int appflag;
390
391 if (!dohdr) {
392 dohdr = 1;
393 } else if (!(mode & NOHDR))
394 readhdr();
395
396 rc = mdb_txn_begin(env, NULL, 0, &txn);
397 if (rc) {
398 fprintf(stderr, "mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc));
399 goto env_close;
400 }
401
402 rc = mdb_open(txn, subname, flags|MDB_CREATE, &dbi);
403 if (rc) {
404 fprintf(stderr, "mdb_open failed, error %d %s\n", rc, mdb_strerror(rc));
405 goto txn_abort;
406 }
407 prevk.mv_size = 0;
408 if (append) {
409 mdb_set_compare(txn, dbi, greater);
410 if (flags & MDB_DUPSORT)
411 mdb_set_dupsort(txn, dbi, greater);
412 }
413
414 rc = mdb_cursor_open(txn, dbi, &mc);
415 if (rc) {
416 fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc));
417 goto txn_abort;
418 }
419
420 while(1) {
421 rc = readline(&key, &kbuf);
422 if (rc) /* rc == EOF */
423 break;
424
425 rc = readline(&data, &dbuf);
426 if (rc) {
427 fprintf(stderr, "%s: line %"Yu": failed to read key value\n", prog, lineno);
428 goto txn_abort;
429 }
430
431 if (append) {
432 appflag = MDB_APPEND;
433 if (flags & MDB_DUPSORT) {
434 if (prevk.mv_size == key.mv_size && !memcmp(prevk.mv_data, key.mv_data, key.mv_size))
435 appflag = MDB_CURRENT|MDB_APPENDDUP;
436 else {
437 memcpy(prevk.mv_data, key.mv_data, key.mv_size);
438 prevk.mv_size = key.mv_size;
439 }
440 }
441 } else {
442 appflag = 0;
443 }
444 rc = mdb_cursor_put(mc, &key, &data, putflags|appflag);
445 if (rc == MDB_KEYEXIST && putflags)
446 continue;
447 if (rc) {
448 fprintf(stderr, "mdb_cursor_put failed, error %d %s\n", rc, mdb_strerror(rc));
449 goto txn_abort;
450 }
451 batch++;
452 if (batch == 100) {
453 rc = mdb_txn_commit(txn);
454 if (rc) {
455 fprintf(stderr, "%s: line %"Yu": txn_commit: %s\n",
456 prog, lineno, mdb_strerror(rc));
457 goto env_close;
458 }
459 rc = mdb_txn_begin(env, NULL, 0, &txn);
460 if (rc) {
461 fprintf(stderr, "mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc));
462 goto env_close;
463 }
464 rc = mdb_cursor_open(txn, dbi, &mc);
465 if (rc) {
466 fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc));
467 goto txn_abort;
468 }
469 if (appflag & MDB_APPENDDUP) {
470 MDB_val k, d;
471 mdb_cursor_get(mc, &k, &d, MDB_LAST);
472 }
473 batch = 0;
474 }
475 }
476 rc = mdb_txn_commit(txn);
477 txn = NULL;
478 if (rc) {
479 fprintf(stderr, "%s: line %"Yu": txn_commit: %s\n",
480 prog, lineno, mdb_strerror(rc));
481 goto env_close;
482 }
483 mdb_dbi_close(env, dbi);
484 }
485
486txn_abort:
487 mdb_txn_abort(txn);
488env_close:
489 mdb_env_close(env);
490
491 return rc ? EXIT_FAILURE : EXIT_SUCCESS;
492}
493