1 | /* mdb_load.c - memory-mapped database load tool */ |
2 | /* |
3 | * Copyright 2011-2018 Howard Chu, Symas Corp. |
4 | * All rights reserved. |
5 | * |
6 | * Redistribution and use in source and binary forms, with or without |
7 | * modification, are permitted only as authorized by the OpenLDAP |
8 | * Public License. |
9 | * |
10 | * A copy of this license is available in the file LICENSE in the |
11 | * top-level directory of the distribution or, alternatively, at |
12 | * <http://www.OpenLDAP.org/license.html>. |
13 | */ |
14 | #include <stdio.h> |
15 | #include <stdlib.h> |
16 | #include <errno.h> |
17 | #include <string.h> |
18 | #include <ctype.h> |
19 | #include <unistd.h> |
20 | #include "lmdb.h" |
21 | |
22 | #define PRINT 1 |
23 | #define NOHDR 2 |
24 | static int mode; |
25 | |
26 | static char *subname = NULL; |
27 | |
28 | static mdb_size_t lineno; |
29 | static int version; |
30 | |
31 | static int flags; |
32 | |
33 | static char *prog; |
34 | |
35 | static int Eof; |
36 | |
37 | static MDB_envinfo info; |
38 | |
39 | static MDB_val kbuf, dbuf; |
40 | static MDB_val k0buf; |
41 | |
42 | #define Yu MDB_PRIy(u) |
43 | |
44 | #define STRLENOF(s) (sizeof(s)-1) |
45 | |
46 | typedef struct flagbit { |
47 | int bit; |
48 | char *name; |
49 | int len; |
50 | } flagbit; |
51 | |
52 | #define S(s) s, STRLENOF(s) |
53 | |
54 | flagbit dbflags[] = { |
55 | { MDB_REVERSEKEY, S("reversekey" ) }, |
56 | { MDB_DUPSORT, S("dupsort" ) }, |
57 | { MDB_INTEGERKEY, S("integerkey" ) }, |
58 | { MDB_DUPFIXED, S("dupfixed" ) }, |
59 | { MDB_INTEGERDUP, S("integerdup" ) }, |
60 | { MDB_REVERSEDUP, S("reversedup" ) }, |
61 | { 0, NULL, 0 } |
62 | }; |
63 | |
64 | static void readhdr(void) |
65 | { |
66 | char *ptr; |
67 | |
68 | flags = 0; |
69 | while (fgets(dbuf.mv_data, dbuf.mv_size, stdin) != NULL) { |
70 | lineno++; |
71 | if (!strncmp(dbuf.mv_data, "VERSION=" , STRLENOF("VERSION=" ))) { |
72 | version=atoi((char *)dbuf.mv_data+STRLENOF("VERSION=" )); |
73 | if (version > 3) { |
74 | fprintf(stderr, "%s: line %" Yu": unsupported VERSION %d\n" , |
75 | prog, lineno, version); |
76 | exit(EXIT_FAILURE); |
77 | } |
78 | } else if (!strncmp(dbuf.mv_data, "HEADER=END" , STRLENOF("HEADER=END" ))) { |
79 | break; |
80 | } else if (!strncmp(dbuf.mv_data, "format=" , STRLENOF("format=" ))) { |
81 | if (!strncmp((char *)dbuf.mv_data+STRLENOF("FORMAT=" ), "print" , STRLENOF("print" ))) |
82 | mode |= PRINT; |
83 | else if (strncmp((char *)dbuf.mv_data+STRLENOF("FORMAT=" ), "bytevalue" , STRLENOF("bytevalue" ))) { |
84 | fprintf(stderr, "%s: line %" Yu": unsupported FORMAT %s\n" , |
85 | prog, lineno, (char *)dbuf.mv_data+STRLENOF("FORMAT=" )); |
86 | exit(EXIT_FAILURE); |
87 | } |
88 | } else if (!strncmp(dbuf.mv_data, "database=" , STRLENOF("database=" ))) { |
89 | ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size); |
90 | if (ptr) *ptr = '\0'; |
91 | if (subname) free(subname); |
92 | subname = strdup((char *)dbuf.mv_data+STRLENOF("database=" )); |
93 | } else if (!strncmp(dbuf.mv_data, "type=" , STRLENOF("type=" ))) { |
94 | if (strncmp((char *)dbuf.mv_data+STRLENOF("type=" ), "btree" , STRLENOF("btree" ))) { |
95 | fprintf(stderr, "%s: line %" Yu": unsupported type %s\n" , |
96 | prog, lineno, (char *)dbuf.mv_data+STRLENOF("type=" )); |
97 | exit(EXIT_FAILURE); |
98 | } |
99 | } else if (!strncmp(dbuf.mv_data, "mapaddr=" , STRLENOF("mapaddr=" ))) { |
100 | int i; |
101 | ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size); |
102 | if (ptr) *ptr = '\0'; |
103 | i = sscanf((char *)dbuf.mv_data+STRLENOF("mapaddr=" ), "%p" , &info.me_mapaddr); |
104 | if (i != 1) { |
105 | fprintf(stderr, "%s: line %" Yu": invalid mapaddr %s\n" , |
106 | prog, lineno, (char *)dbuf.mv_data+STRLENOF("mapaddr=" )); |
107 | exit(EXIT_FAILURE); |
108 | } |
109 | } else if (!strncmp(dbuf.mv_data, "mapsize=" , STRLENOF("mapsize=" ))) { |
110 | int i; |
111 | ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size); |
112 | if (ptr) *ptr = '\0'; |
113 | i = sscanf((char *)dbuf.mv_data+STRLENOF("mapsize=" ), |
114 | "%" MDB_SCNy(u), &info.me_mapsize); |
115 | if (i != 1) { |
116 | fprintf(stderr, "%s: line %" Yu": invalid mapsize %s\n" , |
117 | prog, lineno, (char *)dbuf.mv_data+STRLENOF("mapsize=" )); |
118 | exit(EXIT_FAILURE); |
119 | } |
120 | } else if (!strncmp(dbuf.mv_data, "maxreaders=" , STRLENOF("maxreaders=" ))) { |
121 | int i; |
122 | ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size); |
123 | if (ptr) *ptr = '\0'; |
124 | i = sscanf((char *)dbuf.mv_data+STRLENOF("maxreaders=" ), "%u" , &info.me_maxreaders); |
125 | if (i != 1) { |
126 | fprintf(stderr, "%s: line %" Yu": invalid maxreaders %s\n" , |
127 | prog, lineno, (char *)dbuf.mv_data+STRLENOF("maxreaders=" )); |
128 | exit(EXIT_FAILURE); |
129 | } |
130 | } else { |
131 | int i; |
132 | for (i=0; dbflags[i].bit; i++) { |
133 | if (!strncmp(dbuf.mv_data, dbflags[i].name, dbflags[i].len) && |
134 | ((char *)dbuf.mv_data)[dbflags[i].len] == '=') { |
135 | flags |= dbflags[i].bit; |
136 | break; |
137 | } |
138 | } |
139 | if (!dbflags[i].bit) { |
140 | ptr = memchr(dbuf.mv_data, '=', dbuf.mv_size); |
141 | if (!ptr) { |
142 | fprintf(stderr, "%s: line %" Yu": unexpected format\n" , |
143 | prog, lineno); |
144 | exit(EXIT_FAILURE); |
145 | } else { |
146 | *ptr = '\0'; |
147 | fprintf(stderr, "%s: line %" Yu": unrecognized keyword ignored: %s\n" , |
148 | prog, lineno, (char *)dbuf.mv_data); |
149 | } |
150 | } |
151 | } |
152 | } |
153 | } |
154 | |
155 | static void badend(void) |
156 | { |
157 | fprintf(stderr, "%s: line %" Yu": unexpected end of input\n" , |
158 | prog, lineno); |
159 | } |
160 | |
/*
 * Convert the two hex digit characters at c2[0] and c2[1] into one byte
 * value.  Masking with 0x4f folds lower-case letters onto upper-case
 * ones and reduces '0'..'9' to 0..9; letters then have bit 0x40 set and
 * are shifted down by 55 ('A' - 10).  Callers are expected to have
 * checked both characters with isxdigit() first.
 */
static int unhex(unsigned char *c2)
{
	int value = 0;
	int i;

	for (i = 0; i < 2; i++) {
		int nibble = c2[i] & 0x4f;

		if (nibble & 0x40)
			nibble -= 55;
		value = (value << 4) | nibble;
	}
	return value;
}
174 | |
175 | static int readline(MDB_val *out, MDB_val *buf) |
176 | { |
177 | unsigned char *c1, *c2, *end; |
178 | size_t len, l2; |
179 | int c; |
180 | |
181 | if (!(mode & NOHDR)) { |
182 | c = fgetc(stdin); |
183 | if (c == EOF) { |
184 | Eof = 1; |
185 | return EOF; |
186 | } |
187 | if (c != ' ') { |
188 | lineno++; |
189 | if (fgets(buf->mv_data, buf->mv_size, stdin) == NULL) { |
190 | badend: |
191 | Eof = 1; |
192 | badend(); |
193 | return EOF; |
194 | } |
195 | if (c == 'D' && !strncmp(buf->mv_data, "ATA=END" , STRLENOF("ATA=END" ))) |
196 | return EOF; |
197 | goto badend; |
198 | } |
199 | } |
200 | if (fgets(buf->mv_data, buf->mv_size, stdin) == NULL) { |
201 | Eof = 1; |
202 | return EOF; |
203 | } |
204 | lineno++; |
205 | |
206 | c1 = buf->mv_data; |
207 | len = strlen((char *)c1); |
208 | l2 = len; |
209 | |
210 | /* Is buffer too short? */ |
211 | while (c1[len-1] != '\n') { |
212 | buf->mv_data = realloc(buf->mv_data, buf->mv_size*2); |
213 | if (!buf->mv_data) { |
214 | Eof = 1; |
215 | fprintf(stderr, "%s: line %" Yu": out of memory, line too long\n" , |
216 | prog, lineno); |
217 | return EOF; |
218 | } |
219 | c1 = buf->mv_data; |
220 | c1 += l2; |
221 | if (fgets((char *)c1, buf->mv_size+1, stdin) == NULL) { |
222 | Eof = 1; |
223 | badend(); |
224 | return EOF; |
225 | } |
226 | buf->mv_size *= 2; |
227 | len = strlen((char *)c1); |
228 | l2 += len; |
229 | } |
230 | c1 = c2 = buf->mv_data; |
231 | len = l2; |
232 | c1[--len] = '\0'; |
233 | end = c1 + len; |
234 | |
235 | if (mode & PRINT) { |
236 | while (c2 < end) { |
237 | if (*c2 == '\\') { |
238 | if (c2[1] == '\\') { |
239 | c1++; c2 += 2; |
240 | } else { |
241 | if (c2+3 > end || !isxdigit(c2[1]) || !isxdigit(c2[2])) { |
242 | Eof = 1; |
243 | badend(); |
244 | return EOF; |
245 | } |
246 | *c1++ = unhex(++c2); |
247 | c2 += 2; |
248 | } |
249 | } else { |
250 | /* copies are redundant when no escapes were used */ |
251 | *c1++ = *c2++; |
252 | } |
253 | } |
254 | } else { |
255 | /* odd length not allowed */ |
256 | if (len & 1) { |
257 | Eof = 1; |
258 | badend(); |
259 | return EOF; |
260 | } |
261 | while (c2 < end) { |
262 | if (!isxdigit(*c2) || !isxdigit(c2[1])) { |
263 | Eof = 1; |
264 | badend(); |
265 | return EOF; |
266 | } |
267 | *c1++ = unhex(c2); |
268 | c2 += 2; |
269 | } |
270 | } |
271 | c2 = out->mv_data = buf->mv_data; |
272 | out->mv_size = c1 - c2; |
273 | |
274 | return 0; |
275 | } |
276 | |
277 | static void usage(void) |
278 | { |
279 | fprintf(stderr, "usage: %s [-V] [-a] [-f input] [-n] [-s name] [-N] [-T] dbpath\n" , prog); |
280 | exit(EXIT_FAILURE); |
281 | } |
282 | |
283 | static int greater(const MDB_val *a, const MDB_val *b) |
284 | { |
285 | return 1; |
286 | } |
287 | |
288 | int main(int argc, char *argv[]) |
289 | { |
290 | int i, rc; |
291 | MDB_env *env; |
292 | MDB_txn *txn; |
293 | MDB_cursor *mc; |
294 | MDB_dbi dbi; |
295 | char *envname; |
296 | int envflags = MDB_NOSYNC, putflags = 0; |
297 | int dohdr = 0, append = 0; |
298 | MDB_val prevk; |
299 | |
300 | prog = argv[0]; |
301 | |
302 | if (argc < 2) { |
303 | usage(); |
304 | } |
305 | |
306 | /* -a: append records in input order |
307 | * -f: load file instead of stdin |
308 | * -n: use NOSUBDIR flag on env_open |
309 | * -s: load into named subDB |
310 | * -N: use NOOVERWRITE on puts |
311 | * -T: read plaintext |
312 | * -V: print version and exit |
313 | */ |
314 | while ((i = getopt(argc, argv, "af:ns:NTV" )) != EOF) { |
315 | switch(i) { |
316 | case 'V': |
317 | printf("%s\n" , MDB_VERSION_STRING); |
318 | exit(0); |
319 | break; |
320 | case 'a': |
321 | append = 1; |
322 | break; |
323 | case 'f': |
324 | if (freopen(optarg, "r" , stdin) == NULL) { |
325 | fprintf(stderr, "%s: %s: reopen: %s\n" , |
326 | prog, optarg, strerror(errno)); |
327 | exit(EXIT_FAILURE); |
328 | } |
329 | break; |
330 | case 'n': |
331 | envflags |= MDB_NOSUBDIR; |
332 | break; |
333 | case 's': |
334 | subname = strdup(optarg); |
335 | break; |
336 | case 'N': |
337 | putflags = MDB_NOOVERWRITE|MDB_NODUPDATA; |
338 | break; |
339 | case 'T': |
340 | mode |= NOHDR | PRINT; |
341 | break; |
342 | default: |
343 | usage(); |
344 | } |
345 | } |
346 | |
347 | if (optind != argc - 1) |
348 | usage(); |
349 | |
350 | dbuf.mv_size = 4096; |
351 | dbuf.mv_data = malloc(dbuf.mv_size); |
352 | |
353 | if (!(mode & NOHDR)) |
354 | readhdr(); |
355 | |
356 | envname = argv[optind]; |
357 | rc = mdb_env_create(&env); |
358 | if (rc) { |
359 | fprintf(stderr, "mdb_env_create failed, error %d %s\n" , rc, mdb_strerror(rc)); |
360 | return EXIT_FAILURE; |
361 | } |
362 | |
363 | mdb_env_set_maxdbs(env, 2); |
364 | |
365 | if (info.me_maxreaders) |
366 | mdb_env_set_maxreaders(env, info.me_maxreaders); |
367 | |
368 | if (info.me_mapsize) |
369 | mdb_env_set_mapsize(env, info.me_mapsize); |
370 | |
371 | if (info.me_mapaddr) |
372 | envflags |= MDB_FIXEDMAP; |
373 | |
374 | rc = mdb_env_open(env, envname, envflags, 0664); |
375 | if (rc) { |
376 | fprintf(stderr, "mdb_env_open failed, error %d %s\n" , rc, mdb_strerror(rc)); |
377 | goto env_close; |
378 | } |
379 | |
380 | kbuf.mv_size = mdb_env_get_maxkeysize(env) * 2 + 2; |
381 | kbuf.mv_data = malloc(kbuf.mv_size * 2); |
382 | k0buf.mv_size = kbuf.mv_size; |
383 | k0buf.mv_data = (char *)kbuf.mv_data + kbuf.mv_size; |
384 | prevk.mv_data = k0buf.mv_data; |
385 | |
386 | while(!Eof) { |
387 | MDB_val key, data; |
388 | int batch = 0; |
389 | int appflag; |
390 | |
391 | if (!dohdr) { |
392 | dohdr = 1; |
393 | } else if (!(mode & NOHDR)) |
394 | readhdr(); |
395 | |
396 | rc = mdb_txn_begin(env, NULL, 0, &txn); |
397 | if (rc) { |
398 | fprintf(stderr, "mdb_txn_begin failed, error %d %s\n" , rc, mdb_strerror(rc)); |
399 | goto env_close; |
400 | } |
401 | |
402 | rc = mdb_open(txn, subname, flags|MDB_CREATE, &dbi); |
403 | if (rc) { |
404 | fprintf(stderr, "mdb_open failed, error %d %s\n" , rc, mdb_strerror(rc)); |
405 | goto txn_abort; |
406 | } |
407 | prevk.mv_size = 0; |
408 | if (append) { |
409 | mdb_set_compare(txn, dbi, greater); |
410 | if (flags & MDB_DUPSORT) |
411 | mdb_set_dupsort(txn, dbi, greater); |
412 | } |
413 | |
414 | rc = mdb_cursor_open(txn, dbi, &mc); |
415 | if (rc) { |
416 | fprintf(stderr, "mdb_cursor_open failed, error %d %s\n" , rc, mdb_strerror(rc)); |
417 | goto txn_abort; |
418 | } |
419 | |
420 | while(1) { |
421 | rc = readline(&key, &kbuf); |
422 | if (rc) /* rc == EOF */ |
423 | break; |
424 | |
425 | rc = readline(&data, &dbuf); |
426 | if (rc) { |
427 | fprintf(stderr, "%s: line %" Yu": failed to read key value\n" , prog, lineno); |
428 | goto txn_abort; |
429 | } |
430 | |
431 | if (append) { |
432 | appflag = MDB_APPEND; |
433 | if (flags & MDB_DUPSORT) { |
434 | if (prevk.mv_size == key.mv_size && !memcmp(prevk.mv_data, key.mv_data, key.mv_size)) |
435 | appflag = MDB_CURRENT|MDB_APPENDDUP; |
436 | else { |
437 | memcpy(prevk.mv_data, key.mv_data, key.mv_size); |
438 | prevk.mv_size = key.mv_size; |
439 | } |
440 | } |
441 | } else { |
442 | appflag = 0; |
443 | } |
444 | rc = mdb_cursor_put(mc, &key, &data, putflags|appflag); |
445 | if (rc == MDB_KEYEXIST && putflags) |
446 | continue; |
447 | if (rc) { |
448 | fprintf(stderr, "mdb_cursor_put failed, error %d %s\n" , rc, mdb_strerror(rc)); |
449 | goto txn_abort; |
450 | } |
451 | batch++; |
452 | if (batch == 100) { |
453 | rc = mdb_txn_commit(txn); |
454 | if (rc) { |
455 | fprintf(stderr, "%s: line %" Yu": txn_commit: %s\n" , |
456 | prog, lineno, mdb_strerror(rc)); |
457 | goto env_close; |
458 | } |
459 | rc = mdb_txn_begin(env, NULL, 0, &txn); |
460 | if (rc) { |
461 | fprintf(stderr, "mdb_txn_begin failed, error %d %s\n" , rc, mdb_strerror(rc)); |
462 | goto env_close; |
463 | } |
464 | rc = mdb_cursor_open(txn, dbi, &mc); |
465 | if (rc) { |
466 | fprintf(stderr, "mdb_cursor_open failed, error %d %s\n" , rc, mdb_strerror(rc)); |
467 | goto txn_abort; |
468 | } |
469 | if (appflag & MDB_APPENDDUP) { |
470 | MDB_val k, d; |
471 | mdb_cursor_get(mc, &k, &d, MDB_LAST); |
472 | } |
473 | batch = 0; |
474 | } |
475 | } |
476 | rc = mdb_txn_commit(txn); |
477 | txn = NULL; |
478 | if (rc) { |
479 | fprintf(stderr, "%s: line %" Yu": txn_commit: %s\n" , |
480 | prog, lineno, mdb_strerror(rc)); |
481 | goto env_close; |
482 | } |
483 | mdb_dbi_close(env, dbi); |
484 | } |
485 | |
486 | txn_abort: |
487 | mdb_txn_abort(txn); |
488 | env_close: |
489 | mdb_env_close(env); |
490 | |
491 | return rc ? EXIT_FAILURE : EXIT_SUCCESS; |
492 | } |
493 | |