1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * pg_backup_archiver.h |
4 | * |
5 | * Private interface to the pg_dump archiver routines. |
6 | * It is NOT intended that these routines be called by any |
7 | * dumper directly. |
8 | * |
9 | * See the headers to pg_restore for more details. |
10 | * |
11 | * Copyright (c) 2000, Philip Warner |
12 | * Rights are granted to use this software in any way so long |
13 | * as this notice is not removed. |
14 | * |
15 | * The author is not responsible for loss or damages that may |
16 | * result from its use. |
17 | * |
18 | * |
19 | * IDENTIFICATION |
20 | * src/bin/pg_dump/pg_backup_archiver.h |
21 | * |
22 | *------------------------------------------------------------------------- |
23 | */ |
24 | #ifndef __PG_BACKUP_ARCHIVE__ |
25 | #define __PG_BACKUP_ARCHIVE__ |
26 | |
27 | |
28 | #include <time.h> |
29 | |
30 | #include "pg_backup.h" |
31 | |
32 | #include "libpq-fe.h" |
33 | #include "pqexpbuffer.h" |
34 | |
/* Large-object buffer size (presumably bytes, for lo_buf; confirm in callers) */
#define LOBBUFSIZE 16384

/*
 * Note: zlib.h must be included *after* libpq-fe.h, because the latter may
 * include ssl.h, which has a naming conflict with zlib.h.
 *
 * The GZ* macros give callers a single spelling for file I/O whether or not
 * zlib is compiled in.  GZWRITE and GZREAD take stdio-style (ptr, size,
 * nitems, handle) arguments and, in BOTH configurations, yield a count of
 * BYTES transferred (gzread/gzwrite report bytes, so the stdio fallbacks
 * scale the item count returned by fread/fwrite by the item size).  Note
 * that the size argument is evaluated twice in the stdio fallbacks.
 */
#ifdef HAVE_LIBZ
#include <zlib.h>
#define GZCLOSE(fh) gzclose(fh)
#define GZWRITE(p, s, n, fh) gzwrite(fh, p, (n) * (s))
#define GZREAD(p, s, n, fh) gzread(fh, p, (n) * (s))
#define GZEOF(fh) gzeof(fh)
#else
#define GZCLOSE(fh) fclose(fh)
#define GZWRITE(p, s, n, fh) (fwrite(p, s, n, fh) * (s))
#define GZREAD(p, s, n, fh) (fread(p, s, n, fh) * (s))
#define GZEOF(fh) feof(fh)
/* this is just the redefinition of a libz constant */
#define Z_DEFAULT_COMPRESSION (-1)

/*
 * Minimal stand-in for zlib's z_stream so that code declaring one still
 * compiles without zlib; only these four members are provided.
 */
typedef struct _z_stream
{
    void       *next_in;
    void       *next_out;
    size_t      avail_in;
    size_t      avail_out;
} z_stream;
typedef z_stream *z_streamp;
#endif
64 | |
/* Data block types */
#define BLK_DATA 1
#define BLK_BLOBS 3

/*
 * Encode version components into a convenient integer <maj><min><rev>.
 * Each component occupies one byte (multiplying by 256 is the same as
 * shifting by 8 bits), which is what the ARCHIVE_* extractors below undo.
 */
#define MAKE_ARCHIVE_VERSION(major, minor, rev) (((major) * 256 + (minor)) * 256 + (rev))

#define ARCHIVE_MAJOR(version) (((version) >> 16) & 255)
#define ARCHIVE_MINOR(version) (((version) >>  8) & 255)
#define ARCHIVE_REV(version)   (((version)      ) & 255)

/* Historical version numbers (checked in code) */
#define K_VERS_1_0 MAKE_ARCHIVE_VERSION(1, 0, 0)
#define K_VERS_1_2 MAKE_ARCHIVE_VERSION(1, 2, 0)    /* Allow No ZLIB */
#define K_VERS_1_3 MAKE_ARCHIVE_VERSION(1, 3, 0)    /* BLOBs */
#define K_VERS_1_4 MAKE_ARCHIVE_VERSION(1, 4, 0)    /* Date & name in header */
#define K_VERS_1_5 MAKE_ARCHIVE_VERSION(1, 5, 0)    /* Handle dependencies */
#define K_VERS_1_6 MAKE_ARCHIVE_VERSION(1, 6, 0)    /* Schema field in TOCs */
#define K_VERS_1_7 MAKE_ARCHIVE_VERSION(1, 7, 0)    /* File Offset size in
                                                     * header */
#define K_VERS_1_8 MAKE_ARCHIVE_VERSION(1, 8, 0)    /* change interpretation
                                                     * of ID numbers and
                                                     * dependencies */
#define K_VERS_1_9 MAKE_ARCHIVE_VERSION(1, 9, 0)    /* add default_with_oids
                                                     * tracking */
#define K_VERS_1_10 MAKE_ARCHIVE_VERSION(1, 10, 0)  /* add tablespace */
#define K_VERS_1_11 MAKE_ARCHIVE_VERSION(1, 11, 0)  /* add toc section
                                                     * indicator */
#define K_VERS_1_12 MAKE_ARCHIVE_VERSION(1, 12, 0)  /* add separate BLOB
                                                     * entries */
#define K_VERS_1_13 MAKE_ARCHIVE_VERSION(1, 13, 0)  /* change search_path
                                                     * behavior */
#define K_VERS_1_14 MAKE_ARCHIVE_VERSION(1, 14, 0)  /* add tableam */

/*
 * Current archive version number (the format we can output).
 *
 * K_VERS_SELF deliberately has no trailing semicolon: it is an expression,
 * so it can appear in comparisons and argument lists as well as on the
 * right-hand side of an assignment.
 */
#define K_VERS_MAJOR 1
#define K_VERS_MINOR 14
#define K_VERS_REV 0
#define K_VERS_SELF MAKE_ARCHIVE_VERSION(K_VERS_MAJOR, K_VERS_MINOR, K_VERS_REV)

/* Newest format we can read: any revision of the current major.minor */
#define K_VERS_MAX MAKE_ARCHIVE_VERSION(K_VERS_MAJOR, K_VERS_MINOR, 255)
107 | |
108 | |
/*
 * Flags to indicate disposition of offsets stored in files.
 * NOTE(review): judging by the names, these distinguish an offset that has
 * not been recorded yet, one that has, and an entry with no data at all --
 * confirm against the format code that reads and writes them.
 */
#define K_OFFSET_POS_NOT_SET 1
#define K_OFFSET_POS_SET 2
#define K_OFFSET_NO_DATA 3

/*
 * Special exit values from worker children.  We reserve 0 for normal
 * success; 1 and other small values should be interpreted as crashes.
 * The special codes below start at 10, clear of those small values.
 */
#define WORKER_OK 0
#define WORKER_CREATE_DONE 10
#define WORKER_INHIBIT_DATA 11
#define WORKER_IGNORED_ERRORS 12
122 | |
/*
 * Forward declarations.  The callback typedefs further down take
 * ArchiveHandle * and TocEntry * parameters before the structs themselves
 * are defined, so the typedef names are introduced here.  struct
 * ParallelState is only named in this header (it is used as a pointer
 * parameter type); its definition lives elsewhere.
 */
typedef struct _archiveHandle ArchiveHandle;
typedef struct _tocEntry TocEntry;
struct ParallelState;
126 | |
/*
 * Report a failed read on stdio stream "fd" through fatal().  feof() is
 * tested first so that hitting end of file gets its own message; any other
 * failure uses the %m form (NOTE(review): presumably expanded by fatal() to
 * the errno text -- confirm).  The do { } while (0) wrapper makes the macro
 * behave as a single statement.
 */
#define READ_ERROR_EXIT(fd) \
    do { \
        if (feof(fd)) \
            fatal("could not read from input file: end of file"); \
        else \
            fatal("could not read from input file: %m"); \
    } while (0)

/*
 * Report a failed write through fatal().  Takes no arguments; invoke it as
 * a statement (WRITE_ERROR_EXIT;).
 */
#define WRITE_ERROR_EXIT \
    do { \
        fatal("could not write to output file: %m"); \
    } while (0)
139 | |
/*
 * Two-valued selector: dumping or restoring.
 * NOTE(review): nothing in this header uses T_Action; its users are
 * elsewhere.
 */
typedef enum T_Action
{
    ACT_DUMP,
    ACT_RESTORE
} T_Action;
145 | |
146 | typedef void (*ClosePtrType) (ArchiveHandle *AH); |
147 | typedef void (*ReopenPtrType) (ArchiveHandle *AH); |
148 | typedef void (*ArchiveEntryPtrType) (ArchiveHandle *AH, TocEntry *te); |
149 | |
150 | typedef void (*StartDataPtrType) (ArchiveHandle *AH, TocEntry *te); |
151 | typedef void (*WriteDataPtrType) (ArchiveHandle *AH, const void *data, size_t dLen); |
152 | typedef void (*EndDataPtrType) (ArchiveHandle *AH, TocEntry *te); |
153 | |
154 | typedef void (*StartBlobsPtrType) (ArchiveHandle *AH, TocEntry *te); |
155 | typedef void (*StartBlobPtrType) (ArchiveHandle *AH, TocEntry *te, Oid oid); |
156 | typedef void (*EndBlobPtrType) (ArchiveHandle *AH, TocEntry *te, Oid oid); |
157 | typedef void (*EndBlobsPtrType) (ArchiveHandle *AH, TocEntry *te); |
158 | |
159 | typedef int (*WriteBytePtrType) (ArchiveHandle *AH, const int i); |
160 | typedef int (*ReadBytePtrType) (ArchiveHandle *AH); |
161 | typedef void (*WriteBufPtrType) (ArchiveHandle *AH, const void *c, size_t len); |
162 | typedef void (*ReadBufPtrType) (ArchiveHandle *AH, void *buf, size_t len); |
163 | typedef void (*) (ArchiveHandle *AH, TocEntry *te); |
164 | typedef void (*) (ArchiveHandle *AH, TocEntry *te); |
165 | typedef void (*) (ArchiveHandle *AH, TocEntry *te); |
166 | typedef void (*PrintTocDataPtrType) (ArchiveHandle *AH, TocEntry *te); |
167 | |
168 | typedef void (*PrepParallelRestorePtrType) (ArchiveHandle *AH); |
169 | typedef void (*ClonePtrType) (ArchiveHandle *AH); |
170 | typedef void (*DeClonePtrType) (ArchiveHandle *AH); |
171 | |
172 | typedef int (*WorkerJobDumpPtrType) (ArchiveHandle *AH, TocEntry *te); |
173 | typedef int (*WorkerJobRestorePtrType) (ArchiveHandle *AH, TocEntry *te); |
174 | |
175 | typedef size_t (*CustomOutPtrType) (ArchiveHandle *AH, const void *buf, size_t len); |
176 | |
/*
 * Scanner state for walking through SQL text: outside any quoting, inside a
 * single-quoted literal, or inside a double-quoted identifier.
 */
typedef enum
{
    SQL_SCAN = 0,               /* normal */
    SQL_IN_SINGLE_QUOTE,        /* '...' literal */
    SQL_IN_DOUBLE_QUOTE         /* "..." identifier */
} sqlparseState;

/*
 * Complete parser state carried between chunks of input: the quoting state
 * above, whether a backslash escape is pending, and the text of the command
 * accumulated so far.
 */
typedef struct
{
    sqlparseState state;        /* see above */
    bool        backSlash;      /* next char is backslash quoted? */
    PQExpBuffer curCmd;         /* incomplete line (NULL if not created) */
} sqlparseInfo;
190 | |
/*
 * Coarse phase of archiver processing.  ArchiveHandle records both the
 * current stage and the stage of the last error using this type.
 */
typedef enum
{
    STAGE_NONE = 0,
    STAGE_INITIALIZING,
    STAGE_PROCESSING,
    STAGE_FINALIZING
} ArchiverStage;

/* What kind of output is currently being written (ArchiveHandle.outputKind) */
typedef enum
{
    OUTPUT_SQLCMDS = 0,         /* emitting general SQL commands */
    OUTPUT_COPYDATA,            /* writing COPY data */
    OUTPUT_OTHERDATA            /* writing data as INSERT commands */
} ArchiverOutput;
205 | |
/*
 * For historical reasons, ACL items are interspersed with everything else in
 * a dump file's TOC; typically they're right after the object they're for.
 * However, we need to restore data before ACLs, as otherwise a read-only
 * table (ie one where the owner has revoked her own INSERT privilege) causes
 * data restore failures.  On the other hand, matview REFRESH commands should
 * come out after ACLs, as otherwise non-superuser-owned matviews might not
 * be able to execute.  (If the permissions at the time of dumping would not
 * allow a REFRESH, too bad; we won't fix that for you.)  These considerations
 * force us to make three passes over the TOC, restoring the appropriate
 * subset of items in each pass.  We assume that the dependency sort resulted
 * in an appropriate ordering of items within each subset.
 * XXX This mechanism should be superseded by tracking dependencies on ACLs
 * properly; but we'll still need it for old dump files even after that.
 *
 * RESTORE_PASS_LAST is a macro alias for the final enumerator.  It is a
 * preprocessor directive, so its position inside the braces has no effect
 * on the enum itself; it must be updated if a pass is added after REFRESH.
 */
typedef enum
{
    RESTORE_PASS_MAIN = 0,      /* Main pass (most TOC item types) */
    RESTORE_PASS_ACL,           /* ACL item types */
    RESTORE_PASS_REFRESH        /* Matview REFRESH items */

#define RESTORE_PASS_LAST RESTORE_PASS_REFRESH
} RestorePass;
229 | |
/*
 * What is wanted from a TOC entry.  The values are distinct bits
 * (presumably so they can be OR-ed together into a mask -- confirm against
 * users of TocEntry.reqs and TocIDRequired()).
 */
typedef enum
{
    REQ_SCHEMA = 0x01,          /* want schema */
    REQ_DATA = 0x02,            /* want data */
    REQ_SPECIAL = 0x04          /* for special TOC entries */
} teReqs;
236 | |
/*
 * Private archiver state.  The public Archive struct is embedded as the
 * first member; the remaining members hold format information, the
 * per-format method table, database connection state, output state and the
 * TOC.
 */
struct _archiveHandle
{
    Archive     public;         /* Public part of archive */
    int         version;        /* Version of file */

    char       *archiveRemoteVersion;   /* When reading an archive, the
                                         * version of the dumped DB */
    char       *archiveDumpVersion; /* When reading an archive, the version of
                                     * the dumper */

    size_t      intSize;        /* Size of an integer in the archive */
    size_t      offSize;        /* Size of a file offset in the archive -
                                 * Added V1.7 */
    ArchiveFormat format;       /* Archive format */

    sqlparseInfo sqlparse;      /* state for parsing INSERT data */

    time_t      createDate;     /* Date archive created */

    /*
     * Fields used when discovering header. A format can always get the
     * previous read bytes from here...
     */
    int         readHeader;     /* Used if file header has been read already */
    char       *lookahead;      /* Buffer used when reading header to discover
                                 * format */
    size_t      lookaheadSize;  /* Size of allocated buffer */
    size_t      lookaheadLen;   /* Length of data in lookahead */
    pgoff_t     lookaheadPos;   /* Current read position in lookahead buffer */

    /* Per-format method table; see the *PtrType typedefs for signatures */
    ArchiveEntryPtrType ArchiveEntryPtr;    /* Called for each metadata object */
    StartDataPtrType StartDataPtr;  /* Called when table data is about to be
                                     * dumped */
    WriteDataPtrType WriteDataPtr;  /* Called to send some table data to the
                                     * archive */
    EndDataPtrType EndDataPtr;  /* Called when table data dump is finished */
    WriteBytePtrType WriteBytePtr;  /* Write a byte to output */
    ReadBytePtrType ReadBytePtr;    /* Read a byte from an archive */
    WriteBufPtrType WriteBufPtr;    /* Write a buffer of output to the archive */
    ReadBufPtrType ReadBufPtr;  /* Read a buffer of input from the archive */
    ClosePtrType ClosePtr;      /* Close the archive */
    ReopenPtrType ReopenPtr;    /* Reopen the archive */
    WriteExtraTocPtrType WriteExtraTocPtr;  /* Write extra TOC entry data
                                             * associated with the current
                                             * archive format */
    ReadExtraTocPtrType ReadExtraTocPtr;    /* Read extra info associated with
                                             * archive format */
    PrintExtraTocPtrType PrintExtraTocPtr;  /* Extra TOC info for format */
    PrintTocDataPtrType PrintTocDataPtr;    /* takes (AH, te); name suggests it
                                             * emits a TOC entry's data */

    /* Blob hooks: bracket the whole set of blobs, and each single blob */
    StartBlobsPtrType StartBlobsPtr;
    EndBlobsPtrType EndBlobsPtr;
    StartBlobPtrType StartBlobPtr;
    EndBlobPtrType EndBlobPtr;

    /*
     * Worker hooks.  SetupWorkerPtrType is not declared in this header
     * (presumably it comes from pg_backup.h -- confirm).
     */
    SetupWorkerPtrType SetupWorkerPtr;
    WorkerJobDumpPtrType WorkerJobDumpPtr;
    WorkerJobRestorePtrType WorkerJobRestorePtr;

    PrepParallelRestorePtrType PrepParallelRestorePtr;
    ClonePtrType ClonePtr;      /* Clone format-specific fields */
    DeClonePtrType DeClonePtr;  /* Clean up cloned fields */

    CustomOutPtrType CustomOutPtr;  /* Alternative script output routine */

    /* Stuff for direct DB connection */
    char       *archdbname;     /* DB name *read* from archive */
    trivalue    promptPassword;
    char       *savedPassword;  /* password for ropt->username, if known */
    char       *use_role;
    PGconn     *connection;     /* libpq connection handle */
    /* If connCancel isn't NULL, SIGINT handler will send a cancel */
    PGcancel   *volatile connCancel;

    int         connectToDB;    /* Flag to indicate if direct DB connection is
                                 * required */
    ArchiverOutput outputKind;  /* Flag for what we're currently writing */
    bool        pgCopyIn;       /* Currently in libpq 'COPY IN' mode. */

    int         loFd;           /* BLOB fd */
    int         writingBlob;    /* Flag */
    int         blobCount;      /* # of blobs restored */

    char       *fSpec;          /* Archive File Spec */
    FILE       *FH;             /* General purpose file handle */
    void       *OF;             /* NOTE(review): the "Output file" comment
                                 * below probably describes this pointer, with
                                 * gzOut flagging whether it is a gz handle --
                                 * confirm */
    int         gzOut;          /* Output file */

    struct _tocEntry *toc;      /* Header of circular list of TOC entries */
    int         tocCount;       /* Number of TOC entries */
    DumpId      maxDumpId;      /* largest DumpId among all TOC entries */

    /* arrays created after the TOC list is complete: */
    struct _tocEntry **tocsByDumpId;    /* TOCs indexed by dumpId */
    DumpId     *tableDataId;    /* TABLE DATA ids, indexed by table dumpId */

    struct _tocEntry *currToc;  /* Used when dumping data */

    /*
     * Compression requested on open.  Possible values:
     *   -1   Z_DEFAULT_COMPRESSION
     *    0   COMPRESSION_NONE
     *   1-9  levels for gzip compression
     */
    int         compression;
    bool        dosync;         /* data requested to be synced on sight */
    ArchiveMode mode;           /* File mode - r or w */
    void       *formatData;     /* Header data specific to file format */

    /* these vars track state to avoid sending redundant SET commands */
    char       *currUser;       /* current username, or NULL if unknown */
    char       *currSchema;     /* current schema, or NULL */
    char       *currTablespace; /* current tablespace, or NULL */
    char       *currTableAm;    /* current table access method, or NULL */

    /* Large-object buffer, amount used and size (going by the field names) */
    void       *lo_buf;
    size_t      lo_buf_used;
    size_t      lo_buf_size;

    int         noTocComments;
    ArchiverStage stage;        /* current processing stage */
    ArchiverStage lastErrorStage;
    RestorePass restorePass;    /* used only during parallel restore */
    struct _tocEntry *currentTE;
    struct _tocEntry *lastErrorTE;
};
359 | |
/*
 * One entry in the archive's table of contents (TOC).  Entries are chained
 * through prev/next; ArchiveHandle.toc is described as the header of a
 * circular list of them.  Besides the descriptive fields, an entry carries
 * its dependency list, an optional data-dumper callback, and working state
 * used while dumping or restoring.
 */
struct _tocEntry
{
    struct _tocEntry *prev;     /* TOC list links */
    struct _tocEntry *next;
    CatalogId   catalogId;
    DumpId      dumpId;
    teSection   section;
    bool        hadDumper;      /* Archiver was passed a dumper routine (used
                                 * in restore) */
    char       *tag;            /* index tag */
    char       *namespace;      /* null or empty string if not in a schema */
    char       *tablespace;     /* null if not in a tablespace; empty string
                                 * means use database default */
    char       *tableam;        /* table access method, only for TABLE tags */
    char       *owner;
    char       *desc;
    char       *defn;
    char       *dropStmt;
    char       *copyStmt;
    DumpId     *dependencies;   /* dumpIds of objects this one depends on */
    int         nDeps;          /* number of dependencies */

    DataDumperPtr dataDumper;   /* Routine to dump data for object */
    void       *dataDumperArg;  /* Arg for above routine */
    void       *formatData;     /* TOC Entry data specific to file format */

    /* working state while dumping/restoring */
    pgoff_t     dataLength;     /* item's data size; 0 if none or unknown */
    teReqs      reqs;           /* do we need schema and/or data of object */
    bool        created;        /* set for DATA member if TABLE was created */

    /* working state (needed only for parallel restore) */
    struct _tocEntry *pending_prev; /* list links for pending-items list; */
    struct _tocEntry *pending_next; /* NULL if not in that list */
    int         depCount;       /* number of dependencies not yet restored */
    DumpId     *revDeps;        /* dumpIds of objects depending on this one */
    int         nRevDeps;       /* number of such dependencies */
    DumpId     *lockDeps;       /* dumpIds of objects this one needs lock on */
    int         nLockDeps;      /* number of such dependencies */
};
400 | |
/* Restore one TOC entry; the int result is the job status (its signature
 * matches WorkerJobRestorePtrType) */
extern int  parallel_restore(ArchiveHandle *AH, TocEntry *te);
/* Takes the public Archive pointer; the name suggests it registers the
 * archive to be closed at exit -- confirm in the implementation */
extern void on_exit_close_archive(Archive *AHX);

/* printf-style reporting routine; the format string is argument 2 and the
 * variadic arguments start at 3, as declared by pg_attribute_printf */
extern void warn_or_exit_horribly(ArchiveHandle *AH, const char *fmt,...) pg_attribute_printf(2, 3);
405 | |
/*
 * Options for ArchiveEntry.  The members parallel the descriptive fields of
 * TocEntry (tag, namespace, tablespace, owner, statements, dependencies,
 * data dumper and its argument).
 */
typedef struct _archiveOpts
{
    const char *tag;
    const char *namespace;
    const char *tablespace;
    const char *tableam;
    const char *owner;
    const char *description;
    teSection   section;
    const char *createStmt;
    const char *dropStmt;
    const char *copyStmt;
    const DumpId *deps;
    int         nDeps;
    DataDumperPtr dumpFn;
    void       *dumpArg;
} ArchiveOpts;

/*
 * Build a pointer to an unnamed ArchiveOpts from an initializer list, so a
 * caller can write ArchiveEntry(..., ARCHIVE_OPTS(.tag = ..., ...)).  This
 * is a C99 compound literal: members not mentioned are zero-initialized,
 * and the object lives only until the end of the enclosing block.
 */
#define ARCHIVE_OPTS(...) &(ArchiveOpts){__VA_ARGS__}
/* Called to add a TOC entry */
extern TocEntry *ArchiveEntry(Archive *AHX, CatalogId catalogId,
                              DumpId dumpId, ArchiveOpts *opts);
428 | |
/*
 * Archive header and TOC I/O.
 * NOTE(review): both WriteTOC/ReadTOC and WriteToc/ReadToc are declared
 * here, differing only in letter case -- check which pair is actually
 * defined before relying on either.
 */
extern void WriteTOC(ArchiveHandle *AH);
extern void ReadTOC(ArchiveHandle *AH);
extern void WriteHead(ArchiveHandle *AH);
extern void ReadHead(ArchiveHandle *AH);
extern void WriteToc(ArchiveHandle *AH);
extern void ReadToc(ArchiveHandle *AH);
/* Data output; pstate is the (opaque here) parallel state */
extern void WriteDataChunks(ArchiveHandle *AH, struct ParallelState *pstate);
extern void WriteDataChunksForTocEntry(ArchiveHandle *AH, TocEntry *te);
/* Handle cloning: CloneArchive returns a new handle, DeCloneArchive takes one */
extern ArchiveHandle *CloneArchive(ArchiveHandle *AH);
extern void DeCloneArchive(ArchiveHandle *AH);

/* Lookups by DumpId: the teReqs value, or the TOC entry itself */
extern teReqs TocIDRequired(ArchiveHandle *AH, DumpId id);
TocEntry   *getTocEntryByDumpId(ArchiveHandle *AH, DumpId id);
/* Returns a bool for a stdio stream; the name suggests a seekability test */
extern bool checkSeek(FILE *fp);
443 | |
/*
 * Convenience wrappers that call appendStringLiteral / appendByteaLiteral
 * with the encoding and std_strings settings taken from the archive's
 * public part, so callers only pass the handle.  AH must be a pointer to a
 * struct with a "public" member (i.e. an ArchiveHandle *).
 */
#define appendStringLiteralAHX(buf,str,AH) \
    appendStringLiteral(buf, str, (AH)->public.encoding, (AH)->public.std_strings)

#define appendByteaLiteralAHX(buf,str,len,AH) \
    appendByteaLiteral(buf, str, len, (AH)->public.std_strings)
449 | |
450 | /* |
451 | * Mandatory routines for each supported format |
452 | */ |
453 | |
454 | extern size_t WriteInt(ArchiveHandle *AH, int i); |
455 | extern int ReadInt(ArchiveHandle *AH); |
456 | extern char *ReadStr(ArchiveHandle *AH); |
457 | extern size_t WriteStr(ArchiveHandle *AH, const char *s); |
458 | |
459 | int ReadOffset(ArchiveHandle *, pgoff_t *); |
460 | size_t WriteOffset(ArchiveHandle *, pgoff_t, int); |
461 | |
462 | extern void StartRestoreBlobs(ArchiveHandle *AH); |
463 | extern void StartRestoreBlob(ArchiveHandle *AH, Oid oid, bool drop); |
464 | extern void EndRestoreBlob(ArchiveHandle *AH, Oid oid); |
465 | extern void EndRestoreBlobs(ArchiveHandle *AH); |
466 | |
467 | extern void InitArchiveFmt_Custom(ArchiveHandle *AH); |
468 | extern void InitArchiveFmt_Null(ArchiveHandle *AH); |
469 | extern void InitArchiveFmt_Directory(ArchiveHandle *AH); |
470 | extern void InitArchiveFmt_Tar(ArchiveHandle *AH); |
471 | |
472 | extern bool (char *); |
473 | |
474 | extern void ReconnectToServer(ArchiveHandle *AH, const char *dbname, const char *newUser); |
475 | extern void DropBlobIfExists(ArchiveHandle *AH, Oid oid); |
476 | |
477 | void ahwrite(const void *ptr, size_t size, size_t nmemb, ArchiveHandle *AH); |
478 | int ahprintf(ArchiveHandle *AH, const char *fmt,...) pg_attribute_printf(2, 3); |
479 | |
480 | #endif |
481 | |