1/*-------------------------------------------------------------------------
2 *
3 * pg_backup_archiver.h
4 *
5 * Private interface to the pg_dump archiver routines.
6 * It is NOT intended that these routines be called by any
7 * dumper directly.
8 *
9 * See the headers to pg_restore for more details.
10 *
11 * Copyright (c) 2000, Philip Warner
12 * Rights are granted to use this software in any way so long
13 * as this notice is not removed.
14 *
15 * The author is not responsible for loss or damages that may
16 * result from its use.
17 *
18 *
19 * IDENTIFICATION
20 * src/bin/pg_dump/pg_backup_archiver.h
21 *
22 *-------------------------------------------------------------------------
23 */
24#ifndef __PG_BACKUP_ARCHIVE__
25#define __PG_BACKUP_ARCHIVE__
26
27
28#include <time.h>
29
30#include "pg_backup.h"
31
32#include "libpq-fe.h"
33#include "pqexpbuffer.h"
34
/*
 * Buffer size constant (16 kB).
 * NOTE(review): presumably sizes the lo_buf large-object buffer kept in
 * ArchiveHandle; confirm against the archiver implementation.
 */
#define LOBBUFSIZE 16384
36
/*
 * File I/O shims that work with or without zlib.
 *
 * When HAVE_LIBZ is defined the GZ* macros expand to zlib's gz* calls;
 * otherwise they expand to the equivalent stdio calls, so callers can use a
 * single spelling in both builds.
 *
 * Include order matters: zlib.h has to be pulled in *after* libpq-fe.h,
 * because libpq-fe.h may include ssl.h, whose names conflict with zlib.h.
 *
 * NOTE(review): result units differ between the variants.  GZWRITE yields a
 * byte count either way (the stdio form multiplies fwrite's item count by
 * s), but GZREAD yields bytes with zlib and items with stdio; the two only
 * agree when s == 1.
 */
#ifdef HAVE_LIBZ
#include <zlib.h>

#define GZCLOSE(fh)				gzclose(fh)
#define GZWRITE(p, s, n, fh)	gzwrite(fh, p, (n) * (s))
#define GZREAD(p, s, n, fh)		gzread(fh, p, (n) * (s))
#define GZEOF(fh)				gzeof(fh)

#else							/* !HAVE_LIBZ */

#define GZCLOSE(fh)				fclose(fh)
#define GZWRITE(p, s, n, fh)	(fwrite(p, s, n, fh) * (s))
#define GZREAD(p, s, n, fh)		fread(p, s, n, fh)
#define GZEOF(fh)				feof(fh)

/* Local copy of the libz constant of the same name */
#define Z_DEFAULT_COMPRESSION	(-1)

/* Placeholder carrying the names of zlib's stream types when zlib is absent */
struct _z_stream
{
	void	   *next_in;
	void	   *next_out;
	size_t		avail_in;
	size_t		avail_out;
};
typedef struct _z_stream z_stream;
typedef z_stream *z_streamp;

#endif							/* HAVE_LIBZ */
64
/*
 * Type codes for data blocks.  The value 2 is not defined here.
 */
#define BLK_DATA	1
#define BLK_BLOBS	3
68
/*
 * Archive format versions are packed into a single integer as
 * <maj><min><rev>, one byte per component: major in bits 16-23, minor in
 * bits 8-15, revision in bits 0-7.
 */
#define MAKE_ARCHIVE_VERSION(major, minor, rev) (((major) * 256 + (minor)) * 256 + (rev))

/* Pull one component back out of a packed version number */
#define ARCHIVE_MAJOR(version) (((version) >> 16) & 255)
#define ARCHIVE_MINOR(version) (((version) >>  8) & 255)
#define ARCHIVE_REV(version)   ((version) & 255)

/* Historical version numbers (checked in code) */
#define K_VERS_1_0	MAKE_ARCHIVE_VERSION(1, 0, 0)
#define K_VERS_1_2	MAKE_ARCHIVE_VERSION(1, 2, 0)	/* Allow No ZLIB */
#define K_VERS_1_3	MAKE_ARCHIVE_VERSION(1, 3, 0)	/* BLOBs */
#define K_VERS_1_4	MAKE_ARCHIVE_VERSION(1, 4, 0)	/* Date & name in header */
#define K_VERS_1_5	MAKE_ARCHIVE_VERSION(1, 5, 0)	/* Handle dependencies */
#define K_VERS_1_6	MAKE_ARCHIVE_VERSION(1, 6, 0)	/* Schema field in TOCs */
#define K_VERS_1_7	MAKE_ARCHIVE_VERSION(1, 7, 0)	/* File Offset size in
													 * header */
#define K_VERS_1_8	MAKE_ARCHIVE_VERSION(1, 8, 0)	/* change interpretation
													 * of ID numbers and
													 * dependencies */
#define K_VERS_1_9	MAKE_ARCHIVE_VERSION(1, 9, 0)	/* add default_with_oids
													 * tracking */
#define K_VERS_1_10 MAKE_ARCHIVE_VERSION(1, 10, 0)	/* add tablespace */
#define K_VERS_1_11 MAKE_ARCHIVE_VERSION(1, 11, 0)	/* add toc section
													 * indicator */
#define K_VERS_1_12 MAKE_ARCHIVE_VERSION(1, 12, 0)	/* add separate BLOB
													 * entries */
#define K_VERS_1_13 MAKE_ARCHIVE_VERSION(1, 13, 0)	/* change search_path
													 * behavior */
#define K_VERS_1_14 MAKE_ARCHIVE_VERSION(1, 14, 0)	/* add tableam */

/*
 * Current archive version number (the format we can output).
 *
 * K_VERS_SELF deliberately has no trailing semicolon: it is an expression
 * and must stay usable inside larger expressions such as comparisons or
 * function arguments.
 */
#define K_VERS_MAJOR 1
#define K_VERS_MINOR 14
#define K_VERS_REV 0
#define K_VERS_SELF MAKE_ARCHIVE_VERSION(K_VERS_MAJOR, K_VERS_MINOR, K_VERS_REV)

/* Newest format we can read: any revision of the current major.minor */
#define K_VERS_MAX MAKE_ARCHIVE_VERSION(K_VERS_MAJOR, K_VERS_MINOR, 255)
107
108
/*
 * Flag values describing the disposition of an offset stored in a file.
 */
#define K_OFFSET_POS_NOT_SET	1
#define K_OFFSET_POS_SET		2
#define K_OFFSET_NO_DATA		3
113
/*
 * Exit values reported by worker children.
 *
 * Zero means ordinary success.  The special codes start at 10 because 1 and
 * other small values should be interpreted as crashes.
 */
#define WORKER_OK				0
#define WORKER_CREATE_DONE		10
#define WORKER_INHIBIT_DATA		11
#define WORKER_IGNORED_ERRORS	12
122
/*
 * Forward declarations, so the callback typedefs and prototypes in this
 * header can refer to these types before (or without) their full
 * definitions.
 */
struct ParallelState;
typedef struct _tocEntry TocEntry;
typedef struct _archiveHandle ArchiveHandle;
126
/*
 * Report a failed read on stdio stream "fd" through fatal().  An end-of-file
 * condition gets its own message; anything else is reported with %m.
 * NOTE(review): fatal() is defined elsewhere and is presumably no-return.
 */
#define READ_ERROR_EXIT(fd) \
	do { \
		if (!feof(fd)) \
			fatal("could not read from input file: %m"); \
		else \
			fatal("could not read from input file: end of file"); \
	} while (0)

/*
 * Report a failed write to the output file through fatal(), using %m.
 */
#define WRITE_ERROR_EXIT \
	do { \
		fatal("could not write to output file: %m"); \
	} while (0)
139
/* The two actions distinguished by the archiver: dumping or restoring */
typedef enum T_Action
{
	ACT_DUMP = 0,
	ACT_RESTORE = 1
} T_Action;
145
146typedef void (*ClosePtrType) (ArchiveHandle *AH);
147typedef void (*ReopenPtrType) (ArchiveHandle *AH);
148typedef void (*ArchiveEntryPtrType) (ArchiveHandle *AH, TocEntry *te);
149
150typedef void (*StartDataPtrType) (ArchiveHandle *AH, TocEntry *te);
151typedef void (*WriteDataPtrType) (ArchiveHandle *AH, const void *data, size_t dLen);
152typedef void (*EndDataPtrType) (ArchiveHandle *AH, TocEntry *te);
153
154typedef void (*StartBlobsPtrType) (ArchiveHandle *AH, TocEntry *te);
155typedef void (*StartBlobPtrType) (ArchiveHandle *AH, TocEntry *te, Oid oid);
156typedef void (*EndBlobPtrType) (ArchiveHandle *AH, TocEntry *te, Oid oid);
157typedef void (*EndBlobsPtrType) (ArchiveHandle *AH, TocEntry *te);
158
159typedef int (*WriteBytePtrType) (ArchiveHandle *AH, const int i);
160typedef int (*ReadBytePtrType) (ArchiveHandle *AH);
161typedef void (*WriteBufPtrType) (ArchiveHandle *AH, const void *c, size_t len);
162typedef void (*ReadBufPtrType) (ArchiveHandle *AH, void *buf, size_t len);
163typedef void (*WriteExtraTocPtrType) (ArchiveHandle *AH, TocEntry *te);
164typedef void (*ReadExtraTocPtrType) (ArchiveHandle *AH, TocEntry *te);
165typedef void (*PrintExtraTocPtrType) (ArchiveHandle *AH, TocEntry *te);
166typedef void (*PrintTocDataPtrType) (ArchiveHandle *AH, TocEntry *te);
167
168typedef void (*PrepParallelRestorePtrType) (ArchiveHandle *AH);
169typedef void (*ClonePtrType) (ArchiveHandle *AH);
170typedef void (*DeClonePtrType) (ArchiveHandle *AH);
171
172typedef int (*WorkerJobDumpPtrType) (ArchiveHandle *AH, TocEntry *te);
173typedef int (*WorkerJobRestorePtrType) (ArchiveHandle *AH, TocEntry *te);
174
175typedef size_t (*CustomOutPtrType) (ArchiveHandle *AH, const void *buf, size_t len);
176
/* Quoting context tracked while scanning SQL text */
typedef enum
{
	/* normal: outside any quotes */
	SQL_SCAN = 0,
	/* inside a '...' literal */
	SQL_IN_SINGLE_QUOTE = 1,
	/* inside a "..." identifier */
	SQL_IN_DOUBLE_QUOTE = 2
} sqlparseState;
183
184typedef struct
185{
186 sqlparseState state; /* see above */
187 bool backSlash; /* next char is backslash quoted? */
188 PQExpBuffer curCmd; /* incomplete line (NULL if not created) */
189} sqlparseInfo;
190
/* Stages of archiver processing, in ascending order */
typedef enum
{
	STAGE_NONE = 0,
	STAGE_INITIALIZING = 1,
	STAGE_PROCESSING = 2,
	STAGE_FINALIZING = 3
} ArchiverStage;
198
/* What kind of output is currently being written */
typedef enum
{
	/* emitting general SQL commands */
	OUTPUT_SQLCMDS = 0,
	/* writing COPY data */
	OUTPUT_COPYDATA = 1,
	/* writing data as INSERT commands */
	OUTPUT_OTHERDATA = 2
} ArchiverOutput;
205
/*
 * Restore passes.
 *
 * ACL items are, for historical reasons, scattered through a dump file's
 * TOC, usually directly after the object they belong to.  Two ordering
 * constraints follow:
 *
 *	- Data must be restored before ACLs; otherwise a read-only table (one
 *	  whose owner revoked her own INSERT privilege) makes the data restore
 *	  fail.
 *	- Matview REFRESH commands must run after ACLs; otherwise matviews owned
 *	  by a non-superuser might not be able to execute.  (If the permissions
 *	  at dump time would not allow a REFRESH, too bad; we won't fix that for
 *	  you.)
 *
 * So the TOC is traversed three times, each pass restoring its own subset of
 * items.  Within a subset we rely on the dependency sort having produced a
 * suitable order.
 *
 * XXX This mechanism should be superseded by tracking dependencies on ACLs
 * properly; but we'll still need it for old dump files even after that.
 */
typedef enum
{
	/* Main pass (most TOC item types) */
	RESTORE_PASS_MAIN = 0,
	/* ACL item types */
	RESTORE_PASS_ACL,
	/* Matview REFRESH items */
	RESTORE_PASS_REFRESH
} RestorePass;

/* Alias for the final pass */
#define RESTORE_PASS_LAST RESTORE_PASS_REFRESH
229
/* Bit flags describing which parts of a TOC entry are wanted */
typedef enum
{
	/* want schema */
	REQ_SCHEMA = 1 << 0,
	/* want data */
	REQ_DATA = 1 << 1,
	/* for special TOC entries */
	REQ_SPECIAL = 1 << 2
} teReqs;
236
237struct _archiveHandle
238{
239 Archive public; /* Public part of archive */
240 int version; /* Version of file */
241
242 char *archiveRemoteVersion; /* When reading an archive, the
243 * version of the dumped DB */
244 char *archiveDumpVersion; /* When reading an archive, the version of
245 * the dumper */
246
247 size_t intSize; /* Size of an integer in the archive */
248 size_t offSize; /* Size of a file offset in the archive -
249 * Added V1.7 */
250 ArchiveFormat format; /* Archive format */
251
252 sqlparseInfo sqlparse; /* state for parsing INSERT data */
253
254 time_t createDate; /* Date archive created */
255
256 /*
257 * Fields used when discovering header. A format can always get the
258 * previous read bytes from here...
259 */
260 int readHeader; /* Used if file header has been read already */
261 char *lookahead; /* Buffer used when reading header to discover
262 * format */
263 size_t lookaheadSize; /* Size of allocated buffer */
264 size_t lookaheadLen; /* Length of data in lookahead */
265 pgoff_t lookaheadPos; /* Current read position in lookahead buffer */
266
267 ArchiveEntryPtrType ArchiveEntryPtr; /* Called for each metadata object */
268 StartDataPtrType StartDataPtr; /* Called when table data is about to be
269 * dumped */
270 WriteDataPtrType WriteDataPtr; /* Called to send some table data to the
271 * archive */
272 EndDataPtrType EndDataPtr; /* Called when table data dump is finished */
273 WriteBytePtrType WriteBytePtr; /* Write a byte to output */
274 ReadBytePtrType ReadBytePtr; /* Read a byte from an archive */
275 WriteBufPtrType WriteBufPtr; /* Write a buffer of output to the archive */
276 ReadBufPtrType ReadBufPtr; /* Read a buffer of input from the archive */
277 ClosePtrType ClosePtr; /* Close the archive */
278 ReopenPtrType ReopenPtr; /* Reopen the archive */
279 WriteExtraTocPtrType WriteExtraTocPtr; /* Write extra TOC entry data
280 * associated with the current
281 * archive format */
282 ReadExtraTocPtrType ReadExtraTocPtr; /* Read extra info associated with
283 * archive format */
284 PrintExtraTocPtrType PrintExtraTocPtr; /* Extra TOC info for format */
285 PrintTocDataPtrType PrintTocDataPtr;
286
287 StartBlobsPtrType StartBlobsPtr;
288 EndBlobsPtrType EndBlobsPtr;
289 StartBlobPtrType StartBlobPtr;
290 EndBlobPtrType EndBlobPtr;
291
292 SetupWorkerPtrType SetupWorkerPtr;
293 WorkerJobDumpPtrType WorkerJobDumpPtr;
294 WorkerJobRestorePtrType WorkerJobRestorePtr;
295
296 PrepParallelRestorePtrType PrepParallelRestorePtr;
297 ClonePtrType ClonePtr; /* Clone format-specific fields */
298 DeClonePtrType DeClonePtr; /* Clean up cloned fields */
299
300 CustomOutPtrType CustomOutPtr; /* Alternative script output routine */
301
302 /* Stuff for direct DB connection */
303 char *archdbname; /* DB name *read* from archive */
304 trivalue promptPassword;
305 char *savedPassword; /* password for ropt->username, if known */
306 char *use_role;
307 PGconn *connection;
308 /* If connCancel isn't NULL, SIGINT handler will send a cancel */
309 PGcancel *volatile connCancel;
310
311 int connectToDB; /* Flag to indicate if direct DB connection is
312 * required */
313 ArchiverOutput outputKind; /* Flag for what we're currently writing */
314 bool pgCopyIn; /* Currently in libpq 'COPY IN' mode. */
315
316 int loFd; /* BLOB fd */
317 int writingBlob; /* Flag */
318 int blobCount; /* # of blobs restored */
319
320 char *fSpec; /* Archive File Spec */
321 FILE *FH; /* General purpose file handle */
322 void *OF;
323 int gzOut; /* Output file */
324
325 struct _tocEntry *toc; /* Header of circular list of TOC entries */
326 int tocCount; /* Number of TOC entries */
327 DumpId maxDumpId; /* largest DumpId among all TOC entries */
328
329 /* arrays created after the TOC list is complete: */
330 struct _tocEntry **tocsByDumpId; /* TOCs indexed by dumpId */
331 DumpId *tableDataId; /* TABLE DATA ids, indexed by table dumpId */
332
333 struct _tocEntry *currToc; /* Used when dumping data */
334 int compression; /* Compression requested on open Possible
335 * values for compression: -1
336 * Z_DEFAULT_COMPRESSION 0 COMPRESSION_NONE
337 * 1-9 levels for gzip compression */
338 bool dosync; /* data requested to be synced on sight */
339 ArchiveMode mode; /* File mode - r or w */
340 void *formatData; /* Header data specific to file format */
341
342 /* these vars track state to avoid sending redundant SET commands */
343 char *currUser; /* current username, or NULL if unknown */
344 char *currSchema; /* current schema, or NULL */
345 char *currTablespace; /* current tablespace, or NULL */
346 char *currTableAm; /* current table access method, or NULL */
347
348 void *lo_buf;
349 size_t lo_buf_used;
350 size_t lo_buf_size;
351
352 int noTocComments;
353 ArchiverStage stage;
354 ArchiverStage lastErrorStage;
355 RestorePass restorePass; /* used only during parallel restore */
356 struct _tocEntry *currentTE;
357 struct _tocEntry *lastErrorTE;
358};
359
360struct _tocEntry
361{
362 struct _tocEntry *prev;
363 struct _tocEntry *next;
364 CatalogId catalogId;
365 DumpId dumpId;
366 teSection section;
367 bool hadDumper; /* Archiver was passed a dumper routine (used
368 * in restore) */
369 char *tag; /* index tag */
370 char *namespace; /* null or empty string if not in a schema */
371 char *tablespace; /* null if not in a tablespace; empty string
372 * means use database default */
373 char *tableam; /* table access method, only for TABLE tags */
374 char *owner;
375 char *desc;
376 char *defn;
377 char *dropStmt;
378 char *copyStmt;
379 DumpId *dependencies; /* dumpIds of objects this one depends on */
380 int nDeps; /* number of dependencies */
381
382 DataDumperPtr dataDumper; /* Routine to dump data for object */
383 void *dataDumperArg; /* Arg for above routine */
384 void *formatData; /* TOC Entry data specific to file format */
385
386 /* working state while dumping/restoring */
387 pgoff_t dataLength; /* item's data size; 0 if none or unknown */
388 teReqs reqs; /* do we need schema and/or data of object */
389 bool created; /* set for DATA member if TABLE was created */
390
391 /* working state (needed only for parallel restore) */
392 struct _tocEntry *pending_prev; /* list links for pending-items list; */
393 struct _tocEntry *pending_next; /* NULL if not in that list */
394 int depCount; /* number of dependencies not yet restored */
395 DumpId *revDeps; /* dumpIds of objects depending on this one */
396 int nRevDeps; /* number of such dependencies */
397 DumpId *lockDeps; /* dumpIds of objects this one needs lock on */
398 int nLockDeps; /* number of such dependencies */
399};
400
401extern int parallel_restore(ArchiveHandle *AH, TocEntry *te);
402extern void on_exit_close_archive(Archive *AHX);
403
404extern void warn_or_exit_horribly(ArchiveHandle *AH, const char *fmt,...) pg_attribute_printf(2, 3);
405
406/* Options for ArchiveEntry */
407typedef struct _archiveOpts
408{
409 const char *tag;
410 const char *namespace;
411 const char *tablespace;
412 const char *tableam;
413 const char *owner;
414 const char *description;
415 teSection section;
416 const char *createStmt;
417 const char *dropStmt;
418 const char *copyStmt;
419 const DumpId *deps;
420 int nDeps;
421 DataDumperPtr dumpFn;
422 void *dumpArg;
423} ArchiveOpts;
424#define ARCHIVE_OPTS(...) &(ArchiveOpts){__VA_ARGS__}
425/* Called to add a TOC entry */
426extern TocEntry *ArchiveEntry(Archive *AHX, CatalogId catalogId,
427 DumpId dumpId, ArchiveOpts *opts);
428
429extern void WriteTOC(ArchiveHandle *AH);
430extern void ReadTOC(ArchiveHandle *AH);
431extern void WriteHead(ArchiveHandle *AH);
432extern void ReadHead(ArchiveHandle *AH);
433extern void WriteToc(ArchiveHandle *AH);
434extern void ReadToc(ArchiveHandle *AH);
435extern void WriteDataChunks(ArchiveHandle *AH, struct ParallelState *pstate);
436extern void WriteDataChunksForTocEntry(ArchiveHandle *AH, TocEntry *te);
437extern ArchiveHandle *CloneArchive(ArchiveHandle *AH);
438extern void DeCloneArchive(ArchiveHandle *AH);
439
440extern teReqs TocIDRequired(ArchiveHandle *AH, DumpId id);
441TocEntry *getTocEntryByDumpId(ArchiveHandle *AH, DumpId id);
442extern bool checkSeek(FILE *fp);
443
/*
 * Wrapper around appendStringLiteral() that takes the encoding and
 * std_strings arguments from the public part of the archive handle.
 */
#define appendStringLiteralAHX(buf,str,AH) \
	appendStringLiteral(buf, str, \
						(AH)->public.encoding, \
						(AH)->public.std_strings)

/*
 * Wrapper around appendByteaLiteral() that takes the std_strings argument
 * from the public part of the archive handle.
 */
#define appendByteaLiteralAHX(buf,str,len,AH) \
	appendByteaLiteral(buf, str, len, \
					   (AH)->public.std_strings)
449
450/*
451 * Mandatory routines for each supported format
452 */
453
454extern size_t WriteInt(ArchiveHandle *AH, int i);
455extern int ReadInt(ArchiveHandle *AH);
456extern char *ReadStr(ArchiveHandle *AH);
457extern size_t WriteStr(ArchiveHandle *AH, const char *s);
458
459int ReadOffset(ArchiveHandle *, pgoff_t *);
460size_t WriteOffset(ArchiveHandle *, pgoff_t, int);
461
462extern void StartRestoreBlobs(ArchiveHandle *AH);
463extern void StartRestoreBlob(ArchiveHandle *AH, Oid oid, bool drop);
464extern void EndRestoreBlob(ArchiveHandle *AH, Oid oid);
465extern void EndRestoreBlobs(ArchiveHandle *AH);
466
467extern void InitArchiveFmt_Custom(ArchiveHandle *AH);
468extern void InitArchiveFmt_Null(ArchiveHandle *AH);
469extern void InitArchiveFmt_Directory(ArchiveHandle *AH);
470extern void InitArchiveFmt_Tar(ArchiveHandle *AH);
471
472extern bool isValidTarHeader(char *header);
473
474extern void ReconnectToServer(ArchiveHandle *AH, const char *dbname, const char *newUser);
475extern void DropBlobIfExists(ArchiveHandle *AH, Oid oid);
476
477void ahwrite(const void *ptr, size_t size, size_t nmemb, ArchiveHandle *AH);
478int ahprintf(ArchiveHandle *AH, const char *fmt,...) pg_attribute_printf(2, 3);
479
480#endif
481