/*-------------------------------------------------------------------------
 *
 * libpq_fetch.c
 *	  Functions for fetching files from a remote server.
 *
 * Copyright (c) 2013-2019, PostgreSQL Global Development Group
 *
 *-------------------------------------------------------------------------
 */
#include "postgres_fe.h"

#include <sys/stat.h>
#include <dirent.h>
#include <fcntl.h>
#include <unistd.h>

#include "pg_rewind.h"
#include "datapagemap.h"
#include "fetch.h"
#include "file_ops.h"
#include "filemap.h"

#include "libpq-fe.h"
#include "catalog/pg_type_d.h"
#include "fe_utils/connect.h"
#include "port/pg_bswap.h"

static PGconn *conn = NULL;

/*
 * Files are fetched max CHUNKSIZE bytes at a time.
 *
 * (This only applies to files that are copied in whole, or for truncated
 * files where we copy the tail. Relation files, where we know the individual
 * blocks that need to be fetched, are fetched in BLCKSZ chunks.)
 */
#define CHUNKSIZE 1000000
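
/*
 * For example (illustrative numbers only): a 2,500,000-byte file that is
 * copied in whole is requested as three chunks of 1,000,000, 1,000,000 and
 * 500,000 bytes; see fetch_file_range() below for the splitting logic.
 */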

static void receiveFileChunks(const char *sql);
static void execute_pagemap(datapagemap_t *pagemap, const char *path);
static char *run_simple_query(const char *sql);
static void run_simple_command(const char *sql);

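/*
 * Connect to the source server using the given libpq connection string, and
 * prepare the session for pg_rewind: set a secure search_path, disable
 * timeouts, verify that the server is not in recovery and has
 * full_page_writes enabled, and turn off synchronous_commit.
 */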
void
libpqConnect(const char *connstr)
{
    char       *str;
    PGresult   *res;

    conn = PQconnectdb(connstr);
    if (PQstatus(conn) == CONNECTION_BAD)
        pg_fatal("could not connect to server: %s",
                 PQerrorMessage(conn));

    if (showprogress)
        pg_log_info("connected to server");

    /* disable all types of timeouts */
    run_simple_command("SET statement_timeout = 0");
    run_simple_command("SET lock_timeout = 0");
    run_simple_command("SET idle_in_transaction_session_timeout = 0");

    res = PQexec(conn, ALWAYS_SECURE_SEARCH_PATH_SQL);
    if (PQresultStatus(res) != PGRES_TUPLES_OK)
        pg_fatal("could not clear search_path: %s",
                 PQresultErrorMessage(res));
    PQclear(res);

    /*
     * Check that the server is not in hot standby mode. There is no
     * fundamental reason why this couldn't be made to work, but it doesn't
     * currently, because we use a temporary table. Better to check for it
     * explicitly here, with a clear error message, than to fail later with a
     * more cryptic one.
     */
    str = run_simple_query("SELECT pg_is_in_recovery()");
    if (strcmp(str, "f") != 0)
        pg_fatal("source server must not be in recovery mode");
    pg_free(str);

    /*
     * Also check that full_page_writes is enabled. We can get torn pages if
     * a page is modified while we read it with pg_read_binary_file(), and we
     * rely on full page images to fix them.
     */
    str = run_simple_query("SHOW full_page_writes");
    if (strcmp(str, "on") != 0)
        pg_fatal("full_page_writes must be enabled in the source server");
    pg_free(str);

    /*
     * Although we don't do any "real" updates, we do work with a temporary
     * table. We don't care about synchronous commit for that. It doesn't
     * otherwise matter much, but if the server is using synchronous
     * replication, and replication isn't working for some reason, we don't
     * want to get stuck waiting for it to start working again.
     */
    run_simple_command("SET synchronous_commit = off");
}

/*
 * Runs a query that returns a single value.
 * The result should be pg_free'd after use.
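 *
 * For example (query chosen for illustration only):
 *
 *     str = run_simple_query("SHOW full_page_writes");
 *     ...
 *     pg_free(str);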
 */
static char *
run_simple_query(const char *sql)
{
    PGresult   *res;
    char       *result;

    res = PQexec(conn, sql);

    if (PQresultStatus(res) != PGRES_TUPLES_OK)
        pg_fatal("error running query (%s) in source server: %s",
                 sql, PQresultErrorMessage(res));

    /* sanity check the result set */
    if (PQnfields(res) != 1 || PQntuples(res) != 1 || PQgetisnull(res, 0, 0))
        pg_fatal("unexpected result set from query");

    result = pg_strdup(PQgetvalue(res, 0, 0));

    PQclear(res);

    return result;
}

/*
 * Runs a command.
 * In the event of a failure, exit immediately.
 */
static void
run_simple_command(const char *sql)
{
    PGresult   *res;

    res = PQexec(conn, sql);

    if (PQresultStatus(res) != PGRES_COMMAND_OK)
        pg_fatal("error running query (%s) in source server: %s",
                 sql, PQresultErrorMessage(res));

    PQclear(res);
}

/*
 * Calls the pg_current_wal_insert_lsn() function on the source server.
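 *
 * As an illustration (values are made up): if the server returns "0/3000148",
 * sscanf() parses hi = 0x0 and lo = 0x3000148, and the function returns the
 * 64-bit LSN 0x0000000003000148.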
 */
XLogRecPtr
libpqGetCurrentXlogInsertLocation(void)
{
    XLogRecPtr  result;
    uint32      hi;
    uint32      lo;
    char       *val;

    val = run_simple_query("SELECT pg_current_wal_insert_lsn()");

    if (sscanf(val, "%X/%X", &hi, &lo) != 2)
        pg_fatal("unrecognized result \"%s\" for current WAL insert location", val);

    result = ((uint64) hi) << 32 | lo;

    pg_free(val);

    return result;
}

/*
 * Get a list of all files in the data directory.
 */
void
libpqProcessFileList(void)
{
    PGresult   *res;
    const char *sql;
    int         i;

    /*
     * Create a recursive directory listing of the whole data directory.
     *
     * The WITH RECURSIVE part does most of the work. The second part gets the
     * targets of the symlinks in the pg_tblspc directory.
     *
     * XXX: There is no backend function to get a symbolic link's target in
     * general, so if the admin has put any custom symbolic links in the data
     * directory, they won't be copied correctly.
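     *
     * For illustration only, with made-up sizes and a made-up tablespace, the
     * result rows (path, size, isdir, link_target) could look like:
     *
     *   global/pg_control   |  8192 | f |
     *   base/1              |  4096 | t |
     *   pg_tblspc/16384     |     0 | t | /mnt/ts1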
     */
    sql =
        "WITH RECURSIVE files (path, filename, size, isdir) AS (\n"
        " SELECT '' AS path, filename, size, isdir FROM\n"
        " (SELECT pg_ls_dir('.', true, false) AS filename) AS fn,\n"
        " pg_stat_file(fn.filename, true) AS this\n"
        " UNION ALL\n"
        " SELECT parent.path || parent.filename || '/' AS path,\n"
        " fn, this.size, this.isdir\n"
        " FROM files AS parent,\n"
        " pg_ls_dir(parent.path || parent.filename, true, false) AS fn,\n"
        " pg_stat_file(parent.path || parent.filename || '/' || fn, true) AS this\n"
        " WHERE parent.isdir = 't'\n"
        ")\n"
        "SELECT path || filename, size, isdir,\n"
        " pg_tablespace_location(pg_tablespace.oid) AS link_target\n"
        "FROM files\n"
        "LEFT OUTER JOIN pg_tablespace ON files.path = 'pg_tblspc/'\n"
        " AND oid::text = files.filename\n";
    res = PQexec(conn, sql);

    if (PQresultStatus(res) != PGRES_TUPLES_OK)
        pg_fatal("could not fetch file list: %s",
                 PQresultErrorMessage(res));

    /* sanity check the result set */
    if (PQnfields(res) != 4)
        pg_fatal("unexpected result set while fetching file list");

    /* Read result to local variables */
    for (i = 0; i < PQntuples(res); i++)
    {
        char       *path = PQgetvalue(res, i, 0);
        int64       filesize = atol(PQgetvalue(res, i, 1));
        bool        isdir = (strcmp(PQgetvalue(res, i, 2), "t") == 0);
        char       *link_target = PQgetvalue(res, i, 3);
        file_type_t type;

        if (PQgetisnull(res, i, 1))
        {
            /*
             * The file was removed from the server while the query was
             * running. Ignore it.
             */
            continue;
        }

        if (link_target[0])
            type = FILE_TYPE_SYMLINK;
        else if (isdir)
            type = FILE_TYPE_DIRECTORY;
        else
            type = FILE_TYPE_REGULAR;

        process_source_file(path, type, filesize, link_target);
    }
    PQclear(res);
}

/*----
 * Runs a query that returns pieces of files from the remote source data
 * directory, and overwrites the corresponding parts of target files with
 * the received pieces. The result set is expected to be of format:
 *
 * path    text   -- path in the data directory, e.g. "base/1/123"
 * begin   int8   -- offset within the file
 * chunk   bytea  -- file content
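 *
 * For illustration (hypothetical values): a row of ("base/1/123", 8192,
 * <8192 bytes of data>) instructs us to overwrite the target file
 * base/1/123 at offset 8192 with the 8192 bytes received in the chunk.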
 *----
 */
static void
receiveFileChunks(const char *sql)
{
    PGresult   *res;

    if (PQsendQueryParams(conn, sql, 0, NULL, NULL, NULL, NULL, 1) != 1)
        pg_fatal("could not send query: %s", PQerrorMessage(conn));

    pg_log_debug("getting file chunks");

    if (PQsetSingleRowMode(conn) != 1)
        pg_fatal("could not set libpq connection to single row mode");

    while ((res = PQgetResult(conn)) != NULL)
    {
        char       *filename;
        int         filenamelen;
        int64       chunkoff;
        char        chunkoff_str[32];
        int         chunksize;
        char       *chunk;

        switch (PQresultStatus(res))
        {
            case PGRES_SINGLE_TUPLE:
                break;

            case PGRES_TUPLES_OK:
                PQclear(res);
                continue;       /* final zero-row result */

            default:
                pg_fatal("unexpected result while fetching remote files: %s",
                         PQresultErrorMessage(res));
        }

        /* sanity check the result set */
        if (PQnfields(res) != 3 || PQntuples(res) != 1)
            pg_fatal("unexpected result set size while fetching remote files");

        if (PQftype(res, 0) != TEXTOID ||
            PQftype(res, 1) != INT8OID ||
            PQftype(res, 2) != BYTEAOID)
        {
            pg_fatal("unexpected data types in result set while fetching remote files: %u %u %u",
                     PQftype(res, 0), PQftype(res, 1), PQftype(res, 2));
        }

        if (PQfformat(res, 0) != 1 ||
            PQfformat(res, 1) != 1 ||
            PQfformat(res, 2) != 1)
        {
            pg_fatal("unexpected result format while fetching remote files");
        }

        if (PQgetisnull(res, 0, 0) ||
            PQgetisnull(res, 0, 1))
        {
            pg_fatal("unexpected null values in result while fetching remote files");
        }

        if (PQgetlength(res, 0, 1) != sizeof(int64))
            pg_fatal("unexpected result length while fetching remote files");

        /* Read result set to local variables */
        memcpy(&chunkoff, PQgetvalue(res, 0, 1), sizeof(int64));
        chunkoff = pg_ntoh64(chunkoff);
        chunksize = PQgetlength(res, 0, 2);

        filenamelen = PQgetlength(res, 0, 0);
        filename = pg_malloc(filenamelen + 1);
        memcpy(filename, PQgetvalue(res, 0, 0), filenamelen);
        filename[filenamelen] = '\0';

        chunk = PQgetvalue(res, 0, 2);

        /*
         * If a file has been deleted on the source, remove it on the target
         * as well. Note that multiple unlink() calls may happen on the same
         * file if multiple data chunks are associated with it, so anything
         * that is already missing is ignored unconditionally. If this file
         * is not a relation data file, it has already been truncated when
         * the file chunk list was built from the filemap earlier.
         */
        if (PQgetisnull(res, 0, 2))
        {
            pg_log_debug("received null value for chunk for file \"%s\", file has been deleted",
                         filename);
            remove_target_file(filename, true);
            pg_free(filename);
            PQclear(res);
            continue;
        }

        /*
         * Separate step to keep platform-dependent format code out of
         * translatable strings.
         */
        snprintf(chunkoff_str, sizeof(chunkoff_str), INT64_FORMAT, chunkoff);
        pg_log_debug("received chunk for file \"%s\", offset %s, size %d",
                     filename, chunkoff_str, chunksize);

        open_target_file(filename, false);

        write_target_range(chunk, chunkoff, chunksize);

        pg_free(filename);

        PQclear(res);
    }
}

/*
 * Receive a single file as a malloc'd buffer.
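 *
 * A hypothetical caller (the filename here is only an example) could do:
 *
 *     size_t size;
 *     char  *buf = libpqGetFile("backup_label", &size);
 *
 *     ... use buf and size ...
 *     pg_free(buf);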
 */
char *
libpqGetFile(const char *filename, size_t *filesize)
{
    PGresult   *res;
    char       *result;
    int         len;
    const char *paramValues[1];

    paramValues[0] = filename;
    res = PQexecParams(conn, "SELECT pg_read_binary_file($1)",
                       1, NULL, paramValues, NULL, NULL, 1);

    if (PQresultStatus(res) != PGRES_TUPLES_OK)
        pg_fatal("could not fetch remote file \"%s\": %s",
                 filename, PQresultErrorMessage(res));

    /* sanity check the result set */
    if (PQntuples(res) != 1 || PQgetisnull(res, 0, 0))
        pg_fatal("unexpected result set while fetching remote file \"%s\"",
                 filename);

    /* Read result to local variables */
    len = PQgetlength(res, 0, 0);
    result = pg_malloc(len + 1);
    memcpy(result, PQgetvalue(res, 0, 0), len);
    result[len] = '\0';

    PQclear(res);

    pg_log_debug("fetched file \"%s\", length %d", filename, len);

    if (filesize)
        *filesize = len;
    return result;
}

/*
 * Write a file range to a temporary table in the server.
 *
 * The range is sent to the server as a COPY formatted line, to be inserted
 * into the 'fetchchunks' temporary table. The table is then used by
 * receiveFileChunks() to actually fetch the data.
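 *
 * For illustration (made-up path and range), fetch_file_range("base/1/123",
 * 0, 8192) sends a single line "base/1/123\t0\t8192\n".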
 */
static void
fetch_file_range(const char *path, uint64 begin, uint64 end)
{
    char        linebuf[MAXPGPATH + 23];

    /* Split the range into CHUNKSIZE chunks */
    while (end - begin > 0)
    {
        unsigned int len;

        /* Fine as long as CHUNKSIZE is not bigger than UINT32_MAX */
        if (end - begin > CHUNKSIZE)
            len = CHUNKSIZE;
        else
            len = (unsigned int) (end - begin);

        snprintf(linebuf, sizeof(linebuf), "%s\t" UINT64_FORMAT "\t%u\n", path, begin, len);

        if (PQputCopyData(conn, linebuf, strlen(linebuf)) != 1)
            pg_fatal("could not send COPY data: %s",
                     PQerrorMessage(conn));

        begin += len;
    }
}

/*
 * Fetch all changed blocks from the remote source data directory, and
 * perform the other actions (create, remove, truncate) recorded in the
 * file map.
 */
void
libpq_executeFileMap(filemap_t *map)
{
    file_entry_t *entry;
    const char *sql;
    PGresult   *res;
    int         i;

    /*
     * First create a temporary table, and load it with the blocks that we
     * need to fetch.
     */
    sql = "CREATE TEMPORARY TABLE fetchchunks(path text, begin int8, len int4);";
    run_simple_command(sql);

    sql = "COPY fetchchunks FROM STDIN";
    res = PQexec(conn, sql);

    if (PQresultStatus(res) != PGRES_COPY_IN)
        pg_fatal("could not send file list: %s",
                 PQresultErrorMessage(res));
    PQclear(res);

    for (i = 0; i < map->narray; i++)
    {
        entry = map->array[i];

        /* If this is a relation file, copy the modified blocks */
        execute_pagemap(&entry->pagemap, entry->path);

        switch (entry->action)
        {
            case FILE_ACTION_NONE:
                /* nothing else to do */
                break;

            case FILE_ACTION_COPY:
                /* Truncate the old file out of the way, if any */
                open_target_file(entry->path, true);
                fetch_file_range(entry->path, 0, entry->newsize);
                break;

            case FILE_ACTION_TRUNCATE:
                truncate_target_file(entry->path, entry->newsize);
                break;

            case FILE_ACTION_COPY_TAIL:
                fetch_file_range(entry->path, entry->oldsize, entry->newsize);
                break;

            case FILE_ACTION_REMOVE:
                remove_target(entry);
                break;

            case FILE_ACTION_CREATE:
                create_target(entry);
                break;
        }
    }

    if (PQputCopyEnd(conn, NULL) != 1)
        pg_fatal("could not send end-of-COPY: %s",
                 PQerrorMessage(conn));

    while ((res = PQgetResult(conn)) != NULL)
    {
        if (PQresultStatus(res) != PGRES_COMMAND_OK)
            pg_fatal("unexpected result while sending file list: %s",
                     PQresultErrorMessage(res));
        PQclear(res);
    }

    /*
     * We've now copied the list of file ranges that we need to fetch to the
     * temporary table. Now, actually fetch all of those ranges.
     */
    sql =
        "SELECT path, begin,\n"
        " pg_read_binary_file(path, begin, len, true) AS chunk\n"
        "FROM fetchchunks\n";

    receiveFileChunks(sql);
}

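/*
 * Queue up all blocks listed in the page map of the given relation file for
 * fetching, one BLCKSZ-sized range per block.
 *
 * For example (assuming the default BLCKSZ of 8192): block number 5 maps to
 * the byte range [40960, 49152) of the file.
 */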
static void
execute_pagemap(datapagemap_t *pagemap, const char *path)
{
    datapagemap_iterator_t *iter;
    BlockNumber blkno;
    off_t       offset;

    iter = datapagemap_iterate(pagemap);
    while (datapagemap_next(iter, &blkno))
    {
        offset = blkno * BLCKSZ;

        fetch_file_range(path, offset, offset + BLCKSZ);
    }
    pg_free(iter);
}