1/*-------------------------------------------------------------------------
2 *
3 * genfile.c
4 * Functions for direct access to files
5 *
6 *
7 * Copyright (c) 2004-2019, PostgreSQL Global Development Group
8 *
9 * Author: Andreas Pflug <pgadmin@pse-consulting.de>
10 *
11 * IDENTIFICATION
12 * src/backend/utils/adt/genfile.c
13 *
14 *-------------------------------------------------------------------------
15 */
16#include "postgres.h"
17
18#include <sys/file.h>
19#include <sys/stat.h>
20#include <unistd.h>
21#include <dirent.h>
22
23#include "access/htup_details.h"
24#include "access/xlog_internal.h"
25#include "catalog/pg_authid.h"
26#include "catalog/pg_tablespace_d.h"
27#include "catalog/pg_type.h"
28#include "funcapi.h"
29#include "mb/pg_wchar.h"
30#include "miscadmin.h"
31#include "postmaster/syslogger.h"
32#include "storage/fd.h"
33#include "utils/builtins.h"
34#include "utils/memutils.h"
35#include "utils/syscache.h"
36#include "utils/timestamp.h"
37
/*
 * Cross-call state carried by the directory-listing set-returning functions
 * in this file.
 */
typedef struct directory_fctx
{
	char	   *location;		/* path of the directory being listed */
	DIR		   *dirdesc;		/* open handle the entries are read from */
	bool		include_dot_dirs;	/* report "." and ".." entries too? */
} directory_fctx;
44
45
46/*
47 * Convert a "text" filename argument to C string, and check it's allowable.
48 *
49 * Filename may be absolute or relative to the DataDir, but we only allow
50 * absolute paths that match DataDir or Log_directory.
51 *
52 * This does a privilege check against the 'pg_read_server_files' role, so
53 * this function is really only appropriate for callers who are only checking
54 * 'read' access. Do not use this function if you are looking for a check
55 * for 'write' or 'program' access without updating it to access the type
56 * of check as an argument and checking the appropriate role membership.
57 */
58static char *
59convert_and_check_filename(text *arg)
60{
61 char *filename;
62
63 filename = text_to_cstring(arg);
64 canonicalize_path(filename); /* filename can change length here */
65
66 /*
67 * Members of the 'pg_read_server_files' role are allowed to access any
68 * files on the server as the PG user, so no need to do any further checks
69 * here.
70 */
71 if (is_member_of_role(GetUserId(), DEFAULT_ROLE_READ_SERVER_FILES))
72 return filename;
73
74 /* User isn't a member of the default role, so check if it's allowable */
75 if (is_absolute_path(filename))
76 {
77 /* Disallow '/a/b/data/..' */
78 if (path_contains_parent_reference(filename))
79 ereport(ERROR,
80 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
81 (errmsg("reference to parent directory (\"..\") not allowed"))));
82
83 /*
84 * Allow absolute paths if within DataDir or Log_directory, even
85 * though Log_directory might be outside DataDir.
86 */
87 if (!path_is_prefix_of_path(DataDir, filename) &&
88 (!is_absolute_path(Log_directory) ||
89 !path_is_prefix_of_path(Log_directory, filename)))
90 ereport(ERROR,
91 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
92 (errmsg("absolute path not allowed"))));
93 }
94 else if (!path_is_relative_and_below_cwd(filename))
95 ereport(ERROR,
96 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
97 (errmsg("path must be in or below the current directory"))));
98
99 return filename;
100}
101
102
103/*
104 * Read a section of a file, returning it as bytea
105 *
106 * Caller is responsible for all permissions checking.
107 *
108 * We read the whole of the file when bytes_to_read is negative.
109 */
110static bytea *
111read_binary_file(const char *filename, int64 seek_offset, int64 bytes_to_read,
112 bool missing_ok)
113{
114 bytea *buf;
115 size_t nbytes;
116 FILE *file;
117
118 if (bytes_to_read < 0)
119 {
120 if (seek_offset < 0)
121 bytes_to_read = -seek_offset;
122 else
123 {
124 struct stat fst;
125
126 if (stat(filename, &fst) < 0)
127 {
128 if (missing_ok && errno == ENOENT)
129 return NULL;
130 else
131 ereport(ERROR,
132 (errcode_for_file_access(),
133 errmsg("could not stat file \"%s\": %m", filename)));
134 }
135
136 bytes_to_read = fst.st_size - seek_offset;
137 }
138 }
139
140 /* not sure why anyone thought that int64 length was a good idea */
141 if (bytes_to_read > (MaxAllocSize - VARHDRSZ))
142 ereport(ERROR,
143 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
144 errmsg("requested length too large")));
145
146 if ((file = AllocateFile(filename, PG_BINARY_R)) == NULL)
147 {
148 if (missing_ok && errno == ENOENT)
149 return NULL;
150 else
151 ereport(ERROR,
152 (errcode_for_file_access(),
153 errmsg("could not open file \"%s\" for reading: %m",
154 filename)));
155 }
156
157 if (fseeko(file, (off_t) seek_offset,
158 (seek_offset >= 0) ? SEEK_SET : SEEK_END) != 0)
159 ereport(ERROR,
160 (errcode_for_file_access(),
161 errmsg("could not seek in file \"%s\": %m", filename)));
162
163 buf = (bytea *) palloc((Size) bytes_to_read + VARHDRSZ);
164
165 nbytes = fread(VARDATA(buf), 1, (size_t) bytes_to_read, file);
166
167 if (ferror(file))
168 ereport(ERROR,
169 (errcode_for_file_access(),
170 errmsg("could not read file \"%s\": %m", filename)));
171
172 SET_VARSIZE(buf, nbytes + VARHDRSZ);
173
174 FreeFile(file);
175
176 return buf;
177}
178
179/*
180 * Similar to read_binary_file, but we verify that the contents are valid
181 * in the database encoding.
182 */
183static text *
184read_text_file(const char *filename, int64 seek_offset, int64 bytes_to_read,
185 bool missing_ok)
186{
187 bytea *buf;
188
189 buf = read_binary_file(filename, seek_offset, bytes_to_read, missing_ok);
190
191 if (buf != NULL)
192 {
193 /* Make sure the input is valid */
194 pg_verifymbstr(VARDATA(buf), VARSIZE(buf) - VARHDRSZ, false);
195
196 /* OK, we can cast it to text safely */
197 return (text *) buf;
198 }
199 else
200 return NULL;
201}
202
203/*
204 * Read a section of a file, returning it as text
205 *
206 * This function is kept to support adminpack 1.0.
207 */
208Datum
209pg_read_file(PG_FUNCTION_ARGS)
210{
211 text *filename_t = PG_GETARG_TEXT_PP(0);
212 int64 seek_offset = 0;
213 int64 bytes_to_read = -1;
214 bool missing_ok = false;
215 char *filename;
216 text *result;
217
218 if (!superuser())
219 ereport(ERROR,
220 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
221 (errmsg("must be superuser to read files with adminpack 1.0"),
222 /* translator: %s is a SQL function name */
223 errhint("Consider using %s, which is part of core, instead.",
224 "pg_file_read()"))));
225
226 /* handle optional arguments */
227 if (PG_NARGS() >= 3)
228 {
229 seek_offset = PG_GETARG_INT64(1);
230 bytes_to_read = PG_GETARG_INT64(2);
231
232 if (bytes_to_read < 0)
233 ereport(ERROR,
234 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
235 errmsg("requested length cannot be negative")));
236 }
237 if (PG_NARGS() >= 4)
238 missing_ok = PG_GETARG_BOOL(3);
239
240 filename = convert_and_check_filename(filename_t);
241
242 result = read_text_file(filename, seek_offset, bytes_to_read, missing_ok);
243 if (result)
244 PG_RETURN_TEXT_P(result);
245 else
246 PG_RETURN_NULL();
247}
248
249/*
250 * Read a section of a file, returning it as text
251 *
252 * No superuser check done here- instead privileges are handled by the
253 * GRANT system.
254 */
255Datum
256pg_read_file_v2(PG_FUNCTION_ARGS)
257{
258 text *filename_t = PG_GETARG_TEXT_PP(0);
259 int64 seek_offset = 0;
260 int64 bytes_to_read = -1;
261 bool missing_ok = false;
262 char *filename;
263 text *result;
264
265 /* handle optional arguments */
266 if (PG_NARGS() >= 3)
267 {
268 seek_offset = PG_GETARG_INT64(1);
269 bytes_to_read = PG_GETARG_INT64(2);
270
271 if (bytes_to_read < 0)
272 ereport(ERROR,
273 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
274 errmsg("requested length cannot be negative")));
275 }
276 if (PG_NARGS() >= 4)
277 missing_ok = PG_GETARG_BOOL(3);
278
279 filename = convert_and_check_filename(filename_t);
280
281 result = read_text_file(filename, seek_offset, bytes_to_read, missing_ok);
282 if (result)
283 PG_RETURN_TEXT_P(result);
284 else
285 PG_RETURN_NULL();
286}
287
288/*
289 * Read a section of a file, returning it as bytea
290 */
291Datum
292pg_read_binary_file(PG_FUNCTION_ARGS)
293{
294 text *filename_t = PG_GETARG_TEXT_PP(0);
295 int64 seek_offset = 0;
296 int64 bytes_to_read = -1;
297 bool missing_ok = false;
298 char *filename;
299 bytea *result;
300
301 /* handle optional arguments */
302 if (PG_NARGS() >= 3)
303 {
304 seek_offset = PG_GETARG_INT64(1);
305 bytes_to_read = PG_GETARG_INT64(2);
306
307 if (bytes_to_read < 0)
308 ereport(ERROR,
309 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
310 errmsg("requested length cannot be negative")));
311 }
312 if (PG_NARGS() >= 4)
313 missing_ok = PG_GETARG_BOOL(3);
314
315 filename = convert_and_check_filename(filename_t);
316
317 result = read_binary_file(filename, seek_offset,
318 bytes_to_read, missing_ok);
319 if (result)
320 PG_RETURN_BYTEA_P(result);
321 else
322 PG_RETURN_NULL();
323}
324
325
/*
 * Wrapper functions for the 1 and 3 argument variants of pg_read_file_v2()
 * and pg_read_binary_file().
 *
 * These are necessary to pass the sanity check in opr_sanity, which checks
 * that all built-in functions that share the implementing C function take
 * the same number of arguments.
 */
334Datum
335pg_read_file_off_len(PG_FUNCTION_ARGS)
336{
337 return pg_read_file_v2(fcinfo);
338}
339
340Datum
341pg_read_file_all(PG_FUNCTION_ARGS)
342{
343 return pg_read_file_v2(fcinfo);
344}
345
346Datum
347pg_read_binary_file_off_len(PG_FUNCTION_ARGS)
348{
349 return pg_read_binary_file(fcinfo);
350}
351
352Datum
353pg_read_binary_file_all(PG_FUNCTION_ARGS)
354{
355 return pg_read_binary_file(fcinfo);
356}
357
358/*
359 * stat a file
360 */
361Datum
362pg_stat_file(PG_FUNCTION_ARGS)
363{
364 text *filename_t = PG_GETARG_TEXT_PP(0);
365 char *filename;
366 struct stat fst;
367 Datum values[6];
368 bool isnull[6];
369 HeapTuple tuple;
370 TupleDesc tupdesc;
371 bool missing_ok = false;
372
373 /* check the optional argument */
374 if (PG_NARGS() == 2)
375 missing_ok = PG_GETARG_BOOL(1);
376
377 filename = convert_and_check_filename(filename_t);
378
379 if (stat(filename, &fst) < 0)
380 {
381 if (missing_ok && errno == ENOENT)
382 PG_RETURN_NULL();
383 else
384 ereport(ERROR,
385 (errcode_for_file_access(),
386 errmsg("could not stat file \"%s\": %m", filename)));
387 }
388
389 /*
390 * This record type had better match the output parameters declared for me
391 * in pg_proc.h.
392 */
393 tupdesc = CreateTemplateTupleDesc(6);
394 TupleDescInitEntry(tupdesc, (AttrNumber) 1,
395 "size", INT8OID, -1, 0);
396 TupleDescInitEntry(tupdesc, (AttrNumber) 2,
397 "access", TIMESTAMPTZOID, -1, 0);
398 TupleDescInitEntry(tupdesc, (AttrNumber) 3,
399 "modification", TIMESTAMPTZOID, -1, 0);
400 TupleDescInitEntry(tupdesc, (AttrNumber) 4,
401 "change", TIMESTAMPTZOID, -1, 0);
402 TupleDescInitEntry(tupdesc, (AttrNumber) 5,
403 "creation", TIMESTAMPTZOID, -1, 0);
404 TupleDescInitEntry(tupdesc, (AttrNumber) 6,
405 "isdir", BOOLOID, -1, 0);
406 BlessTupleDesc(tupdesc);
407
408 memset(isnull, false, sizeof(isnull));
409
410 values[0] = Int64GetDatum((int64) fst.st_size);
411 values[1] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_atime));
412 values[2] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_mtime));
413 /* Unix has file status change time, while Win32 has creation time */
414#if !defined(WIN32) && !defined(__CYGWIN__)
415 values[3] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_ctime));
416 isnull[4] = true;
417#else
418 isnull[3] = true;
419 values[4] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_ctime));
420#endif
421 values[5] = BoolGetDatum(S_ISDIR(fst.st_mode));
422
423 tuple = heap_form_tuple(tupdesc, values, isnull);
424
425 pfree(filename);
426
427 PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
428}
429
430/*
431 * stat a file (1 argument version)
432 *
433 * note: this wrapper is necessary to pass the sanity check in opr_sanity,
434 * which checks that all built-in functions that share the implementing C
435 * function take the same number of arguments
436 */
437Datum
438pg_stat_file_1arg(PG_FUNCTION_ARGS)
439{
440 return pg_stat_file(fcinfo);
441}
442
443/*
444 * List a directory (returns the filenames only)
445 */
446Datum
447pg_ls_dir(PG_FUNCTION_ARGS)
448{
449 FuncCallContext *funcctx;
450 struct dirent *de;
451 directory_fctx *fctx;
452 MemoryContext oldcontext;
453
454 if (SRF_IS_FIRSTCALL())
455 {
456 bool missing_ok = false;
457 bool include_dot_dirs = false;
458
459 /* check the optional arguments */
460 if (PG_NARGS() == 3)
461 {
462 if (!PG_ARGISNULL(1))
463 missing_ok = PG_GETARG_BOOL(1);
464 if (!PG_ARGISNULL(2))
465 include_dot_dirs = PG_GETARG_BOOL(2);
466 }
467
468 funcctx = SRF_FIRSTCALL_INIT();
469 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
470
471 fctx = palloc(sizeof(directory_fctx));
472 fctx->location = convert_and_check_filename(PG_GETARG_TEXT_PP(0));
473
474 fctx->include_dot_dirs = include_dot_dirs;
475 fctx->dirdesc = AllocateDir(fctx->location);
476
477 if (!fctx->dirdesc)
478 {
479 if (missing_ok && errno == ENOENT)
480 {
481 MemoryContextSwitchTo(oldcontext);
482 SRF_RETURN_DONE(funcctx);
483 }
484 else
485 ereport(ERROR,
486 (errcode_for_file_access(),
487 errmsg("could not open directory \"%s\": %m",
488 fctx->location)));
489 }
490 funcctx->user_fctx = fctx;
491 MemoryContextSwitchTo(oldcontext);
492 }
493
494 funcctx = SRF_PERCALL_SETUP();
495 fctx = (directory_fctx *) funcctx->user_fctx;
496
497 while ((de = ReadDir(fctx->dirdesc, fctx->location)) != NULL)
498 {
499 if (!fctx->include_dot_dirs &&
500 (strcmp(de->d_name, ".") == 0 ||
501 strcmp(de->d_name, "..") == 0))
502 continue;
503
504 SRF_RETURN_NEXT(funcctx, CStringGetTextDatum(de->d_name));
505 }
506
507 FreeDir(fctx->dirdesc);
508
509 SRF_RETURN_DONE(funcctx);
510}
511
512/*
513 * List a directory (1 argument version)
514 *
515 * note: this wrapper is necessary to pass the sanity check in opr_sanity,
516 * which checks that all built-in functions that share the implementing C
517 * function take the same number of arguments.
518 */
519Datum
520pg_ls_dir_1arg(PG_FUNCTION_ARGS)
521{
522 return pg_ls_dir(fcinfo);
523}
524
525/* Generic function to return a directory listing of files */
526static Datum
527pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, bool missing_ok)
528{
529 FuncCallContext *funcctx;
530 struct dirent *de;
531 directory_fctx *fctx;
532
533 if (SRF_IS_FIRSTCALL())
534 {
535 MemoryContext oldcontext;
536 TupleDesc tupdesc;
537
538 funcctx = SRF_FIRSTCALL_INIT();
539 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
540
541 fctx = palloc(sizeof(directory_fctx));
542
543 tupdesc = CreateTemplateTupleDesc(3);
544 TupleDescInitEntry(tupdesc, (AttrNumber) 1, "name",
545 TEXTOID, -1, 0);
546 TupleDescInitEntry(tupdesc, (AttrNumber) 2, "size",
547 INT8OID, -1, 0);
548 TupleDescInitEntry(tupdesc, (AttrNumber) 3, "modification",
549 TIMESTAMPTZOID, -1, 0);
550 funcctx->tuple_desc = BlessTupleDesc(tupdesc);
551
552 fctx->location = pstrdup(dir);
553 fctx->dirdesc = AllocateDir(fctx->location);
554
555 if (!fctx->dirdesc)
556 {
557 if (missing_ok && errno == ENOENT)
558 {
559 MemoryContextSwitchTo(oldcontext);
560 SRF_RETURN_DONE(funcctx);
561 }
562 else
563 ereport(ERROR,
564 (errcode_for_file_access(),
565 errmsg("could not open directory \"%s\": %m",
566 fctx->location)));
567 }
568
569 funcctx->user_fctx = fctx;
570 MemoryContextSwitchTo(oldcontext);
571 }
572
573 funcctx = SRF_PERCALL_SETUP();
574 fctx = (directory_fctx *) funcctx->user_fctx;
575
576 while ((de = ReadDir(fctx->dirdesc, fctx->location)) != NULL)
577 {
578 Datum values[3];
579 bool nulls[3];
580 char path[MAXPGPATH * 2];
581 struct stat attrib;
582 HeapTuple tuple;
583
584 /* Skip hidden files */
585 if (de->d_name[0] == '.')
586 continue;
587
588 /* Get the file info */
589 snprintf(path, sizeof(path), "%s/%s", fctx->location, de->d_name);
590 if (stat(path, &attrib) < 0)
591 ereport(ERROR,
592 (errcode_for_file_access(),
593 errmsg("could not stat directory \"%s\": %m", dir)));
594
595 /* Ignore anything but regular files */
596 if (!S_ISREG(attrib.st_mode))
597 continue;
598
599 values[0] = CStringGetTextDatum(de->d_name);
600 values[1] = Int64GetDatum((int64) attrib.st_size);
601 values[2] = TimestampTzGetDatum(time_t_to_timestamptz(attrib.st_mtime));
602 memset(nulls, 0, sizeof(nulls));
603
604 tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
605 SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
606 }
607
608 FreeDir(fctx->dirdesc);
609 SRF_RETURN_DONE(funcctx);
610}
611
612/* Function to return the list of files in the log directory */
613Datum
614pg_ls_logdir(PG_FUNCTION_ARGS)
615{
616 return pg_ls_dir_files(fcinfo, Log_directory, false);
617}
618
619/* Function to return the list of files in the WAL directory */
620Datum
621pg_ls_waldir(PG_FUNCTION_ARGS)
622{
623 return pg_ls_dir_files(fcinfo, XLOGDIR, false);
624}
625
626/*
627 * Generic function to return the list of files in pgsql_tmp
628 */
629static Datum
630pg_ls_tmpdir(FunctionCallInfo fcinfo, Oid tblspc)
631{
632 char path[MAXPGPATH];
633
634 if (!SearchSysCacheExists1(TABLESPACEOID, ObjectIdGetDatum(tblspc)))
635 ereport(ERROR,
636 (errcode(ERRCODE_UNDEFINED_OBJECT),
637 errmsg("tablespace with OID %u does not exist",
638 tblspc)));
639
640 TempTablespacePath(path, tblspc);
641 return pg_ls_dir_files(fcinfo, path, true);
642}
643
644/*
645 * Function to return the list of temporary files in the pg_default tablespace's
646 * pgsql_tmp directory
647 */
648Datum
649pg_ls_tmpdir_noargs(PG_FUNCTION_ARGS)
650{
651 return pg_ls_tmpdir(fcinfo, DEFAULTTABLESPACE_OID);
652}
653
654/*
655 * Function to return the list of temporary files in the specified tablespace's
656 * pgsql_tmp directory
657 */
658Datum
659pg_ls_tmpdir_1arg(PG_FUNCTION_ARGS)
660{
661 return pg_ls_tmpdir(fcinfo, PG_GETARG_OID(0));
662}
663
664/*
665 * Function to return the list of files in the WAL archive status directory.
666 */
667Datum
668pg_ls_archive_statusdir(PG_FUNCTION_ARGS)
669{
670 return pg_ls_dir_files(fcinfo, XLOGDIR "/archive_status", true);
671}
672