1 | /* |
2 | * psql - the PostgreSQL interactive terminal |
3 | * |
4 | * Copyright (c) 2000-2019, PostgreSQL Global Development Group |
5 | * |
6 | * src/bin/psql/copy.c |
7 | */ |
8 | #include "postgres_fe.h" |
9 | #include "copy.h" |
10 | |
11 | #include <signal.h> |
12 | #include <sys/stat.h> |
13 | #ifndef WIN32 |
14 | #include <unistd.h> /* for isatty */ |
15 | #else |
16 | #include <io.h> /* I think */ |
17 | #endif |
18 | |
19 | #include "libpq-fe.h" |
20 | #include "pqexpbuffer.h" |
21 | |
22 | #include "settings.h" |
23 | #include "common.h" |
24 | #include "prompt.h" |
25 | #include "stringutils.h" |
26 | |
27 | #include "common/logging.h" |
28 | |
29 | /* |
30 | * parse_slash_copy |
31 | * -- parses \copy command line |
32 | * |
33 | * The documented syntax is: |
34 | * \copy tablename [(columnlist)] from|to filename [options] |
35 | * \copy ( query stmt ) to filename [options] |
36 | * |
37 | * where 'filename' can be one of the following: |
38 | * '<file path>' | PROGRAM '<command>' | stdin | stdout | pstdout | pstdout |
39 | * and 'query' can be one of the following: |
40 | * SELECT | UPDATE | INSERT | DELETE |
41 | * |
42 | * An undocumented fact is that you can still write BINARY before the |
43 | * tablename; this is a hangover from the pre-7.3 syntax. The options |
44 | * syntax varies across backend versions, but we avoid all that mess |
45 | * by just transmitting the stuff after the filename literally. |
46 | * |
47 | * table name can be double-quoted and can have a schema part. |
48 | * column names can be double-quoted. |
49 | * filename can be single-quoted like SQL literals. |
50 | * command must be single-quoted like SQL literals. |
51 | * |
52 | * returns a malloc'ed structure with the options, or NULL on parsing error |
53 | */ |
54 | |
55 | struct copy_options |
56 | { |
57 | char *before_tofrom; /* COPY string before TO/FROM */ |
58 | char *after_tofrom; /* COPY string after TO/FROM filename */ |
59 | char *file; /* NULL = stdin/stdout */ |
60 | bool program; /* is 'file' a program to popen? */ |
61 | bool psql_inout; /* true = use psql stdin/stdout */ |
62 | bool from; /* true = FROM, false = TO */ |
63 | }; |
64 | |
65 | |
66 | static void |
67 | free_copy_options(struct copy_options *ptr) |
68 | { |
69 | if (!ptr) |
70 | return; |
71 | free(ptr->before_tofrom); |
72 | free(ptr->after_tofrom); |
73 | free(ptr->file); |
74 | free(ptr); |
75 | } |
76 | |
77 | |
78 | /* concatenate "more" onto "var", freeing the original value of *var */ |
79 | static void |
80 | xstrcat(char **var, const char *more) |
81 | { |
82 | char *newvar; |
83 | |
84 | newvar = psprintf("%s%s" , *var, more); |
85 | free(*var); |
86 | *var = newvar; |
87 | } |
88 | |
89 | |
90 | static struct copy_options * |
91 | parse_slash_copy(const char *args) |
92 | { |
93 | struct copy_options *result; |
94 | char *token; |
95 | const char *whitespace = " \t\n\r" ; |
96 | char nonstd_backslash = standard_strings() ? 0 : '\\'; |
97 | |
98 | if (!args) |
99 | { |
100 | pg_log_error("\\copy: arguments required" ); |
101 | return NULL; |
102 | } |
103 | |
104 | result = pg_malloc0(sizeof(struct copy_options)); |
105 | |
106 | result->before_tofrom = pg_strdup("" ); /* initialize for appending */ |
107 | |
108 | token = strtokx(args, whitespace, ".,()" , "\"" , |
109 | 0, false, false, pset.encoding); |
110 | if (!token) |
111 | goto error; |
112 | |
113 | /* The following can be removed when we drop 7.3 syntax support */ |
114 | if (pg_strcasecmp(token, "binary" ) == 0) |
115 | { |
116 | xstrcat(&result->before_tofrom, token); |
117 | token = strtokx(NULL, whitespace, ".,()" , "\"" , |
118 | 0, false, false, pset.encoding); |
119 | if (!token) |
120 | goto error; |
121 | } |
122 | |
123 | /* Handle COPY (query) case */ |
124 | if (token[0] == '(') |
125 | { |
126 | int parens = 1; |
127 | |
128 | while (parens > 0) |
129 | { |
130 | xstrcat(&result->before_tofrom, " " ); |
131 | xstrcat(&result->before_tofrom, token); |
132 | token = strtokx(NULL, whitespace, "()" , "\"'" , |
133 | nonstd_backslash, true, false, pset.encoding); |
134 | if (!token) |
135 | goto error; |
136 | if (token[0] == '(') |
137 | parens++; |
138 | else if (token[0] == ')') |
139 | parens--; |
140 | } |
141 | } |
142 | |
143 | xstrcat(&result->before_tofrom, " " ); |
144 | xstrcat(&result->before_tofrom, token); |
145 | token = strtokx(NULL, whitespace, ".,()" , "\"" , |
146 | 0, false, false, pset.encoding); |
147 | if (!token) |
148 | goto error; |
149 | |
150 | /* |
151 | * strtokx() will not have returned a multi-character token starting with |
152 | * '.', so we don't need strcmp() here. Likewise for '(', etc, below. |
153 | */ |
154 | if (token[0] == '.') |
155 | { |
156 | /* handle schema . table */ |
157 | xstrcat(&result->before_tofrom, token); |
158 | token = strtokx(NULL, whitespace, ".,()" , "\"" , |
159 | 0, false, false, pset.encoding); |
160 | if (!token) |
161 | goto error; |
162 | xstrcat(&result->before_tofrom, token); |
163 | token = strtokx(NULL, whitespace, ".,()" , "\"" , |
164 | 0, false, false, pset.encoding); |
165 | if (!token) |
166 | goto error; |
167 | } |
168 | |
169 | if (token[0] == '(') |
170 | { |
171 | /* handle parenthesized column list */ |
172 | for (;;) |
173 | { |
174 | xstrcat(&result->before_tofrom, " " ); |
175 | xstrcat(&result->before_tofrom, token); |
176 | token = strtokx(NULL, whitespace, "()" , "\"" , |
177 | 0, false, false, pset.encoding); |
178 | if (!token) |
179 | goto error; |
180 | if (token[0] == ')') |
181 | break; |
182 | } |
183 | xstrcat(&result->before_tofrom, " " ); |
184 | xstrcat(&result->before_tofrom, token); |
185 | token = strtokx(NULL, whitespace, ".,()" , "\"" , |
186 | 0, false, false, pset.encoding); |
187 | if (!token) |
188 | goto error; |
189 | } |
190 | |
191 | if (pg_strcasecmp(token, "from" ) == 0) |
192 | result->from = true; |
193 | else if (pg_strcasecmp(token, "to" ) == 0) |
194 | result->from = false; |
195 | else |
196 | goto error; |
197 | |
198 | /* { 'filename' | PROGRAM 'command' | STDIN | STDOUT | PSTDIN | PSTDOUT } */ |
199 | token = strtokx(NULL, whitespace, ";" , "'" , |
200 | 0, false, false, pset.encoding); |
201 | if (!token) |
202 | goto error; |
203 | |
204 | if (pg_strcasecmp(token, "program" ) == 0) |
205 | { |
206 | int toklen; |
207 | |
208 | token = strtokx(NULL, whitespace, ";" , "'" , |
209 | 0, false, false, pset.encoding); |
210 | if (!token) |
211 | goto error; |
212 | |
213 | /* |
214 | * The shell command must be quoted. This isn't fool-proof, but |
215 | * catches most quoting errors. |
216 | */ |
217 | toklen = strlen(token); |
218 | if (token[0] != '\'' || toklen < 2 || token[toklen - 1] != '\'') |
219 | goto error; |
220 | |
221 | strip_quotes(token, '\'', 0, pset.encoding); |
222 | |
223 | result->program = true; |
224 | result->file = pg_strdup(token); |
225 | } |
226 | else if (pg_strcasecmp(token, "stdin" ) == 0 || |
227 | pg_strcasecmp(token, "stdout" ) == 0) |
228 | { |
229 | result->file = NULL; |
230 | } |
231 | else if (pg_strcasecmp(token, "pstdin" ) == 0 || |
232 | pg_strcasecmp(token, "pstdout" ) == 0) |
233 | { |
234 | result->psql_inout = true; |
235 | result->file = NULL; |
236 | } |
237 | else |
238 | { |
239 | /* filename can be optionally quoted */ |
240 | strip_quotes(token, '\'', 0, pset.encoding); |
241 | result->file = pg_strdup(token); |
242 | expand_tilde(&result->file); |
243 | } |
244 | |
245 | /* Collect the rest of the line (COPY options) */ |
246 | token = strtokx(NULL, "" , NULL, NULL, |
247 | 0, false, false, pset.encoding); |
248 | if (token) |
249 | result->after_tofrom = pg_strdup(token); |
250 | |
251 | return result; |
252 | |
253 | error: |
254 | if (token) |
255 | pg_log_error("\\copy: parse error at \"%s\"" , token); |
256 | else |
257 | pg_log_error("\\copy: parse error at end of line" ); |
258 | free_copy_options(result); |
259 | |
260 | return NULL; |
261 | } |
262 | |
263 | |
264 | /* |
265 | * Execute a \copy command (frontend copy). We have to open a file (or execute |
266 | * a command), then submit a COPY query to the backend and either feed it data |
267 | * from the file or route its response into the file. |
268 | */ |
269 | bool |
270 | do_copy(const char *args) |
271 | { |
272 | PQExpBufferData query; |
273 | FILE *copystream; |
274 | struct copy_options *options; |
275 | bool success; |
276 | |
277 | /* parse options */ |
278 | options = parse_slash_copy(args); |
279 | |
280 | if (!options) |
281 | return false; |
282 | |
283 | /* prepare to read or write the target file */ |
284 | if (options->file && !options->program) |
285 | canonicalize_path(options->file); |
286 | |
287 | if (options->from) |
288 | { |
289 | if (options->file) |
290 | { |
291 | if (options->program) |
292 | { |
293 | fflush(stdout); |
294 | fflush(stderr); |
295 | errno = 0; |
296 | copystream = popen(options->file, PG_BINARY_R); |
297 | } |
298 | else |
299 | copystream = fopen(options->file, PG_BINARY_R); |
300 | } |
301 | else if (!options->psql_inout) |
302 | copystream = pset.cur_cmd_source; |
303 | else |
304 | copystream = stdin; |
305 | } |
306 | else |
307 | { |
308 | if (options->file) |
309 | { |
310 | if (options->program) |
311 | { |
312 | fflush(stdout); |
313 | fflush(stderr); |
314 | errno = 0; |
315 | disable_sigpipe_trap(); |
316 | copystream = popen(options->file, PG_BINARY_W); |
317 | } |
318 | else |
319 | copystream = fopen(options->file, PG_BINARY_W); |
320 | } |
321 | else if (!options->psql_inout) |
322 | copystream = pset.queryFout; |
323 | else |
324 | copystream = stdout; |
325 | } |
326 | |
327 | if (!copystream) |
328 | { |
329 | if (options->program) |
330 | pg_log_error("could not execute command \"%s\": %m" , |
331 | options->file); |
332 | else |
333 | pg_log_error("%s: %m" , |
334 | options->file); |
335 | free_copy_options(options); |
336 | return false; |
337 | } |
338 | |
339 | if (!options->program) |
340 | { |
341 | struct stat st; |
342 | int result; |
343 | |
344 | /* make sure the specified file is not a directory */ |
345 | if ((result = fstat(fileno(copystream), &st)) < 0) |
346 | pg_log_error("could not stat file \"%s\": %m" , |
347 | options->file); |
348 | |
349 | if (result == 0 && S_ISDIR(st.st_mode)) |
350 | pg_log_error("%s: cannot copy from/to a directory" , |
351 | options->file); |
352 | |
353 | if (result < 0 || S_ISDIR(st.st_mode)) |
354 | { |
355 | fclose(copystream); |
356 | free_copy_options(options); |
357 | return false; |
358 | } |
359 | } |
360 | |
361 | /* build the command we will send to the backend */ |
362 | initPQExpBuffer(&query); |
363 | printfPQExpBuffer(&query, "COPY " ); |
364 | appendPQExpBufferStr(&query, options->before_tofrom); |
365 | if (options->from) |
366 | appendPQExpBufferStr(&query, " FROM STDIN " ); |
367 | else |
368 | appendPQExpBufferStr(&query, " TO STDOUT " ); |
369 | if (options->after_tofrom) |
370 | appendPQExpBufferStr(&query, options->after_tofrom); |
371 | |
372 | /* run it like a user command, but with copystream as data source/sink */ |
373 | pset.copyStream = copystream; |
374 | success = SendQuery(query.data); |
375 | pset.copyStream = NULL; |
376 | termPQExpBuffer(&query); |
377 | |
378 | if (options->file != NULL) |
379 | { |
380 | if (options->program) |
381 | { |
382 | int pclose_rc = pclose(copystream); |
383 | |
384 | if (pclose_rc != 0) |
385 | { |
386 | if (pclose_rc < 0) |
387 | pg_log_error("could not close pipe to external command: %m" ); |
388 | else |
389 | { |
390 | char *reason = wait_result_to_str(pclose_rc); |
391 | |
392 | pg_log_error("%s: %s" , options->file, |
393 | reason ? reason : "" ); |
394 | if (reason) |
395 | free(reason); |
396 | } |
397 | success = false; |
398 | } |
399 | restore_sigpipe_trap(); |
400 | } |
401 | else |
402 | { |
403 | if (fclose(copystream) != 0) |
404 | { |
405 | pg_log_error("%s: %m" , options->file); |
406 | success = false; |
407 | } |
408 | } |
409 | } |
410 | free_copy_options(options); |
411 | return success; |
412 | } |
413 | |
414 | |
415 | /* |
416 | * Functions for handling COPY IN/OUT data transfer. |
417 | * |
418 | * If you want to use COPY TO STDOUT/FROM STDIN in your application, |
419 | * this is the code to steal ;) |
420 | */ |
421 | |
422 | /* |
423 | * handleCopyOut |
424 | * receives data as a result of a COPY ... TO STDOUT command |
425 | * |
426 | * conn should be a database connection that you just issued COPY TO on |
427 | * and got back a PGRES_COPY_OUT result. |
428 | * |
429 | * copystream is the file stream for the data to go to. |
430 | * copystream can be NULL to eat the data without writing it anywhere. |
431 | * |
432 | * The final status for the COPY is returned into *res (but note |
433 | * we already reported the error, if it's not a success result). |
434 | * |
435 | * result is true if successful, false if not. |
436 | */ |
437 | bool |
438 | handleCopyOut(PGconn *conn, FILE *copystream, PGresult **res) |
439 | { |
440 | bool OK = true; |
441 | char *buf; |
442 | int ret; |
443 | |
444 | for (;;) |
445 | { |
446 | ret = PQgetCopyData(conn, &buf, 0); |
447 | |
448 | if (ret < 0) |
449 | break; /* done or server/connection error */ |
450 | |
451 | if (buf) |
452 | { |
453 | if (OK && copystream && fwrite(buf, 1, ret, copystream) != ret) |
454 | { |
455 | pg_log_error("could not write COPY data: %m" ); |
456 | /* complain only once, keep reading data from server */ |
457 | OK = false; |
458 | } |
459 | PQfreemem(buf); |
460 | } |
461 | } |
462 | |
463 | if (OK && copystream && fflush(copystream)) |
464 | { |
465 | pg_log_error("could not write COPY data: %m" ); |
466 | OK = false; |
467 | } |
468 | |
469 | if (ret == -2) |
470 | { |
471 | pg_log_error("COPY data transfer failed: %s" , PQerrorMessage(conn)); |
472 | OK = false; |
473 | } |
474 | |
475 | /* |
476 | * Check command status and return to normal libpq state. |
477 | * |
478 | * If for some reason libpq is still reporting PGRES_COPY_OUT state, we |
479 | * would like to forcibly exit that state, since our caller would be |
480 | * unable to distinguish that situation from reaching the next COPY in a |
481 | * command string that happened to contain two consecutive COPY TO STDOUT |
482 | * commands. However, libpq provides no API for doing that, and in |
483 | * principle it's a libpq bug anyway if PQgetCopyData() returns -1 or -2 |
484 | * but hasn't exited COPY_OUT state internally. So we ignore the |
485 | * possibility here. |
486 | */ |
487 | *res = PQgetResult(conn); |
488 | if (PQresultStatus(*res) != PGRES_COMMAND_OK) |
489 | { |
490 | pg_log_info("%s" , PQerrorMessage(conn)); |
491 | OK = false; |
492 | } |
493 | |
494 | return OK; |
495 | } |
496 | |
497 | /* |
498 | * handleCopyIn |
499 | * sends data to complete a COPY ... FROM STDIN command |
500 | * |
501 | * conn should be a database connection that you just issued COPY FROM on |
502 | * and got back a PGRES_COPY_IN result. |
503 | * copystream is the file stream to read the data from. |
504 | * isbinary can be set from PQbinaryTuples(). |
505 | * The final status for the COPY is returned into *res (but note |
506 | * we already reported the error, if it's not a success result). |
507 | * |
508 | * result is true if successful, false if not. |
509 | */ |
510 | |
511 | /* read chunk size for COPY IN - size is not critical */ |
512 | #define COPYBUFSIZ 8192 |
513 | |
514 | bool |
515 | handleCopyIn(PGconn *conn, FILE *copystream, bool isbinary, PGresult **res) |
516 | { |
517 | bool OK; |
518 | char buf[COPYBUFSIZ]; |
519 | bool showprompt; |
520 | |
521 | /* |
522 | * Establish longjmp destination for exiting from wait-for-input. (This is |
523 | * only effective while sigint_interrupt_enabled is TRUE.) |
524 | */ |
525 | if (sigsetjmp(sigint_interrupt_jmp, 1) != 0) |
526 | { |
527 | /* got here with longjmp */ |
528 | |
529 | /* Terminate data transfer */ |
530 | PQputCopyEnd(conn, |
531 | (PQprotocolVersion(conn) < 3) ? NULL : |
532 | _("canceled by user" )); |
533 | |
534 | OK = false; |
535 | goto copyin_cleanup; |
536 | } |
537 | |
538 | /* Prompt if interactive input */ |
539 | if (isatty(fileno(copystream))) |
540 | { |
541 | showprompt = true; |
542 | if (!pset.quiet) |
543 | puts(_("Enter data to be copied followed by a newline.\n" |
544 | "End with a backslash and a period on a line by itself, or an EOF signal." )); |
545 | } |
546 | else |
547 | showprompt = false; |
548 | |
549 | OK = true; |
550 | |
551 | if (isbinary) |
552 | { |
553 | /* interactive input probably silly, but give one prompt anyway */ |
554 | if (showprompt) |
555 | { |
556 | const char *prompt = get_prompt(PROMPT_COPY, NULL); |
557 | |
558 | fputs(prompt, stdout); |
559 | fflush(stdout); |
560 | } |
561 | |
562 | for (;;) |
563 | { |
564 | int buflen; |
565 | |
566 | /* enable longjmp while waiting for input */ |
567 | sigint_interrupt_enabled = true; |
568 | |
569 | buflen = fread(buf, 1, COPYBUFSIZ, copystream); |
570 | |
571 | sigint_interrupt_enabled = false; |
572 | |
573 | if (buflen <= 0) |
574 | break; |
575 | |
576 | if (PQputCopyData(conn, buf, buflen) <= 0) |
577 | { |
578 | OK = false; |
579 | break; |
580 | } |
581 | } |
582 | } |
583 | else |
584 | { |
585 | bool copydone = false; |
586 | |
587 | while (!copydone) |
588 | { /* for each input line ... */ |
589 | bool firstload; |
590 | bool linedone; |
591 | |
592 | if (showprompt) |
593 | { |
594 | const char *prompt = get_prompt(PROMPT_COPY, NULL); |
595 | |
596 | fputs(prompt, stdout); |
597 | fflush(stdout); |
598 | } |
599 | |
600 | firstload = true; |
601 | linedone = false; |
602 | |
603 | while (!linedone) |
604 | { /* for each bufferload in line ... */ |
605 | int linelen; |
606 | char *fgresult; |
607 | |
608 | /* enable longjmp while waiting for input */ |
609 | sigint_interrupt_enabled = true; |
610 | |
611 | fgresult = fgets(buf, sizeof(buf), copystream); |
612 | |
613 | sigint_interrupt_enabled = false; |
614 | |
615 | if (!fgresult) |
616 | { |
617 | copydone = true; |
618 | break; |
619 | } |
620 | |
621 | linelen = strlen(buf); |
622 | |
623 | /* current line is done? */ |
624 | if (linelen > 0 && buf[linelen - 1] == '\n') |
625 | linedone = true; |
626 | |
627 | /* check for EOF marker, but not on a partial line */ |
628 | if (firstload) |
629 | { |
630 | /* |
631 | * This code erroneously assumes '\.' on a line alone |
632 | * inside a quoted CSV string terminates the \copy. |
633 | * http://www.postgresql.org/message-id/E1TdNVQ-0001ju-GO@wrigleys.postgresql.org |
634 | */ |
635 | if (strcmp(buf, "\\.\n" ) == 0 || |
636 | strcmp(buf, "\\.\r\n" ) == 0) |
637 | { |
638 | copydone = true; |
639 | break; |
640 | } |
641 | |
642 | firstload = false; |
643 | } |
644 | |
645 | if (PQputCopyData(conn, buf, linelen) <= 0) |
646 | { |
647 | OK = false; |
648 | copydone = true; |
649 | break; |
650 | } |
651 | } |
652 | |
653 | if (copystream == pset.cur_cmd_source) |
654 | { |
655 | pset.lineno++; |
656 | pset.stmt_lineno++; |
657 | } |
658 | } |
659 | } |
660 | |
661 | /* Check for read error */ |
662 | if (ferror(copystream)) |
663 | OK = false; |
664 | |
665 | /* |
666 | * Terminate data transfer. We can't send an error message if we're using |
667 | * protocol version 2. |
668 | */ |
669 | if (PQputCopyEnd(conn, |
670 | (OK || PQprotocolVersion(conn) < 3) ? NULL : |
671 | _("aborted because of read failure" )) <= 0) |
672 | OK = false; |
673 | |
674 | copyin_cleanup: |
675 | |
676 | /* |
677 | * Clear the EOF flag on the stream, in case copying ended due to an EOF |
678 | * signal. This allows an interactive TTY session to perform another COPY |
679 | * FROM STDIN later. (In non-STDIN cases, we're about to close the file |
680 | * anyway, so it doesn't matter.) Although we don't ever test the flag |
681 | * with feof(), some fread() implementations won't read more data if it's |
682 | * set. This also clears the error flag, but we already checked that. |
683 | */ |
684 | clearerr(copystream); |
685 | |
686 | /* |
687 | * Check command status and return to normal libpq state. |
688 | * |
689 | * We do not want to return with the status still PGRES_COPY_IN: our |
690 | * caller would be unable to distinguish that situation from reaching the |
691 | * next COPY in a command string that happened to contain two consecutive |
692 | * COPY FROM STDIN commands. We keep trying PQputCopyEnd() in the hope |
693 | * it'll work eventually. (What's actually likely to happen is that in |
694 | * attempting to flush the data, libpq will eventually realize that the |
695 | * connection is lost. But that's fine; it will get us out of COPY_IN |
696 | * state, which is what we need.) |
697 | */ |
698 | while (*res = PQgetResult(conn), PQresultStatus(*res) == PGRES_COPY_IN) |
699 | { |
700 | OK = false; |
701 | PQclear(*res); |
702 | /* We can't send an error message if we're using protocol version 2 */ |
703 | PQputCopyEnd(conn, |
704 | (PQprotocolVersion(conn) < 3) ? NULL : |
705 | _("trying to exit copy mode" )); |
706 | } |
707 | if (PQresultStatus(*res) != PGRES_COMMAND_OK) |
708 | { |
709 | pg_log_info("%s" , PQerrorMessage(conn)); |
710 | OK = false; |
711 | } |
712 | |
713 | return OK; |
714 | } |
715 | |