1/*
2 * Copyright 2012-present Facebook, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16/**
17 * Subprocess library, modeled after Python's subprocess module
18 * (http://docs.python.org/2/library/subprocess.html)
19 *
20 * This library defines one class (Subprocess) which represents a child
21 * process. Subprocess has two constructors: one that takes a vector<string>
22 * and executes the given executable without using the shell, and one
23 * that takes a string and executes the given command using the shell.
24 * Subprocess allows you to redirect the child's standard input, standard
25 * output, and standard error to/from child descriptors in the parent,
26 * or to create communication pipes between the child and the parent.
27 *
28 * The simplest example is a thread-safe [1] version of the system() library
29 * function:
30 * Subprocess(cmd).wait();
31 * which executes the command using the default shell and waits for it
32 * to complete, returning the exit status.
33 *
34 * A thread-safe [1] version of popen() (type="r", to read from the child):
35 * Subprocess proc(cmd, Subprocess::Options().pipeStdout());
36 * // read from proc.stdoutFd()
37 * proc.wait();
38 *
39 * A thread-safe [1] version of popen() (type="w", to write to the child):
40 * Subprocess proc(cmd, Subprocess::Options().pipeStdin());
41 * // write to proc.stdinFd()
42 * proc.wait();
43 *
44 * If you want to redirect both stdin and stdout to pipes, you can, but note
45 * that you're subject to a variety of deadlocks. You'll want to use
46 * nonblocking I/O, like the callback version of communicate().
47 *
48 * The string or IOBuf-based variants of communicate() are the simplest way
49 * to communicate with a child via its standard input, standard output, and
50 * standard error. They buffer everything in memory, so they are not great
51 * for large amounts of data (or long-running processes), but they are much
52 * simpler than the callback version.
53 *
54 * == A note on thread-safety ==
55 *
56 * [1] "thread-safe" refers ONLY to the fact that Subprocess is very careful
57 * to fork in a way that does not cause grief in multithreaded programs.
58 *
59 * Caveat: If your system does not have the atomic pipe2 system call, it is
60 * not safe to concurrently call Subprocess from different threads.
61 * Therefore, it is best to have a single thread be responsible for spawning
62 * subprocesses.
63 *
 * A particular instance of Subprocess is emphatically **not** thread-safe.
65 * If you need to simultaneously communicate via the pipes, and interact
66 * with the Subprocess state, your best bet is to:
67 * - takeOwnershipOfPipes() to separate the pipe I/O from the subprocess.
68 * - Only interact with the Subprocess from one thread at a time.
69 *
70 * The current implementation of communicate() cannot be safely interrupted.
71 * To do so correctly, one would need to use EventFD, or open a dedicated
72 * pipe to be messaged from a different thread -- in particular, kill() will
73 * not do, since a descendant may keep the pipes open indefinitely.
74 *
75 * So, once you call communicate(), you must wait for it to return, and not
76 * touch the pipes from other threads. closeParentFd() is emphatically
77 * unsafe to call concurrently, and even sendSignal() is not a good idea.
78 * You can perhaps give the Subprocess's PID to a different thread before
79 * starting communicate(), and use that PID to send a signal without
80 * accessing the Subprocess object. In that case, you will need a mutex
 * that ensures you don't wait() before you have sent said signal. In a
82 * nutshell, don't do this.
83 *
84 * In fact, signals are inherently concurrency-unsafe on Unix: if you signal
85 * a PID, while another thread is in waitpid(), the signal may fire either
86 * before or after the process is reaped. This means that your signal can,
87 * in pathological circumstances, be delivered to the wrong process (ouch!).
88 * To avoid this, you should only use non-blocking waits (i.e. poll()), and
89 * make sure to serialize your signals (i.e. kill()) with the waits --
90 * either wait & signal from the same thread, or use a mutex.
91 */
92
93#pragma once
94
95#include <signal.h>
96#include <sys/types.h>
97
98#if __APPLE__
99#include <sys/wait.h>
100#else
101#include <wait.h>
102#endif
103
104#include <exception>
105#include <string>
106#include <vector>
107
108#include <boost/container/flat_map.hpp>
109#include <boost/operators.hpp>
110
111#include <folly/Exception.h>
112#include <folly/File.h>
113#include <folly/FileUtil.h>
114#include <folly/Function.h>
115#include <folly/MapUtil.h>
116#include <folly/Optional.h>
117#include <folly/Portability.h>
118#include <folly/Range.h>
119#include <folly/gen/String.h>
120#include <folly/io/IOBufQueue.h>
121#include <folly/portability/SysResource.h>
122
123namespace folly {
124
class Subprocess;

/**
 * Value type wrapping a process return code: it records whether a child
 * process has not been started, is running, has exited, or was killed by a
 * signal, and gives access to the matching details.
 */
class ProcessReturnCode {
 public:
  enum State {
    // A Subprocess launches its child from the constructor, so only
    // default-initialized or moved-out ProcessReturnCodes are in this state.
    NOT_STARTED,
    RUNNING,
    EXITED,
    KILLED,
  };

  // Default-initialized (to "not started") for convenience.
  // Subprocess::returnCode() will never produce this value.
  ProcessReturnCode() : rawStatus_{RV_NOT_STARTED} {}

  // Copying is a plain member-wise copy.
  ProcessReturnCode(const ProcessReturnCode& p) = default;
  ProcessReturnCode& operator=(const ProcessReturnCode& p) = default;
  // Moving is deliberately not defaulted: for Subprocess to be movable, the
  // "moved out" state must not be "running", or ~Subprocess() will abort.
  ProcessReturnCode(ProcessReturnCode&& p) noexcept;
  ProcessReturnCode& operator=(ProcessReturnCode&& p) noexcept;

  /**
   * Named constructors for the two states that carry no exit information.
   */
  static ProcessReturnCode makeNotStarted() {
    return ProcessReturnCode{RV_NOT_STARTED};
  }

  static ProcessReturnCode makeRunning() {
    return ProcessReturnCode{RV_RUNNING};
  }

  // Builds a return code from `status` (defined out of line).
  // NOTE(review): presumably a wait()-style status word -- confirm against
  // the implementation.
  static ProcessReturnCode make(int status);

  /**
   * Process state. One of:
   *   NOT_STARTED: process hasn't been started successfully
   *   RUNNING:     process is currently running
   *   EXITED:      process exited (successfully or not)
   *   KILLED:      process was killed by a signal.
   */
  State state() const;

  /**
   * Convenience predicates, one per value of state().
   */
  bool notStarted() const {
    return NOT_STARTED == state();
  }
  bool running() const {
    return RUNNING == state();
  }
  bool exited() const {
    return EXITED == state();
  }
  bool killed() const {
    return KILLED == state();
  }

  /**
   * Exit status. Only valid if state() == EXITED; throws otherwise.
   */
  int exitStatus() const;

  /**
   * Signal that caused the process's termination. Only valid if
   * state() == KILLED; throws otherwise.
   */
  int killSignal() const;

  /**
   * Was a core file generated? Only valid if state() == KILLED; throws
   * otherwise.
   */
  bool coreDumped() const;

  /**
   * String representation; one of
   *   "not started"
   *   "running"
   *   "exited with status <status>"
   *   "killed by signal <signal>"
   *   "killed by signal <signal> (core dumped)"
   */
  std::string str() const;

  /**
   * Helper function to enforce a precondition based on this.
   * Throws std::logic_error if in an unexpected state.
   */
  void enforce(State state) const;

 private:
  // Wraps a raw status value; only reachable through the factories above.
  explicit ProcessReturnCode(int rv) : rawStatus_{rv} {}

  // Sentinel raw values for the two states without exit information.
  static constexpr int RV_NOT_STARTED = -2;
  static constexpr int RV_RUNNING = -1;

  int rawStatus_;
};
227
228/**
229 * Base exception thrown by the Subprocess methods.
230 */
231class FOLLY_EXPORT SubprocessError : public std::runtime_error {
232 public:
233 using std::runtime_error::runtime_error;
234};
235
/**
 * Exception thrown by the *Checked methods of Subprocess (pollChecked(),
 * waitChecked()). It carries a ProcessReturnCode so the handler can inspect
 * it.
 */
class FOLLY_EXPORT CalledProcessError : public SubprocessError {
 public:
  explicit CalledProcessError(ProcessReturnCode rc);
  // `noexcept` replaces the dynamic exception specification `throw()`, which
  // is deprecated and was removed from the language in C++20.
  ~CalledProcessError() noexcept override = default;

  // Returns the return code stored in this exception.
  // NOTE(review): presumably the `rc` given to the constructor, which is
  // defined out of line -- confirm.
  ProcessReturnCode returnCode() const {
    return returnCode_;
  }

 private:
  ProcessReturnCode returnCode_;
};
250
251/**
252 * Exception thrown if the subprocess cannot be started.
253 */
254class FOLLY_EXPORT SubprocessSpawnError : public SubprocessError {
255 public:
256 SubprocessSpawnError(const char* executable, int errCode, int errnoValue);
257 ~SubprocessSpawnError() throw() override = default;
258 int errnoValue() const {
259 return errnoValue_;
260 }
261
262 private:
263 int errnoValue_;
264};
265
266/**
267 * Subprocess.
268 */
269class Subprocess {
270 public:
271 static const int CLOSE = -1;
272 static const int PIPE = -2;
273 static const int PIPE_IN = -3;
274 static const int PIPE_OUT = -4;
275
/**
 * Interface of the hook installed through
 * Subprocess::Options::dangerousPostForkPreExecCallback(); see that method
 * for usage. Every derived class should include the following warning:
 *
 * DANGER: This class runs after fork in a child process. Be fast, the
 * parent thread is waiting, but remember that other parent threads are
 * running and may mutate your state. Avoid mutating any data belonging to
 * the parent. Avoid interacting with non-POD data that originated in the
 * parent. Avoid any libraries that may internally reference non-POD data.
 * Especially beware parent mutexes -- for example, glog's LOG() uses one.
 */
struct DangerousPostForkPreExecCallback {
  virtual ~DangerousPostForkPreExecCallback() = default;

  // Must return 0 on success, or an `errno` error code on failure.
  virtual int operator()() = 0;
};
292
293 /**
294 * Class representing various options: file descriptor behavior, and
295 * whether to use $PATH for searching for the executable,
296 *
297 * By default, we don't use $PATH, file descriptors are closed if
298 * the close-on-exec flag is set (fcntl FD_CLOEXEC) and inherited
299 * otherwise.
300 */
301 class Options {
302 friend class Subprocess;
303
304 public:
305 Options() {} // E.g. https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58328
306
307 /**
308 * Change action for file descriptor fd.
309 *
310 * "action" may be another file descriptor number (dup2()ed before the
311 * child execs), or one of CLOSE, PIPE_IN, and PIPE_OUT.
312 *
313 * CLOSE: close the file descriptor in the child
314 * PIPE_IN: open a pipe *from* the child
315 * PIPE_OUT: open a pipe *to* the child
316 *
317 * PIPE is a shortcut; same as PIPE_IN for stdin (fd 0), same as
318 * PIPE_OUT for stdout (fd 1) or stderr (fd 2), and an error for
319 * other file descriptors.
320 */
321 Options& fd(int fd, int action);
322
323 /**
324 * Shortcut to change the action for standard input.
325 */
326 Options& stdinFd(int action) {
327 return fd(STDIN_FILENO, action);
328 }
329
330 /**
331 * Shortcut to change the action for standard output.
332 */
333 Options& stdoutFd(int action) {
334 return fd(STDOUT_FILENO, action);
335 }
336
337 /**
338 * Shortcut to change the action for standard error.
339 * Note that stderr(1) will redirect the standard error to the same
340 * file descriptor as standard output; the equivalent of bash's "2>&1"
341 */
342 Options& stderrFd(int action) {
343 return fd(STDERR_FILENO, action);
344 }
345
346 Options& pipeStdin() {
347 return fd(STDIN_FILENO, PIPE_IN);
348 }
349 Options& pipeStdout() {
350 return fd(STDOUT_FILENO, PIPE_OUT);
351 }
352 Options& pipeStderr() {
353 return fd(STDERR_FILENO, PIPE_OUT);
354 }
355
356 /**
357 * Close all other fds (other than standard input, output, error,
358 * and file descriptors explicitly specified with fd()).
359 *
360 * This is potentially slow; it's generally a better idea to
361 * set the close-on-exec flag on all file descriptors that shouldn't
362 * be inherited by the child.
363 *
364 * Even with this option set, standard input, output, and error are
365 * not closed; use stdin(CLOSE), stdout(CLOSE), stderr(CLOSE) if you
366 * desire this.
367 */
368 Options& closeOtherFds() {
369 closeOtherFds_ = true;
370 return *this;
371 }
372
373 /**
374 * Use the search path ($PATH) when searching for the executable.
375 */
376 Options& usePath() {
377 usePath_ = true;
378 return *this;
379 }
380
381 /**
382 * Change the child's working directory, after the vfork.
383 */
384 Options& chdir(const std::string& dir) {
385 childDir_ = dir;
386 return *this;
387 }
388
389#if __linux__
390 /**
391 * Child will receive a signal when the parent exits.
392 */
393 Options& parentDeathSignal(int sig) {
394 parentDeathSignal_ = sig;
395 return *this;
396 }
397#endif
398
399 /**
400 * Child will be made a process group leader when it starts. Upside: one
401 * can reliably kill all its non-daemonizing descendants. Downside: the
402 * child will not receive Ctrl-C etc during interactive use.
403 */
404 Options& processGroupLeader() {
405 processGroupLeader_ = true;
406 return *this;
407 }
408
409 /**
410 * Detach the spawned process, to allow destroying the Subprocess object
411 * without waiting for the child process to finish.
412 *
413 * This causes the code to fork twice before executing the command.
414 * The intermediate child process will exit immediately, causing the process
415 * running the executable to be reparented to init (pid 1).
416 *
417 * Subprocess objects created with detach() enabled will already be in an
418 * "EXITED" state when the constructor returns. The caller should not call
419 * wait() or poll() on the Subprocess, and pid() will return -1.
420 */
421 Options& detach() {
422 detach_ = true;
423 return *this;
424 }
425
426 /**
427 * *** READ THIS WHOLE DOCBLOCK BEFORE USING ***
428 *
429 * Run this callback in the child after the fork, just before the
430 * exec(), and after the child's state has been completely set up:
431 * - signal handlers have been reset to default handling and unblocked
432 * - the working directory was set
433 * - closed any file descriptors specified via Options()
434 * - set child process flags (see code)
435 *
436 * This is EXTREMELY DANGEROUS. For example, this innocuous-looking code
437 * can cause a fraction of your Subprocess launches to hang forever:
438 *
439 * LOG(INFO) << "Hello from the child";
440 *
441 * The reason is that glog has an internal mutex. If your fork() happens
442 * when the parent has the mutex locked, the child will wait forever.
443 *
444 * == GUIDELINES ==
445 *
446 * - Be quick -- the parent thread is blocked until you exit.
447 * - Remember that other parent threads are running, and may mutate your
448 * state.
449 * - Avoid mutating any data belonging to the parent.
450 * - Avoid interacting with non-POD data that came from the parent.
451 * - Avoid any libraries that may internally reference non-POD state.
452 * - Especially beware parent mutexes, e.g. LOG() uses a global mutex.
453 * - Avoid invoking the parent's destructors (you can accidentally
454 * delete files, terminate network connections, etc).
455 * - Read http://ewontfix.com/7/
456 */
457 Options& dangerousPostForkPreExecCallback(
458 DangerousPostForkPreExecCallback* cob) {
459 dangerousPostForkPreExecCallback_ = cob;
460 return *this;
461 }
462
463#if __linux__
464 /**
465 * This is an experimental feature, it is best you don't use it at this
466 * point of time.
467 * Although folly would support cloning with custom flags in some form, this
468 * API might change in the near future. So use the following assuming it is
469 * experimental. (Apr 11, 2017)
470 *
471 * This unlocks Subprocess to support clone flags, many of them need
472 * CAP_SYS_ADMIN permissions. It might also require you to go through the
473 * implementation to understand what happens before, between and after the
474 * fork-and-exec.
475 *
476 * `man 2 clone` would be a starting point for knowing about the available
477 * flags.
478 */
479 using clone_flags_t = uint64_t;
480 Options& useCloneWithFlags(clone_flags_t cloneFlags) noexcept {
481 cloneFlags_ = cloneFlags;
482 return *this;
483 }
484#endif
485
486 private:
487 typedef boost::container::flat_map<int, int> FdMap;
488 FdMap fdActions_;
489 bool closeOtherFds_{false};
490 bool usePath_{false};
491 bool processGroupLeader_{false};
492 bool detach_{false};
493 std::string childDir_; // "" keeps the parent's working directory
494#if __linux__
495 int parentDeathSignal_{0};
496#endif
497 DangerousPostForkPreExecCallback* dangerousPostForkPreExecCallback_{
498 nullptr};
499#if __linux__
500 // none means `vfork()` instead of a custom `clone()`
501 // Optional<> is used because value of '0' means do clone without any flags.
502 Optional<clone_flags_t> cloneFlags_;
503#endif
504 };
505
// Non-copyable, but movable
507 Subprocess(const Subprocess&) = delete;
508 Subprocess& operator=(const Subprocess&) = delete;
509 Subprocess(Subprocess&&) = default;
510 Subprocess& operator=(Subprocess&&) = default;
511
512 /**
513 * Create an uninitialized subprocess.
514 *
515 * In this state it can only be destroyed, or assigned to using the move
516 * assignment operator.
517 */
518 Subprocess();
519
520 /**
521 * Create a subprocess from the given arguments. argv[0] must be listed.
522 * If not-null, executable must be the actual executable
523 * being used (otherwise it's the same as argv[0]).
524 *
525 * If env is not-null, it must contain name=value strings to be used
526 * as the child's environment; otherwise, we inherit the environment
527 * from the parent. env must be null if options.usePath is set.
528 */
529 explicit Subprocess(
530 const std::vector<std::string>& argv,
531 const Options& options = Options(),
532 const char* executable = nullptr,
533 const std::vector<std::string>* env = nullptr);
534 ~Subprocess();
535
536 /**
537 * Create a subprocess run as a shell command (as shell -c 'command')
538 *
539 * The shell to use is taken from the environment variable $SHELL,
540 * or /bin/sh if $SHELL is unset.
541 */
542 // clang-format off
543 [[deprecated(
544 "Prefer not running in a shell or use `shellify`.")]]
545 explicit Subprocess(
546 const std::string& cmd,
547 const Options& options = Options(),
548 const std::vector<std::string>* env = nullptr);
549 // clang-format on
550
551 ////
552 //// The methods below only manipulate the process state, and do not
553 //// affect its communication pipes.
554 ////
555
556 /**
557 * Return the child's pid, or -1 if the child wasn't successfully spawned
558 * or has already been wait()ed upon.
559 */
560 pid_t pid() const;
561
/**
 * Return the child's status as a ProcessReturnCode: the wait()-style status
 * if the process has already been waited on, a RUNNING code (raw value -1)
 * if the process is still running, or a NOT_STARTED code (raw value -2) if
 * the process hasn't been successfully started. NOTE that this does not
 * call waitpid() or Subprocess::poll(), but simply returns the status
 * stored in the Subprocess object.
 */
569 ProcessReturnCode returnCode() const {
570 return returnCode_;
571 }
572
573 /**
574 * Poll the child's status and return it. Return the exit status if the
575 * subprocess had quit, or RUNNING otherwise. Throws an std::logic_error
576 * if called on a Subprocess whose status is no longer RUNNING. No other
577 * exceptions are possible. Aborts on egregious violations of contract,
578 * e.g. if you wait for the underlying process without going through this
579 * Subprocess instance.
580 */
581 ProcessReturnCode poll(struct rusage* ru = nullptr);
582
583 /**
584 * Poll the child's status. If the process is still running, return false.
585 * Otherwise, return true if the process exited with status 0 (success),
586 * or throw CalledProcessError if the process exited with a non-zero status.
587 */
588 bool pollChecked();
589
/**
 * Wait for the process to terminate and return its status. Like poll(),
 * the only exception this can throw is std::logic_error if you call this
 * on a Subprocess whose status is not RUNNING. Aborts on egregious
 * violations of contract, like an out-of-band waitpid(p.pid(), 0, 0).
 */
596 ProcessReturnCode wait();
597
598 /**
599 * Wait for the process to terminate, throw if unsuccessful.
600 */
601 void waitChecked();
602
603 /**
604 * Send a signal to the child. Shortcuts for the commonly used Unix
605 * signals are below.
606 */
607 void sendSignal(int signal);
608 void terminate() {
609 sendSignal(SIGTERM);
610 }
611 void kill() {
612 sendSignal(SIGKILL);
613 }
614
615 ////
616 //// The methods below only affect the process's communication pipes, but
617 //// not its return code or state (they do not poll() or wait()).
618 ////
619
620 /**
621 * Communicate with the child until all pipes to/from the child are closed.
622 *
623 * The input buffer is written to the process' stdin pipe, and data is read
624 * from the stdout and stderr pipes. Non-blocking I/O is performed on all
625 * pipes simultaneously to avoid deadlocks.
626 *
627 * The stdin pipe will be closed after the full input buffer has been written.
628 * An error will be thrown if a non-empty input buffer is supplied but stdin
629 * was not configured as a pipe.
630 *
631 * Returns a pair of buffers containing the data read from stdout and stderr.
632 * If stdout or stderr is not a pipe, an empty IOBuf queue will be returned
633 * for the respective buffer.
634 *
635 * Note that communicate() and communicateIOBuf() both return when all
636 * pipes to/from the child are closed; the child might stay alive after
637 * that, so you must still wait().
638 *
639 * communicateIOBuf() uses IOBufQueue for buffering (which has the
640 * advantage that it won't try to allocate all data at once), but it does
641 * store the subprocess's entire output in memory before returning.
642 *
643 * communicate() uses strings for simplicity.
644 */
645 std::pair<IOBufQueue, IOBufQueue> communicateIOBuf(
646 IOBufQueue input = IOBufQueue());
647
648 std::pair<std::string, std::string> communicate(
649 StringPiece input = StringPiece());
650
651 /**
652 * Communicate with the child until all pipes to/from the child are closed.
653 *
654 * == Semantics ==
655 *
656 * readCallback(pfd, cfd) will be called whenever there's data available
657 * on any pipe *from* the child (PIPE_OUT). pfd is the file descriptor
658 * in the parent (that you use to read from); cfd is the file descriptor
659 * in the child (used for identifying the stream; 1 = child's standard
660 * output, 2 = child's standard error, etc)
661 *
662 * writeCallback(pfd, cfd) will be called whenever a pipe *to* the child is
663 * writable (PIPE_IN). pfd is the file descriptor in the parent (that you
664 * use to write to); cfd is the file descriptor in the child (used for
665 * identifying the stream; 0 = child's standard input, etc)
666 *
667 * The read and write callbacks must read from / write to pfd and return
668 * false during normal operation. Return true to tell communicate() to
669 * close the pipe. For readCallback, this might send SIGPIPE to the
670 * child, or make its writes fail with EPIPE, so you should generally
671 * avoid returning true unless you've reached end-of-file.
672 *
673 * communicate() returns when all pipes to/from the child are closed; the
674 * child might stay alive after that, so you must still wait().
675 * Conversely, the child may quit long before its pipes are closed, since
676 * its descendants can keep them alive forever.
677 *
678 * Most users won't need to use this callback version; the simpler version
679 * of communicate (which buffers data in memory) will probably work fine.
680 *
681 * == Things you must get correct ==
682 *
683 * 1) You MUST consume all data passed to readCallback (or return true to
684 * close the pipe). Similarly, you MUST write to a writable pipe (or
685 * return true to close the pipe). To do otherwise is an error that can
686 * result in a deadlock. You must do this even for pipes you are not
687 * interested in.
688 *
689 * 2) pfd is nonblocking, so be prepared for read() / write() to return -1
690 * and set errno to EAGAIN (in which case you should return false). Use
691 * readNoInt() from FileUtil.h to handle interrupted reads for you.
692 *
693 * 3) Your callbacks MUST NOT call any of the Subprocess methods that
694 * manipulate the pipe FDs. Check the docblocks, but, for example,
695 * neither closeParentFd (return true instead) nor takeOwnershipOfPipes
696 * are safe. Stick to reading/writing from pfd, as appropriate.
697 *
698 * == Good to know ==
699 *
700 * 1) See ReadLinesCallback for an easy way to consume the child's output
701 * streams line-by-line (or tokenized by another delimiter).
702 *
703 * 2) "Wait until the descendants close the pipes" is usually the behavior
704 * you want, since the descendants may have something to say even if the
705 * immediate child is dead. If you need to be able to force-close all
706 * parent FDs, communicate() will NOT work for you. Do it your own way by
707 * using takeOwnershipOfPipes().
708 *
709 * Why not? You can return "true" from your callbacks to sever active
710 * pipes, but inactive ones can remain open indefinitely. It is
711 * impossible to safely close inactive pipes while another thread is
712 * blocked in communicate(). This is BY DESIGN. Racing communicate()'s
713 * read/write callbacks can result in wrong I/O and data corruption. This
714 * class would need internal synchronization and timeouts, a poor and
715 * expensive implementation choice, in order to make closeParentFd()
716 * thread-safe.
717 */
718 using FdCallback = folly::Function<bool(int, int)>;
719 void communicate(FdCallback readCallback, FdCallback writeCallback);
720
721 /**
722 * A readCallback for Subprocess::communicate() that helps you consume
723 * lines (or other delimited pieces) from your subprocess's file
724 * descriptors. Use the readLinesCallback() helper to get template
725 * deduction. For example:
726 *
727 * subprocess.communicate(
728 * Subprocess::readLinesCallback(
729 * [](int fd, folly::StringPiece s) {
730 * std::cout << fd << " said: " << s;
731 * return false; // Keep reading from the child
732 * }
733 * ),
734 * [](int pdf, int cfd){ return true; } // Don't write to the child
735 * );
736 *
737 * If a file line exceeds maxLineLength, your callback will get some
738 * initial chunks of maxLineLength with no trailing delimiters. The final
739 * chunk of a line is delimiter-terminated iff the delimiter was present
740 * in the input. In particular, the last line in a file always lacks a
741 * delimiter -- so if a file ends on a delimiter, the final line is empty.
742 *
743 * Like a regular communicate() callback, your fdLineCb() normally returns
744 * false. It may return true to tell Subprocess to close the underlying
745 * file descriptor. The child process may then receive SIGPIPE or get
746 * EPIPE errors on writes.
747 */
748 template <class Callback>
749 class ReadLinesCallback {
750 private:
751 // Binds an FD to the client-provided FD+line callback
752 struct StreamSplitterCallback {
753 StreamSplitterCallback(Callback& cb, int fd) : cb_(cb), fd_(fd) {}
754 // The return value semantics are inverted vs StreamSplitter
755 bool operator()(StringPiece s) {
756 return !cb_(fd_, s);
757 }
758 Callback& cb_;
759 int fd_;
760 };
761 typedef gen::StreamSplitter<StreamSplitterCallback> LineSplitter;
762
763 public:
764 explicit ReadLinesCallback(
765 Callback&& fdLineCb,
766 uint64_t maxLineLength = 0, // No line length limit by default
767 char delimiter = '\n',
768 uint64_t bufSize = 1024)
769 : fdLineCb_(std::forward<Callback>(fdLineCb)),
770 maxLineLength_(maxLineLength),
771 delimiter_(delimiter),
772 bufSize_(bufSize) {}
773
774 bool operator()(int pfd, int cfd) {
775 // Make a splitter for this cfd if it doesn't already exist
776 auto it = fdToSplitter_.find(cfd);
777 auto& splitter = (it != fdToSplitter_.end())
778 ? it->second
779 : fdToSplitter_
780 .emplace(
781 cfd,
782 LineSplitter(
783 delimiter_,
784 StreamSplitterCallback(fdLineCb_, cfd),
785 maxLineLength_))
786 .first->second;
787 // Read as much as we can from this FD
788 char buf[bufSize_];
789 while (true) {
790 ssize_t ret = readNoInt(pfd, buf, bufSize_);
791 if (ret == -1 && errno == EAGAIN) { // No more data for now
792 return false;
793 }
794 checkUnixError(ret, "read");
795 if (ret == 0) { // Reached end-of-file
796 splitter.flush(); // Ignore return since the file is over anyway
797 return true;
798 }
799 if (!splitter(StringPiece(buf, ret))) {
800 return true; // The callback told us to stop
801 }
802 }
803 }
804
805 private:
806 Callback fdLineCb_;
807 const uint64_t maxLineLength_;
808 const char delimiter_;
809 const uint64_t bufSize_;
810 // We lazily make splitters for all cfds that get used.
811 std::unordered_map<int, LineSplitter> fdToSplitter_;
812 };
813
814 // Helper to enable template deduction
815 template <class Callback>
816 static auto readLinesCallback(
817 Callback&& fdLineCb,
818 uint64_t maxLineLength = 0, // No line length limit by default
819 char delimiter = '\n',
820 uint64_t bufSize = 1024)
821 -> ReadLinesCallback<typename std::decay<Callback>::type> {
822 return ReadLinesCallback<typename std::decay<Callback>::type>(
823 std::forward<Callback>(fdLineCb), maxLineLength, delimiter, bufSize);
824 }
825
826 /**
827 * communicate() callbacks can use this to temporarily enable/disable
828 * notifications (callbacks) for a pipe to/from the child. By default,
829 * all are enabled. Useful for "chatty" communication -- you want to
830 * disable write callbacks until you receive the expected message.
831 *
832 * Disabling a pipe does not free you from the requirement to consume all
833 * incoming data. Failing to do so will easily create deadlock bugs.
834 *
835 * Throws if the childFd is not known.
836 */
837 void enableNotifications(int childFd, bool enabled);
838
839 /**
840 * Are notifications for one pipe to/from child enabled? Throws if the
841 * childFd is not known.
842 */
843 bool notificationsEnabled(int childFd) const;
844
845 ////
846 //// The following methods are meant for the cases when communicate() is
847 //// not suitable. You should not need them when you call communicate(),
848 //// and, in fact, it is INHERENTLY UNSAFE to use closeParentFd() or
849 //// takeOwnershipOfPipes() from a communicate() callback.
850 ////
851
852 /**
853 * Close the parent file descriptor given a file descriptor in the child.
854 * DO NOT USE from communicate() callbacks; make them return true instead.
855 */
856 void closeParentFd(int childFd);
857
858 /**
859 * Set all pipes from / to child to be non-blocking. communicate() does
860 * this for you.
861 */
862 void setAllNonBlocking();
863
864 /**
865 * Get parent file descriptor corresponding to the given file descriptor
866 * in the child. Throws if childFd isn't a pipe (PIPE_IN / PIPE_OUT).
867 * Do not close() the returned file descriptor; use closeParentFd, above.
868 */
869 int parentFd(int childFd) const {
870 return pipes_[findByChildFd(childFd)].pipe.fd();
871 }
872 int stdinFd() const {
873 return parentFd(0);
874 }
875 int stdoutFd() const {
876 return parentFd(1);
877 }
878 int stderrFd() const {
879 return parentFd(2);
880 }
881
882 /**
883 * The child's pipes are logically separate from the process metadata
884 * (they may even be kept alive by the child's descendants). This call
885 * lets you manage the pipes' lifetime separetely from the lifetime of the
886 * child process.
887 *
888 * After this call, the Subprocess instance will have no knowledge of
889 * these pipes, and the caller assumes responsibility for managing their
890 * lifetimes. Pro-tip: prefer to explicitly close() the pipes, since
891 * folly::File would otherwise silently suppress I/O errors.
892 *
893 * No, you may NOT call this from a communicate() callback.
894 */
895 struct ChildPipe {
896 ChildPipe(int fd, folly::File&& ppe) : childFd(fd), pipe(std::move(ppe)) {}
897 int childFd;
898 folly::File pipe; // Owns the parent FD
899 };
900 std::vector<ChildPipe> takeOwnershipOfPipes();
901
902 private:
903 // spawn() sets up a pipe to read errors from the child,
904 // then calls spawnInternal() to do the bulk of the work. Once
905 // spawnInternal() returns it reads the error pipe to see if the child
906 // encountered any errors.
907 void spawn(
908 std::unique_ptr<const char*[]> argv,
909 const char* executable,
910 const Options& options,
911 const std::vector<std::string>* env);
912 void spawnInternal(
913 std::unique_ptr<const char*[]> argv,
914 const char* executable,
915 Options& options,
916 const std::vector<std::string>* env,
917 int errFd);
918
919 // Actions to run in child.
920 // Note that this runs after vfork(), so tread lightly.
921 // Returns 0 on success, or an errno value on failure.
922 int prepareChild(
923 const Options& options,
924 const sigset_t* sigmask,
925 const char* childDir) const;
926 int runChild(
927 const char* executable,
928 char** argv,
929 char** env,
930 const Options& options) const;
931
932 /**
933 * Read from the error pipe, and throw SubprocessSpawnError if the child
934 * failed before calling exec().
935 */
936 void readChildErrorPipe(int pfd, const char* executable);
937
938 // Returns an index into pipes_. Throws std::invalid_argument if not found.
939 size_t findByChildFd(const int childFd) const;
940
941 pid_t pid_{-1};
942 ProcessReturnCode returnCode_;
943
944 /**
945 * Represents a pipe between this process, and the child process (or its
946 * descendant). To interact with these pipes, you can use communicate(),
947 * or use parentFd() and related methods, or separate them from the
948 * Subprocess instance entirely via takeOwnershipOfPipes().
949 */
950 struct Pipe : private boost::totally_ordered<Pipe> {
951 folly::File pipe; // Our end of the pipe, wrapped in a File to auto-close.
952 int childFd = -1; // Identifies the pipe: what FD is this in the child?
953 int direction = PIPE_IN; // one of PIPE_IN / PIPE_OUT
954 bool enabled = true; // Are notifications enabled in communicate()?
955
956 bool operator<(const Pipe& other) const {
957 return childFd < other.childFd;
958 }
959 bool operator==(const Pipe& other) const {
960 return childFd == other.childFd;
961 }
962 };
963
964 // Populated at process start according to fdActions, empty after
965 // takeOwnershipOfPipes(). Sorted by childFd. Can only have elements
966 // erased, but not inserted, after being populated.
967 //
968 // The number of pipes between parent and child is assumed to be small,
969 // so we're happy with a vector here, even if it means linear erase.
970 std::vector<Pipe> pipes_;
971};
972
973} // namespace folly
974