Subprocess.h source code [folly/Subprocess.h]

1	/*
2	* Copyright 2012-present Facebook, Inc.
3	*
4	* Licensed under the Apache License, Version 2.0 (the "License");
5	* you may not use this file except in compliance with the License.
6	* You may obtain a copy of the License at
7	*
8	* http://www.apache.org/licenses/LICENSE-2.0
9	*
10	* Unless required by applicable law or agreed to in writing, software
11	* distributed under the License is distributed on an "AS IS" BASIS,
12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	* See the License for the specific language governing permissions and
14	* limitations under the License.
15	*/
16	/**
17	* Subprocess library, modeled after Python's subprocess module
18	* (http://docs.python.org/2/library/subprocess.html)
19	*
20	* This library defines one class (Subprocess) which represents a child
21	* process. Subprocess has two constructors: one that takes a vector<string>
22	* and executes the given executable without using the shell, and one
23	* that takes a string and executes the given command using the shell.
24	* Subprocess allows you to redirect the child's standard input, standard
25	* output, and standard error to/from child descriptors in the parent,
26	* or to create communication pipes between the child and the parent.
27	*
28	* The simplest example is a thread-safe [1] version of the system() library
29	* function:
30	* Subprocess(cmd).wait();
31	* which executes the command using the default shell and waits for it
32	* to complete, returning the exit status.
33	*
34	* A thread-safe [1] version of popen() (type="r", to read from the child):
35	* Subprocess proc(cmd, Subprocess::Options().pipeStdout());
36	* // read from proc.stdoutFd()
37	* proc.wait();
38	*
39	* A thread-safe [1] version of popen() (type="w", to write to the child):
40	* Subprocess proc(cmd, Subprocess::Options().pipeStdin());
41	* // write to proc.stdinFd()
42	* proc.wait();
43	*
44	* If you want to redirect both stdin and stdout to pipes, you can, but note
45	* that you're subject to a variety of deadlocks. You'll want to use
46	* nonblocking I/O, like the callback version of communicate().
47	*
48	* The string or IOBuf-based variants of communicate() are the simplest way
49	* to communicate with a child via its standard input, standard output, and
50	* standard error. They buffer everything in memory, so they are not great
51	* for large amounts of data (or long-running processes), but they are much
52	* simpler than the callback version.
53	*
54	* == A note on thread-safety ==
55	*
56	* [1] "thread-safe" refers ONLY to the fact that Subprocess is very careful
57	* to fork in a way that does not cause grief in multithreaded programs.
58	*
59	* Caveat: If your system does not have the atomic pipe2 system call, it is
60	* not safe to concurrently call Subprocess from different threads.
61	* Therefore, it is best to have a single thread be responsible for spawning
62	* subprocesses.
63	*
64	* A particular instance of Subprocess is emphatically not thread-safe.
65	* If you need to simultaneously communicate via the pipes, and interact
66	* with the Subprocess state, your best bet is to:
67	* - takeOwnershipOfPipes() to separate the pipe I/O from the subprocess.
68	* - Only interact with the Subprocess from one thread at a time.
69	*
70	* The current implementation of communicate() cannot be safely interrupted.
71	* To do so correctly, one would need to use EventFD, or open a dedicated
72	* pipe to be messaged from a different thread -- in particular, kill() will
73	* not do, since a descendant may keep the pipes open indefinitely.
74	*
75	* So, once you call communicate(), you must wait for it to return, and not
76	* touch the pipes from other threads. closeParentFd() is emphatically
77	* unsafe to call concurrently, and even sendSignal() is not a good idea.
78	* You can perhaps give the Subprocess's PID to a different thread before
79	* starting communicate(), and use that PID to send a signal without
80	* accessing the Subprocess object. In that case, you will need a mutex
81	* that ensures you don't wait() before you have sent said signal. In a
82	* nutshell, don't do this.
83	*
84	* In fact, signals are inherently concurrency-unsafe on Unix: if you signal
85	* a PID, while another thread is in waitpid(), the signal may fire either
86	* before or after the process is reaped. This means that your signal can,
87	* in pathological circumstances, be delivered to the wrong process (ouch!).
88	* To avoid this, you should only use non-blocking waits (i.e. poll()), and
89	* make sure to serialize your signals (i.e. kill()) with the waits --
90	* either wait & signal from the same thread, or use a mutex.
91	*/
92
93	#pragma once
94
95	#include <signal.h>
96	#include <sys/types.h>
97
98	#if __APPLE__
99	#include <sys/wait.h>
100	#else
101	#include <wait.h>
102	#endif
103
104	#include <exception>
105	#include <string>
106	#include <vector>
107
108	#include <boost/container/flat_map.hpp>
109	#include <boost/operators.hpp>
110
111	#include <folly/Exception.h>
112	#include <folly/File.h>
113	#include <folly/FileUtil.h>
114	#include <folly/Function.h>
115	#include <folly/MapUtil.h>
116	#include <folly/Optional.h>
117	#include <folly/Portability.h>
118	#include <folly/Range.h>
119	#include <folly/gen/String.h>
120	#include <folly/io/IOBufQueue.h>
121	#include <folly/portability/SysResource.h>
122
123	namespace folly {
124
// Forward declaration. It is kept ahead of the docblock below so that the
// documentation attaches to ProcessReturnCode and not to Subprocess.
class Subprocess;

/**
 * Class to wrap a process return code.
 *
 * The whole state is a single int (rawStatus_). Two reserved negative
 * values mark the "not started" and "running" states. Instances are
 * obtained from the default constructor or from the makeNotStarted() /
 * makeRunning() / make() factories.
 */
class ProcessReturnCode {
 public:
  enum State {
    // Subprocess starts in the constructor, so this state designates only
    // default-initialized or moved-out ProcessReturnCodes.
    NOT_STARTED,
    RUNNING,
    EXITED,
    KILLED,
  };

  /** Factory for a return code in the NOT_STARTED raw state. */
  static ProcessReturnCode makeNotStarted() {
    return ProcessReturnCode(RV_NOT_STARTED);
  }

  /** Factory for a return code in the RUNNING raw state. */
  static ProcessReturnCode makeRunning() {
    return ProcessReturnCode(RV_RUNNING);
  }

  /** Factory taking a raw status value; defined out of line. */
  static ProcessReturnCode make(int status);

  // Default-initialized for convenience. Subprocess::returnCode() will
  // never produce this value.
  ProcessReturnCode() : rawStatus_(RV_NOT_STARTED) {}

  // Trivially copyable
  ProcessReturnCode(const ProcessReturnCode& p) = default;
  ProcessReturnCode& operator=(const ProcessReturnCode& p) = default;
  // Non-default move: In order for Subprocess to be movable, the "moved
  // out" state must not be "running", or ~Subprocess() will abort.
  ProcessReturnCode(ProcessReturnCode&& p) noexcept;
  ProcessReturnCode& operator=(ProcessReturnCode&& p) noexcept;

  /**
   * Process state. One of:
   *   NOT_STARTED: process hasn't been started successfully
   *   RUNNING: process is currently running
   *   EXITED: process exited (successfully or not)
   *   KILLED: process was killed by a signal.
   */
  State state() const;

  /**
   * Helper wrappers around state().
   */
  bool notStarted() const {
    return state() == NOT_STARTED;
  }
  bool running() const {
    return state() == RUNNING;
  }
  bool exited() const {
    return state() == EXITED;
  }
  bool killed() const {
    return state() == KILLED;
  }

  /**
   * Exit status. Only valid if state() == EXITED; throws otherwise.
   */
  int exitStatus() const;

  /**
   * Signal that caused the process's termination. Only valid if
   * state() == KILLED; throws otherwise.
   */
  int killSignal() const;

  /**
   * Was a core file generated? Only valid if state() == KILLED; throws
   * otherwise.
   */
  bool coreDumped() const;

  /**
   * String representation; one of
   *   "not started"
   *   "running"
   *   "exited with status <status>"
   *   "killed by signal <signal>"
   *   "killed by signal <signal> (core dumped)"
   */
  std::string str() const;

  /**
   * Helper function to enforce a precondition based on this.
   * Throws std::logic_error if in an unexpected state.
   */
  void enforce(State state) const;

 private:
  // Only the factories above construct from a raw value.
  explicit ProcessReturnCode(int rv) : rawStatus_(rv) {}

  // Reserved raw values for the two states that have no real status yet.
  static constexpr int RV_NOT_STARTED = -2;
  static constexpr int RV_RUNNING = -1;

  int rawStatus_;
};
227
228	/**
229	* Base exception thrown by the Subprocess methods.
230	*/
231	class FOLLY_EXPORT SubprocessError : public std::runtime_error {
232	public:
233	using std::runtime_error::runtime_error;
234	};
235
/**
 * Exception thrown by *Checked methods of Subprocess.
 *
 * returnCode() exposes the ProcessReturnCode stored in the exception
 * (presumably the `rc` passed at construction; the constructor is defined
 * out of line).
 */
class FOLLY_EXPORT CalledProcessError : public SubprocessError {
 public:
  explicit CalledProcessError(ProcessReturnCode rc);
  // `noexcept` replaces the deprecated dynamic exception specification
  // `throw()`; the meaning is unchanged.
  ~CalledProcessError() noexcept override = default;

  /** The return code carried by this exception. */
  ProcessReturnCode returnCode() const {
    return returnCode_;
  }

 private:
  ProcessReturnCode returnCode_;
};
250
251	/**
252	* Exception thrown if the subprocess cannot be started.
253	*/
254	class FOLLY_EXPORT SubprocessSpawnError : public SubprocessError {
255	public:
256	SubprocessSpawnError(const char* executable, int errCode, int errnoValue);
257	~SubprocessSpawnError() throw() override = default;
258	int errnoValue() const {
259	return errnoValue_;
260	}
261
262	private:
263	int errnoValue_;
264	};
265
266	/**
267	* Subprocess.
268	*/
269	class Subprocess {
270	public:
// Special "action" values accepted by Options::fd() in place of a real file
// descriptor number. All are negative so they cannot collide with an fd.
//   CLOSE:    close the file descriptor in the child.
//   PIPE:     shortcut; PIPE_IN for stdin, PIPE_OUT for stdout / stderr.
//   PIPE_IN:  pipe feeding the child's input (what pipeStdin() selects).
//   PIPE_OUT: pipe carrying the child's output (what pipeStdout() and
//             pipeStderr() select).
static constexpr int CLOSE = -1;
static constexpr int PIPE = -2;
static constexpr int PIPE_IN = -3;
static constexpr int PIPE_OUT = -4;
275
/**
 * See Subprocess::Options::dangerousPostForkPreExecCallback() for usage.
 * Every derived class should include the following warning:
 *
 * DANGER: This class runs after fork in a child process. Be fast, the
 * parent thread is waiting, but remember that other parent threads are
 * running and may mutate your state. Avoid mutating any data belonging to
 * the parent. Avoid interacting with non-POD data that originated in the
 * parent. Avoid any libraries that may internally reference non-POD data.
 * Especially beware parent mutexes -- for example, glog's LOG() uses one.
 */
struct DangerousPostForkPreExecCallback {
  virtual ~DangerousPostForkPreExecCallback() = default;
  // This must return 0 on success, or an `errno` error code.
  virtual int operator()() = 0;
};
292
293	/**
294	* Class representing various options: file descriptor behavior, and
295	* whether to use $PATH for searching for the executable,
296	*
297	* By default, we don't use $PATH, file descriptors are closed if
298	* the close-on-exec flag is set (fcntl FD_CLOEXEC) and inherited
299	* otherwise.
300	*/
301	class Options {
302	friend class Subprocess;
303
304	public:
305	Options() {} // E.g. https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58328
306
307	/**
308	* Change action for file descriptor fd.
309	*
310	* "action" may be another file descriptor number (dup2()ed before the
311	* child execs), or one of CLOSE, PIPE_IN, and PIPE_OUT.
312	*
313	* CLOSE: close the file descriptor in the child
314	* PIPE_IN: open a pipe from the child
315	* PIPE_OUT: open a pipe to the child
316	*
317	* PIPE is a shortcut; same as PIPE_IN for stdin (fd 0), same as
318	* PIPE_OUT for stdout (fd 1) or stderr (fd 2), and an error for
319	* other file descriptors.
320	*/
321	Options& fd(int fd, int action);
322
323	/**
324	* Shortcut to change the action for standard input.
325	*/
326	Options& stdinFd(int action) {
327	return fd(STDIN_FILENO, action);
328	}
329
330	/**
331	* Shortcut to change the action for standard output.
332	*/
333	Options& stdoutFd(int action) {
334	return fd(STDOUT_FILENO, action);
335	}
336
337	/**
338	* Shortcut to change the action for standard error.
339	* Note that stderr(1) will redirect the standard error to the same
340	* file descriptor as standard output; the equivalent of bash's "2>&1"
341	*/
342	Options& stderrFd(int action) {
343	return fd(STDERR_FILENO, action);
344	}
345
346	Options& pipeStdin() {
347	return fd(STDIN_FILENO, PIPE_IN);
348	}
349	Options& pipeStdout() {
350	return fd(STDOUT_FILENO, PIPE_OUT);
351	}
352	Options& pipeStderr() {
353	return fd(STDERR_FILENO, PIPE_OUT);
354	}
355
356	/**
357	* Close all other fds (other than standard input, output, error,
358	* and file descriptors explicitly specified with fd()).
359	*
360	* This is potentially slow; it's generally a better idea to
361	* set the close-on-exec flag on all file descriptors that shouldn't
362	* be inherited by the child.
363	*
364	* Even with this option set, standard input, output, and error are
365	* not closed; use stdin(CLOSE), stdout(CLOSE), stderr(CLOSE) if you
366	* desire this.
367	*/
368	Options& closeOtherFds() {
369	closeOtherFds_ = true;
370	return *this;
371	}
372
373	/**
374	* Use the search path ($PATH) when searching for the executable.
375	*/
376	Options& usePath() {
377	usePath_ = true;
378	return *this;
379	}
380
381	/**
382	* Change the child's working directory, after the vfork.
383	*/
384	Options& chdir(const std::string& dir) {
385	childDir_ = dir;
386	return *this;
387	}
388
389	#if __linux__
390	/**
391	* Child will receive a signal when the parent exits.
392	*/
393	Options& parentDeathSignal(int sig) {
394	parentDeathSignal_ = sig;
395	return *this;
396	}
397	#endif
398
399	/**
400	* Child will be made a process group leader when it starts. Upside: one
401	* can reliably kill all its non-daemonizing descendants. Downside: the
402	* child will not receive Ctrl-C etc during interactive use.
403	*/
404	Options& processGroupLeader() {
405	processGroupLeader_ = true;
406	return *this;
407	}
408
409	/**
410	* Detach the spawned process, to allow destroying the Subprocess object
411	* without waiting for the child process to finish.
412	*
413	* This causes the code to fork twice before executing the command.
414	* The intermediate child process will exit immediately, causing the process
415	* running the executable to be reparented to init (pid 1).
416	*
417	* Subprocess objects created with detach() enabled will already be in an
418	* "EXITED" state when the constructor returns. The caller should not call
419	* wait() or poll() on the Subprocess, and pid() will return -1.
420	*/
421	Options& detach() {
422	detach_ = true;
423	return *this;
424	}
425
426	/**
427	* * READ THIS WHOLE DOCBLOCK BEFORE USING *
428	*
429	* Run this callback in the child after the fork, just before the
430	* exec(), and after the child's state has been completely set up:
431	* - signal handlers have been reset to default handling and unblocked
432	* - the working directory was set
433	* - closed any file descriptors specified via Options()
434	* - set child process flags (see code)
435	*
436	* This is EXTREMELY DANGEROUS. For example, this innocuous-looking code
437	* can cause a fraction of your Subprocess launches to hang forever:
438	*
439	* LOG(INFO) << "Hello from the child";
440	*
441	* The reason is that glog has an internal mutex. If your fork() happens
442	* when the parent has the mutex locked, the child will wait forever.
443	*
444	* == GUIDELINES ==
445	*
446	* - Be quick -- the parent thread is blocked until you exit.
447	* - Remember that other parent threads are running, and may mutate your
448	* state.
449	* - Avoid mutating any data belonging to the parent.
450	* - Avoid interacting with non-POD data that came from the parent.
451	* - Avoid any libraries that may internally reference non-POD state.
452	* - Especially beware parent mutexes, e.g. LOG() uses a global mutex.
453	* - Avoid invoking the parent's destructors (you can accidentally
454	* delete files, terminate network connections, etc).
455	* - Read http://ewontfix.com/7/
456	*/
457	Options& dangerousPostForkPreExecCallback(
458	DangerousPostForkPreExecCallback* cob) {
459	dangerousPostForkPreExecCallback_ = cob;
460	return *this;
461	}
462
463	#if __linux__
464	/**
465	* This is an experimental feature, it is best you don't use it at this
466	* point of time.
467	* Although folly would support cloning with custom flags in some form, this
468	* API might change in the near future. So use the following assuming it is
469	* experimental. (Apr 11, 2017)
470	*
471	* This unlocks Subprocess to support clone flags, many of them need
472	* CAP_SYS_ADMIN permissions. It might also require you to go through the
473	* implementation to understand what happens before, between and after the
474	* fork-and-exec.
475	*
476	* `man 2 clone` would be a starting point for knowing about the available
477	* flags.
478	*/
479	using clone_flags_t = uint64_t;
480	Options& useCloneWithFlags(clone_flags_t cloneFlags) noexcept {
481	cloneFlags_ = cloneFlags;
482	return *this;
483	}
484	#endif
485
486	private:
487	typedef boost::container::flat_map<int, int> FdMap;
488	FdMap fdActions_;
489	bool closeOtherFds_{false};
490	bool usePath_{false};
491	bool processGroupLeader_{false};
492	bool detach_{false};
493	std::string childDir_; // "" keeps the parent's working directory
494	#if __linux__
495	int parentDeathSignal_{`0`};
496	#endif
497	DangerousPostForkPreExecCallback* dangerousPostForkPreExecCallback_{
498	nullptr};
499	#if __linux__
500	// none means `vfork()` instead of a custom `clone()`
501	// Optional<> is used because value of '0' means do clone without any flags.
502	Optional<clone_flags_t> cloneFlags_;
503	#endif
504	};
505
506	// Non-copiable, but movable
507	Subprocess(const Subprocess&) = delete;
508	Subprocess& operator=(const Subprocess&) = delete;
509	Subprocess(Subprocess&&) = default;
510	Subprocess& operator=(Subprocess&&) = default;
511
512	/**
513	* Create an uninitialized subprocess.
514	*
515	* In this state it can only be destroyed, or assigned to using the move
516	* assignment operator.
517	*/
518	Subprocess();
519
520	/**
521	* Create a subprocess from the given arguments. argv[0] must be listed.
522	* If not-null, executable must be the actual executable
523	* being used (otherwise it's the same as argv[0]).
524	*
525	* If env is not-null, it must contain name=value strings to be used
526	* as the child's environment; otherwise, we inherit the environment
527	* from the parent. env must be null if options.usePath is set.
528	*/
529	explicit Subprocess(
530	const std::vector<std::string>& argv,
531	const Options& options = Options (),
532	const char* executable = nullptr,
533	const std::vector<std::string>* env = nullptr);
534	~Subprocess();
535
536	/**
537	* Create a subprocess run as a shell command (as shell -c 'command')
538	*
539	* The shell to use is taken from the environment variable $SHELL,
540	* or /bin/sh if $SHELL is unset.
541	*/
542	// clang-format off
543	[[deprecated(
544	"Prefer not running in a shell or use `shellify`.")]]
545	explicit Subprocess(
546	const std::string& cmd,
547	const Options& options = Options (),
548	const std::vector<std::string>* env = nullptr);
549	// clang-format on
550
551	////
552	//// The methods below only manipulate the process state, and do not
553	//// affect its communication pipes.
554	////
555
556	/**
557	* Return the child's pid, or -1 if the child wasn't successfully spawned
558	* or has already been wait()ed upon.
559	*/
560	pid_t pid() const;
561
562	/**
563	* Return the child's status (as per wait()) if the process has already
564	* been waited on, -1 if the process is still running, or -2 if the
565	* process hasn't been successfully started. NOTE that this does not call
566	* waitpid() or Subprocess::poll(), but simply returns the status stored
567	* in the Subprocess object.
568	*/
569	ProcessReturnCode returnCode() const {
570	return returnCode_;
571	}
572
573	/**
574	* Poll the child's status and return it. Return the exit status if the
575	* subprocess had quit, or RUNNING otherwise. Throws an std::logic_error
576	* if called on a Subprocess whose status is no longer RUNNING. No other
577	* exceptions are possible. Aborts on egregious violations of contract,
578	* e.g. if you wait for the underlying process without going through this
579	* Subprocess instance.
580	*/
581	ProcessReturnCode poll(struct rusage* ru = nullptr);
582
583	/**
584	* Poll the child's status. If the process is still running, return false.
585	* Otherwise, return true if the process exited with status 0 (success),
586	* or throw CalledProcessError if the process exited with a non-zero status.
587	*/
588	bool pollChecked();
589
590	/**
591	* Wait for the process to terminate and return its status. Like poll(),
592	* the only exception this can throw is std::logic_error if you call this
593	* on a Subprocess whose status is not RUNNING. Aborts on egregious
594	* violations of contract, like an out-of-band waitpid(p.pid(), 0, 0).
595	*/
596	ProcessReturnCode wait();
597
598	/**
599	* Wait for the process to terminate, throw if unsuccessful.
600	*/
601	void waitChecked();
602
/**
 * Deliver the given signal to the child. terminate() and kill() below are
 * shortcuts for the two most commonly used Unix signals.
 */
void sendSignal(int signal);

// Shortcut for sendSignal(SIGTERM).
void terminate() { sendSignal(SIGTERM); }

// Shortcut for sendSignal(SIGKILL).
void kill() { sendSignal(SIGKILL); }
614
615	////
616	//// The methods below only affect the process's communication pipes, but
617	//// not its return code or state (they do not poll() or wait()).
618	////
619
620	/**
621	* Communicate with the child until all pipes to/from the child are closed.
622	*
623	* The input buffer is written to the process' stdin pipe, and data is read
624	* from the stdout and stderr pipes. Non-blocking I/O is performed on all
625	* pipes simultaneously to avoid deadlocks.
626	*
627	* The stdin pipe will be closed after the full input buffer has been written.
628	* An error will be thrown if a non-empty input buffer is supplied but stdin
629	* was not configured as a pipe.
630	*
631	* Returns a pair of buffers containing the data read from stdout and stderr.
632	* If stdout or stderr is not a pipe, an empty IOBuf queue will be returned
633	* for the respective buffer.
634	*
635	* Note that communicate() and communicateIOBuf() both return when all
636	* pipes to/from the child are closed; the child might stay alive after
637	* that, so you must still wait().
638	*
639	* communicateIOBuf() uses IOBufQueue for buffering (which has the
640	* advantage that it won't try to allocate all data at once), but it does
641	* store the subprocess's entire output in memory before returning.
642	*
643	* communicate() uses strings for simplicity.
644	*/
645	std::pair<IOBufQueue, IOBufQueue> communicateIOBuf(
646	IOBufQueue input = IOBufQueue ());
647
648	std::pair<std::string, std::string> communicate(
649	StringPiece input = StringPiece ());
650
651	/**
652	* Communicate with the child until all pipes to/from the child are closed.
653	*
654	* == Semantics ==
655	*
656	* readCallback(pfd, cfd) will be called whenever there's data available
657	* on any pipe from the child (PIPE_OUT). pfd is the file descriptor
658	* in the parent (that you use to read from); cfd is the file descriptor
659	* in the child (used for identifying the stream; 1 = child's standard
660	* output, 2 = child's standard error, etc)
661	*
662	* writeCallback(pfd, cfd) will be called whenever a pipe to the child is
663	* writable (PIPE_IN). pfd is the file descriptor in the parent (that you
664	* use to write to); cfd is the file descriptor in the child (used for
665	* identifying the stream; 0 = child's standard input, etc)
666	*
667	* The read and write callbacks must read from / write to pfd and return
668	* false during normal operation. Return true to tell communicate() to
669	* close the pipe. For readCallback, this might send SIGPIPE to the
670	* child, or make its writes fail with EPIPE, so you should generally
671	* avoid returning true unless you've reached end-of-file.
672	*
673	* communicate() returns when all pipes to/from the child are closed; the
674	* child might stay alive after that, so you must still wait().
675	* Conversely, the child may quit long before its pipes are closed, since
676	* its descendants can keep them alive forever.
677	*
678	* Most users won't need to use this callback version; the simpler version
679	* of communicate (which buffers data in memory) will probably work fine.
680	*
681	* == Things you must get correct ==
682	*
683	* 1) You MUST consume all data passed to readCallback (or return true to
684	* close the pipe). Similarly, you MUST write to a writable pipe (or
685	* return true to close the pipe). To do otherwise is an error that can
686	* result in a deadlock. You must do this even for pipes you are not
687	* interested in.
688	*
689	* 2) pfd is nonblocking, so be prepared for read() / write() to return -1
690	* and set errno to EAGAIN (in which case you should return false). Use
691	* readNoInt() from FileUtil.h to handle interrupted reads for you.
692	*
693	* 3) Your callbacks MUST NOT call any of the Subprocess methods that
694	* manipulate the pipe FDs. Check the docblocks, but, for example,
695	* neither closeParentFd (return true instead) nor takeOwnershipOfPipes
696	* are safe. Stick to reading/writing from pfd, as appropriate.
697	*
698	* == Good to know ==
699	*
700	* 1) See ReadLinesCallback for an easy way to consume the child's output
701	* streams line-by-line (or tokenized by another delimiter).
702	*
703	* 2) "Wait until the descendants close the pipes" is usually the behavior
704	* you want, since the descendants may have something to say even if the
705	* immediate child is dead. If you need to be able to force-close all
706	* parent FDs, communicate() will NOT work for you. Do it your own way by
707	* using takeOwnershipOfPipes().
708	*
709	* Why not? You can return "true" from your callbacks to sever active
710	* pipes, but inactive ones can remain open indefinitely. It is
711	* impossible to safely close inactive pipes while another thread is
712	* blocked in communicate(). This is BY DESIGN. Racing communicate()'s
713	* read/write callbacks can result in wrong I/O and data corruption. This
714	* class would need internal synchronization and timeouts, a poor and
715	* expensive implementation choice, in order to make closeParentFd()
716	* thread-safe.
717	*/
718	using FdCallback = folly::Function<bool(int, int)>;
719	void communicate(FdCallback readCallback, FdCallback writeCallback);
720
721	/**
722	* A readCallback for Subprocess::communicate() that helps you consume
723	* lines (or other delimited pieces) from your subprocess's file
724	* descriptors. Use the readLinesCallback() helper to get template
725	* deduction. For example:
726	*
727	* subprocess.communicate(
728	* Subprocess::readLinesCallback(
729	* [](int fd, folly::StringPiece s) {
730	* std::cout << fd << " said: " << s;
731	* return false; // Keep reading from the child
732	* }
733	* ),
734	* [](int pfd, int cfd){ return true; } // Don't write to the child
735	* );
736	*
737	* If a file line exceeds maxLineLength, your callback will get some
738	* initial chunks of maxLineLength with no trailing delimiters. The final
739	* chunk of a line is delimiter-terminated iff the delimiter was present
740	* in the input. In particular, the last line in a file always lacks a
741	* delimiter -- so if a file ends on a delimiter, the final line is empty.
742	*
743	* Like a regular communicate() callback, your fdLineCb() normally returns
744	* false. It may return true to tell Subprocess to close the underlying
745	* file descriptor. The child process may then receive SIGPIPE or get
746	* EPIPE errors on writes.
747	*/
748	template <class Callback>
749	class ReadLinesCallback {
750	private:
751	// Binds an FD to the client-provided FD+line callback
752	struct StreamSplitterCallback {
753	StreamSplitterCallback(Callback& cb, int fd) : cb_(cb), fd_(fd) {}
754	// The return value semantics are inverted vs StreamSplitter
755	bool operator()(StringPiece s) {
756	return !cb_(fd_, s);
757	}
758	Callback& cb_;
759	int fd_;
760	};
761	typedef gen::StreamSplitter<StreamSplitterCallback> LineSplitter;
762
763	public:
764	explicit ReadLinesCallback(
765	Callback&& fdLineCb,
766	uint64_t maxLineLength = `0`, // No line length limit by default
767	char delimiter = `'\n'`,
768	uint64_t bufSize = `1024`)
769	: fdLineCb_(std::forward<Callback>(fdLineCb)),
770	maxLineLength_(maxLineLength),
771	delimiter_(delimiter),
772	bufSize_(bufSize) {}
773
774	bool operator()(int pfd, int cfd) {
775	// Make a splitter for this cfd if it doesn't already exist
776	auto it = fdToSplitter_.find(cfd);
777	auto& splitter = (it != fdToSplitter_.end())
778	? it->second
779	: fdToSplitter_
780	.emplace(
781	cfd,
782	LineSplitter(
783	delimiter_,
784	StreamSplitterCallback(fdLineCb_, cfd),
785	maxLineLength_))
786	.first->second;
787	// Read as much as we can from this FD
788	char buf[bufSize_];
789	while (true) {
790	ssize_t ret = readNoInt(pfd, buf, bufSize_);
791	if (ret == -`1` && errno == EAGAIN) { // No more data for now
792	return false;
793	}
794	checkUnixError(ret, "read");
795	if (ret == `0`) { // Reached end-of-file
796	splitter.flush(); // Ignore return since the file is over anyway
797	return true;
798	}
799	if (!splitter(StringPiece(buf, ret))) {
800	return true; // The callback told us to stop
801	}
802	}
803	}
804
805	private:
806	Callback fdLineCb_;
807	const uint64_t maxLineLength_;
808	const char delimiter_;
809	const uint64_t bufSize_;
810	// We lazily make splitters for all cfds that get used.
811	std::unordered_map<int, LineSplitter> fdToSplitter_;
812	};
813
814	// Helper to enable template deduction
815	template <class Callback>
816	static auto readLinesCallback(
817	Callback&& fdLineCb,
818	uint64_t maxLineLength = `0`, // No line length limit by default
819	char delimiter = `'\n'`,
820	uint64_t bufSize = `1024`)
821	-> ReadLinesCallback<typename std::decay<Callback>::type> {
822	return ReadLinesCallback<typename std::decay<Callback>::type>(
823	std::forward<Callback>(fdLineCb), maxLineLength, delimiter, bufSize);
824	}
825
826	/**
827	* communicate() callbacks can use this to temporarily enable/disable
828	* notifications (callbacks) for a pipe to/from the child. By default,
829	* all are enabled. Useful for "chatty" communication -- you want to
830	* disable write callbacks until you receive the expected message.
831	*
832	* Disabling a pipe does not free you from the requirement to consume all
833	* incoming data. Failing to do so will easily create deadlock bugs.
834	*
835	* Throws if the childFd is not known.
836	*/
837	void enableNotifications(int childFd, bool enabled);
838
839	/**
840	* Are notifications for one pipe to/from child enabled? Throws if the
841	* childFd is not known.
842	*/
843	bool notificationsEnabled(int childFd) const;
844
845	////
846	//// The following methods are meant for the cases when communicate() is
847	//// not suitable. You should not need them when you call communicate(),
848	//// and, in fact, it is INHERENTLY UNSAFE to use closeParentFd() or
849	//// takeOwnershipOfPipes() from a communicate() callback.
850	////
851
852	/**
853	* Close the parent file descriptor given a file descriptor in the child.
854	* DO NOT USE from communicate() callbacks; make them return true instead.
855	*/
856	void closeParentFd(int childFd);
857
858	/**
859	* Set all pipes from / to child to be non-blocking. communicate() does
860	* this for you.
861	*/
862	void setAllNonBlocking();
863
864	/**
865	* Get parent file descriptor corresponding to the given file descriptor
866	* in the child. Throws if childFd isn't a pipe (PIPE_IN / PIPE_OUT).
867	* Do not close() the returned file descriptor; use closeParentFd, above.
868	*/
869	int parentFd(int childFd) const {
870	return pipes_[findByChildFd(childFd)].pipe.fd();
871	}
872	int stdinFd() const {
873	return parentFd(`0`);
874	}
875	int stdoutFd() const {
876	return parentFd(`1`);
877	}
878	int stderrFd() const {
879	return parentFd(`2`);
880	}
881
882	/**
883	* The child's pipes are logically separate from the process metadata
884	* (they may even be kept alive by the child's descendants). This call
885	* lets you manage the pipes' lifetime separetely from the lifetime of the
886	* child process.
887	*
888	* After this call, the Subprocess instance will have no knowledge of
889	* these pipes, and the caller assumes responsibility for managing their
890	* lifetimes. Pro-tip: prefer to explicitly close() the pipes, since
891	* folly::File would otherwise silently suppress I/O errors.
892	*
893	* No, you may NOT call this from a communicate() callback.
894	*/
895	struct ChildPipe {
896	ChildPipe(int fd, folly::File&& ppe) : childFd(fd), pipe (std::move(ppe)) {}
897	int childFd;
898	folly::File pipe; // Owns the parent FD
899	};
900	std::vector<ChildPipe> takeOwnershipOfPipes();
901
902	private:
903	// spawn() sets up a pipe to read errors from the child,
904	// then calls spawnInternal() to do the bulk of the work. Once
905	// spawnInternal() returns it reads the error pipe to see if the child
906	// encountered any errors.
907	void spawn(
908	std::unique_ptr<const char*[]> argv,
909	const char* executable,
910	const Options& options,
911	const std::vector<std::string>* env);
912	void spawnInternal(
913	std::unique_ptr<const char*[]> argv,
914	const char* executable,
915	Options& options,
916	const std::vector<std::string>* env,
917	int errFd);
918
919	// Actions to run in child.
920	// Note that this runs after vfork(), so tread lightly.
921	// Returns 0 on success, or an errno value on failure.
922	int prepareChild(
923	const Options& options,
924	const sigset_t* sigmask,
925	const char* childDir) const;
926	int runChild(
927	const char* executable,
928	char** argv,
929	char** env,
930	const Options& options) const;
931
932	/**
933	* Read from the error pipe, and throw SubprocessSpawnError if the child
934	* failed before calling exec().
935	*/
936	void readChildErrorPipe(int pfd, const char* executable);
937
938	// Returns an index into pipes_. Throws std::invalid_argument if not found.
939	size_t findByChildFd(const int childFd) const;
940
941	pid_t pid_{-`1`};
942	ProcessReturnCode returnCode_;
943
944	/**
945	* Represents a pipe between this process, and the child process (or its
946	* descendant). To interact with these pipes, you can use communicate(),
947	* or use parentFd() and related methods, or separate them from the
948	* Subprocess instance entirely via takeOwnershipOfPipes().
949	*/
950	struct Pipe : private boost::totally_ordered<Pipe> {
951	folly::File pipe; // Our end of the pipe, wrapped in a File to auto-close.
952	int childFd = -`1`; // Identifies the pipe: what FD is this in the child?
953	int direction = PIPE_IN; // one of PIPE_IN / PIPE_OUT
954	bool enabled = true; // Are notifications enabled in communicate()?
955
956	bool operator<(const Pipe& other) const {
957	return childFd < other.childFd;
958	}
959	bool operator==(const Pipe& other) const {
960	return childFd == other.childFd;
961	}
962	};
963
964	// Populated at process start according to fdActions, empty after
965	// takeOwnershipOfPipes(). Sorted by childFd. Can only have elements
966	// erased, but not inserted, after being populated.
967	//
968	// The number of pipes between parent and child is assumed to be small,
969	// so we're happy with a vector here, even if it means linear erase.
970	std::vector<Pipe> pipes_;
971	};
972
973	} // namespace folly
974

Browse the source code of folly/Subprocess.h