1 | /* |
2 | * Copyright 2012-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | /** |
17 | * Subprocess library, modeled after Python's subprocess module |
18 | * (http://docs.python.org/2/library/subprocess.html) |
19 | * |
20 | * This library defines one class (Subprocess) which represents a child |
21 | * process. Subprocess has two constructors: one that takes a vector<string> |
22 | * and executes the given executable without using the shell, and one |
23 | * that takes a string and executes the given command using the shell. |
24 | * Subprocess allows you to redirect the child's standard input, standard |
25 | * output, and standard error to/from child descriptors in the parent, |
26 | * or to create communication pipes between the child and the parent. |
27 | * |
28 | * The simplest example is a thread-safe [1] version of the system() library |
29 | * function: |
30 | * Subprocess(cmd).wait(); |
31 | * which executes the command using the default shell and waits for it |
32 | * to complete, returning the exit status. |
33 | * |
34 | * A thread-safe [1] version of popen() (type="r", to read from the child): |
35 | * Subprocess proc(cmd, Subprocess::Options().pipeStdout()); |
36 | * // read from proc.stdoutFd() |
37 | * proc.wait(); |
38 | * |
39 | * A thread-safe [1] version of popen() (type="w", to write to the child): |
40 | * Subprocess proc(cmd, Subprocess::Options().pipeStdin()); |
41 | * // write to proc.stdinFd() |
42 | * proc.wait(); |
43 | * |
44 | * If you want to redirect both stdin and stdout to pipes, you can, but note |
45 | * that you're subject to a variety of deadlocks. You'll want to use |
46 | * nonblocking I/O, like the callback version of communicate(). |
47 | * |
48 | * The string or IOBuf-based variants of communicate() are the simplest way |
49 | * to communicate with a child via its standard input, standard output, and |
50 | * standard error. They buffer everything in memory, so they are not great |
51 | * for large amounts of data (or long-running processes), but they are much |
52 | * simpler than the callback version. |
53 | * |
54 | * == A note on thread-safety == |
55 | * |
56 | * [1] "thread-safe" refers ONLY to the fact that Subprocess is very careful |
57 | * to fork in a way that does not cause grief in multithreaded programs. |
58 | * |
59 | * Caveat: If your system does not have the atomic pipe2 system call, it is |
60 | * not safe to concurrently call Subprocess from different threads. |
61 | * Therefore, it is best to have a single thread be responsible for spawning |
62 | * subprocesses. |
63 | * |
64 | * A particular instance of Subprocess is emphatically **not** thread-safe. |
65 | * If you need to simultaneously communicate via the pipes, and interact |
66 | * with the Subprocess state, your best bet is to: |
67 | * - takeOwnershipOfPipes() to separate the pipe I/O from the subprocess. |
68 | * - Only interact with the Subprocess from one thread at a time. |
69 | * |
70 | * The current implementation of communicate() cannot be safely interrupted. |
71 | * To do so correctly, one would need to use EventFD, or open a dedicated |
72 | * pipe to be messaged from a different thread -- in particular, kill() will |
73 | * not do, since a descendant may keep the pipes open indefinitely. |
74 | * |
75 | * So, once you call communicate(), you must wait for it to return, and not |
76 | * touch the pipes from other threads. closeParentFd() is emphatically |
77 | * unsafe to call concurrently, and even sendSignal() is not a good idea. |
78 | * You can perhaps give the Subprocess's PID to a different thread before |
79 | * starting communicate(), and use that PID to send a signal without |
80 | * accessing the Subprocess object. In that case, you will need a mutex |
81 | * that ensures you don't wait() before you have sent said signal. In a |
82 | * nutshell, don't do this. |
83 | * |
84 | * In fact, signals are inherently concurrency-unsafe on Unix: if you signal |
85 | * a PID, while another thread is in waitpid(), the signal may fire either |
86 | * before or after the process is reaped. This means that your signal can, |
87 | * in pathological circumstances, be delivered to the wrong process (ouch!). |
88 | * To avoid this, you should only use non-blocking waits (i.e. poll()), and |
89 | * make sure to serialize your signals (i.e. kill()) with the waits -- |
90 | * either wait & signal from the same thread, or use a mutex. |
91 | */ |
92 | |
93 | #pragma once |
94 | |
95 | #include <signal.h> |
96 | #include <sys/types.h> |
97 | |
98 | #if __APPLE__ |
99 | #include <sys/wait.h> |
100 | #else |
101 | #include <wait.h> |
102 | #endif |
103 | |
#include <cerrno>
#include <cstdint>
#include <exception>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <utility>
#include <vector>
107 | |
108 | #include <boost/container/flat_map.hpp> |
109 | #include <boost/operators.hpp> |
110 | |
111 | #include <folly/Exception.h> |
112 | #include <folly/File.h> |
113 | #include <folly/FileUtil.h> |
114 | #include <folly/Function.h> |
115 | #include <folly/MapUtil.h> |
116 | #include <folly/Optional.h> |
117 | #include <folly/Portability.h> |
118 | #include <folly/Range.h> |
119 | #include <folly/gen/String.h> |
120 | #include <folly/io/IOBufQueue.h> |
121 | #include <folly/portability/SysResource.h> |
122 | |
123 | namespace folly { |
124 | |
125 | /** |
126 | * Class to wrap a process return code. |
127 | */ |
128 | class Subprocess; |
class ProcessReturnCode {
 public:
  enum State {
    // Subprocess launches its child from the constructor, so only
    // default-initialized or moved-out ProcessReturnCodes are in this state.
    NOT_STARTED,
    RUNNING,
    EXITED,
    KILLED,
  };

  // Factory for a value holding the "not started" sentinel.
  static ProcessReturnCode makeNotStarted() {
    return ProcessReturnCode{RV_NOT_STARTED};
  }

  // Factory for a value holding the "running" sentinel.
  static ProcessReturnCode makeRunning() {
    return ProcessReturnCode{RV_RUNNING};
  }

  // Wraps a raw status value; defined out of line.
  // NOTE(review): presumably `status` is a wait()-style status -- confirm
  // against the implementation.
  static ProcessReturnCode make(int status);

  // Default construction stores the "not started" sentinel, for convenience.
  // Subprocess::returnCode() will never produce this value.
  ProcessReturnCode() : ProcessReturnCode(RV_NOT_STARTED) {}

  // Copies are memberwise.
  ProcessReturnCode(const ProcessReturnCode& other) = default;
  ProcessReturnCode& operator=(const ProcessReturnCode& other) = default;

  // Moves are user-provided (out of line): for Subprocess to be movable, the
  // "moved out" state must not be "running", or ~Subprocess() will abort.
  ProcessReturnCode(ProcessReturnCode&& other) noexcept;
  ProcessReturnCode& operator=(ProcessReturnCode&& other) noexcept;

  /**
   * Current process state. One of:
   *   NOT_STARTED: process hasn't been started successfully
   *   RUNNING:     process is currently running
   *   EXITED:      process exited (successfully or not)
   *   KILLED:      process was killed by a signal.
   */
  State state() const;

  /**
   * Convenience predicates; each one compares state() to a single State.
   */
  bool notStarted() const { return state() == NOT_STARTED; }
  bool running() const { return state() == RUNNING; }
  bool exited() const { return state() == EXITED; }
  bool killed() const { return state() == KILLED; }

  /**
   * Exit status. Only valid if state() == EXITED; throws otherwise.
   */
  int exitStatus() const;

  /**
   * Signal that caused the process's termination. Only valid if
   * state() == KILLED; throws otherwise.
   */
  int killSignal() const;

  /**
   * Was a core file generated? Only valid if state() == KILLED; throws
   * otherwise.
   */
  bool coreDumped() const;

  /**
   * Human-readable description; one of
   *   "not started"
   *   "running"
   *   "exited with status <status>"
   *   "killed by signal <signal>"
   *   "killed by signal <signal> (core dumped)"
   */
  std::string str() const;

  /**
   * Precondition helper: throws std::logic_error if this object is in an
   * unexpected state.
   */
  void enforce(State state) const;

 private:
  // Only the factories and the default constructor build from a raw value.
  explicit ProcessReturnCode(int rv) : rawStatus_(rv) {}

  // Sentinel raw values for the two states that carry no status.
  static constexpr int RV_NOT_STARTED = -2;
  static constexpr int RV_RUNNING = -1;

  int rawStatus_;
};
227 | |
228 | /** |
229 | * Base exception thrown by the Subprocess methods. |
230 | */ |
231 | class FOLLY_EXPORT SubprocessError : public std::runtime_error { |
232 | public: |
233 | using std::runtime_error::runtime_error; |
234 | }; |
235 | |
236 | /** |
237 | * Exception thrown by *Checked methods of Subprocess. |
238 | */ |
class FOLLY_EXPORT CalledProcessError : public SubprocessError {
 public:
  // Builds the exception from the offending return code (defined out of
  // line).
  explicit CalledProcessError(ProcessReturnCode rc);

  // `noexcept` replaces the dynamic exception specification `throw()`,
  // which is deprecated in C++17 and removed in C++20.
  ~CalledProcessError() noexcept override = default;

  // The return code this exception was constructed with.
  ProcessReturnCode returnCode() const {
    return returnCode_;
  }

 private:
  ProcessReturnCode returnCode_;
};
250 | |
251 | /** |
252 | * Exception thrown if the subprocess cannot be started. |
253 | */ |
254 | class FOLLY_EXPORT SubprocessSpawnError : public SubprocessError { |
255 | public: |
256 | SubprocessSpawnError(const char* executable, int errCode, int errnoValue); |
257 | ~SubprocessSpawnError() throw() override = default; |
258 | int errnoValue() const { |
259 | return errnoValue_; |
260 | } |
261 | |
262 | private: |
263 | int errnoValue_; |
264 | }; |
265 | |
266 | /** |
267 | * Subprocess. |
268 | */ |
269 | class Subprocess { |
270 | public: |
271 | static const int CLOSE = -1; |
272 | static const int PIPE = -2; |
273 | static const int PIPE_IN = -3; |
274 | static const int PIPE_OUT = -4; |
275 | |
276 | /** |
277 | * See Subprocess::Options::dangerousPostForkPreExecCallback() for usage. |
278 | * Every derived class should include the following warning: |
279 | * |
280 | * DANGER: This class runs after fork in a child process. Be fast, the |
281 | * parent thread is waiting, but remember that other parent threads are |
282 | * running and may mutate your state. Avoid mutating any data belonging to |
283 | * the parent. Avoid interacting with non-POD data that originated in the |
284 | * parent. Avoid any libraries that may internally reference non-POD data. |
285 | * Especially beware parent mutexes -- for example, glog's LOG() uses one. |
286 | */ |
struct DangerousPostForkPreExecCallback {
  // Virtual so that derived callbacks are destroyed correctly through a
  // pointer to this interface.
  virtual ~DangerousPostForkPreExecCallback() = default;

  // Invoked through Options::dangerousPostForkPreExecCallback().
  // This must return 0 on success, or an `errno` error code.
  virtual int operator()() = 0;
};
292 | |
293 | /** |
294 | * Class representing various options: file descriptor behavior, and |
295 | * whether to use $PATH for searching for the executable. |
296 | * |
297 | * By default, we don't use $PATH, file descriptors are closed if |
298 | * the close-on-exec flag is set (fcntl FD_CLOEXEC) and inherited |
299 | * otherwise. |
300 | */ |
301 | class Options { |
302 | friend class Subprocess; |
303 | |
304 | public: |
305 | Options() {} // E.g. https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58328 |
306 | |
307 | /** |
308 | * Change action for file descriptor fd. |
309 | * |
310 | * "action" may be another file descriptor number (dup2()ed before the |
311 | * child execs), or one of CLOSE, PIPE_IN, and PIPE_OUT. |
312 | * |
313 | * CLOSE: close the file descriptor in the child |
314 | * PIPE_IN: open a pipe *from* the child |
315 | * PIPE_OUT: open a pipe *to* the child |
316 | * |
317 | * PIPE is a shortcut; same as PIPE_IN for stdin (fd 0), same as |
318 | * PIPE_OUT for stdout (fd 1) or stderr (fd 2), and an error for |
319 | * other file descriptors. |
320 | */ |
321 | Options& fd(int fd, int action); |
322 | |
323 | /** |
324 | * Shortcut to change the action for standard input. |
325 | */ |
326 | Options& stdinFd(int action) { |
327 | return fd(STDIN_FILENO, action); |
328 | } |
329 | |
330 | /** |
331 | * Shortcut to change the action for standard output. |
332 | */ |
333 | Options& stdoutFd(int action) { |
334 | return fd(STDOUT_FILENO, action); |
335 | } |
336 | |
337 | /** |
338 | * Shortcut to change the action for standard error. |
339 | * Note that stderr(1) will redirect the standard error to the same |
340 | * file descriptor as standard output; the equivalent of bash's "2>&1" |
341 | */ |
342 | Options& stderrFd(int action) { |
343 | return fd(STDERR_FILENO, action); |
344 | } |
345 | |
346 | Options& pipeStdin() { |
347 | return fd(STDIN_FILENO, PIPE_IN); |
348 | } |
349 | Options& pipeStdout() { |
350 | return fd(STDOUT_FILENO, PIPE_OUT); |
351 | } |
352 | Options& pipeStderr() { |
353 | return fd(STDERR_FILENO, PIPE_OUT); |
354 | } |
355 | |
356 | /** |
357 | * Close all other fds (other than standard input, output, error, |
358 | * and file descriptors explicitly specified with fd()). |
359 | * |
360 | * This is potentially slow; it's generally a better idea to |
361 | * set the close-on-exec flag on all file descriptors that shouldn't |
362 | * be inherited by the child. |
363 | * |
364 | * Even with this option set, standard input, output, and error are |
365 | * not closed; use stdin(CLOSE), stdout(CLOSE), stderr(CLOSE) if you |
366 | * desire this. |
367 | */ |
368 | Options& closeOtherFds() { |
369 | closeOtherFds_ = true; |
370 | return *this; |
371 | } |
372 | |
373 | /** |
374 | * Use the search path ($PATH) when searching for the executable. |
375 | */ |
376 | Options& usePath() { |
377 | usePath_ = true; |
378 | return *this; |
379 | } |
380 | |
381 | /** |
382 | * Change the child's working directory, after the vfork. |
383 | */ |
384 | Options& chdir(const std::string& dir) { |
385 | childDir_ = dir; |
386 | return *this; |
387 | } |
388 | |
389 | #if __linux__ |
390 | /** |
391 | * Child will receive a signal when the parent exits. |
392 | */ |
393 | Options& parentDeathSignal(int sig) { |
394 | parentDeathSignal_ = sig; |
395 | return *this; |
396 | } |
397 | #endif |
398 | |
399 | /** |
400 | * Child will be made a process group leader when it starts. Upside: one |
401 | * can reliably kill all its non-daemonizing descendants. Downside: the |
402 | * child will not receive Ctrl-C etc during interactive use. |
403 | */ |
404 | Options& processGroupLeader() { |
405 | processGroupLeader_ = true; |
406 | return *this; |
407 | } |
408 | |
409 | /** |
410 | * Detach the spawned process, to allow destroying the Subprocess object |
411 | * without waiting for the child process to finish. |
412 | * |
413 | * This causes the code to fork twice before executing the command. |
414 | * The intermediate child process will exit immediately, causing the process |
415 | * running the executable to be reparented to init (pid 1). |
416 | * |
417 | * Subprocess objects created with detach() enabled will already be in an |
418 | * "EXITED" state when the constructor returns. The caller should not call |
419 | * wait() or poll() on the Subprocess, and pid() will return -1. |
420 | */ |
421 | Options& detach() { |
422 | detach_ = true; |
423 | return *this; |
424 | } |
425 | |
426 | /** |
427 | * *** READ THIS WHOLE DOCBLOCK BEFORE USING *** |
428 | * |
429 | * Run this callback in the child after the fork, just before the |
430 | * exec(), and after the child's state has been completely set up: |
431 | * - signal handlers have been reset to default handling and unblocked |
432 | * - the working directory was set |
433 | * - closed any file descriptors specified via Options() |
434 | * - set child process flags (see code) |
435 | * |
436 | * This is EXTREMELY DANGEROUS. For example, this innocuous-looking code |
437 | * can cause a fraction of your Subprocess launches to hang forever: |
438 | * |
439 | * LOG(INFO) << "Hello from the child"; |
440 | * |
441 | * The reason is that glog has an internal mutex. If your fork() happens |
442 | * when the parent has the mutex locked, the child will wait forever. |
443 | * |
444 | * == GUIDELINES == |
445 | * |
446 | * - Be quick -- the parent thread is blocked until you exit. |
447 | * - Remember that other parent threads are running, and may mutate your |
448 | * state. |
449 | * - Avoid mutating any data belonging to the parent. |
450 | * - Avoid interacting with non-POD data that came from the parent. |
451 | * - Avoid any libraries that may internally reference non-POD state. |
452 | * - Especially beware parent mutexes, e.g. LOG() uses a global mutex. |
453 | * - Avoid invoking the parent's destructors (you can accidentally |
454 | * delete files, terminate network connections, etc). |
455 | * - Read http://ewontfix.com/7/ |
456 | */ |
457 | Options& dangerousPostForkPreExecCallback( |
458 | DangerousPostForkPreExecCallback* cob) { |
459 | dangerousPostForkPreExecCallback_ = cob; |
460 | return *this; |
461 | } |
462 | |
463 | #if __linux__ |
464 | /** |
465 | * This is an experimental feature, it is best you don't use it at this |
466 | * point of time. |
467 | * Although folly would support cloning with custom flags in some form, this |
468 | * API might change in the near future. So use the following assuming it is |
469 | * experimental. (Apr 11, 2017) |
470 | * |
471 | * This unlocks Subprocess to support clone flags, many of them need |
472 | * CAP_SYS_ADMIN permissions. It might also require you to go through the |
473 | * implementation to understand what happens before, between and after the |
474 | * fork-and-exec. |
475 | * |
476 | * `man 2 clone` would be a starting point for knowing about the available |
477 | * flags. |
478 | */ |
479 | using clone_flags_t = uint64_t; |
480 | Options& useCloneWithFlags(clone_flags_t cloneFlags) noexcept { |
481 | cloneFlags_ = cloneFlags; |
482 | return *this; |
483 | } |
484 | #endif |
485 | |
486 | private: |
487 | typedef boost::container::flat_map<int, int> FdMap; |
488 | FdMap fdActions_; |
489 | bool closeOtherFds_{false}; |
490 | bool usePath_{false}; |
491 | bool processGroupLeader_{false}; |
492 | bool detach_{false}; |
493 | std::string childDir_; // "" keeps the parent's working directory |
494 | #if __linux__ |
495 | int parentDeathSignal_{0}; |
496 | #endif |
497 | DangerousPostForkPreExecCallback* dangerousPostForkPreExecCallback_{ |
498 | nullptr}; |
499 | #if __linux__ |
500 | // none means `vfork()` instead of a custom `clone()` |
501 | // Optional<> is used because value of '0' means do clone without any flags. |
502 | Optional<clone_flags_t> cloneFlags_; |
503 | #endif |
504 | }; |
505 | |
506 | // Non-copyable, but movable |
507 | Subprocess(const Subprocess&) = delete; |
508 | Subprocess& operator=(const Subprocess&) = delete; |
509 | Subprocess(Subprocess&&) = default; |
510 | Subprocess& operator=(Subprocess&&) = default; |
511 | |
512 | /** |
513 | * Create an uninitialized subprocess. |
514 | * |
515 | * In this state it can only be destroyed, or assigned to using the move |
516 | * assignment operator. |
517 | */ |
518 | Subprocess(); |
519 | |
520 | /** |
521 | * Create a subprocess from the given arguments. argv[0] must be listed. |
522 | * If not-null, executable must be the actual executable |
523 | * being used (otherwise it's the same as argv[0]). |
524 | * |
525 | * If env is not-null, it must contain name=value strings to be used |
526 | * as the child's environment; otherwise, we inherit the environment |
527 | * from the parent. env must be null if options.usePath is set. |
528 | */ |
529 | explicit Subprocess( |
530 | const std::vector<std::string>& argv, |
531 | const Options& options = Options(), |
532 | const char* executable = nullptr, |
533 | const std::vector<std::string>* env = nullptr); |
534 | ~Subprocess(); |
535 | |
536 | /** |
537 | * Create a subprocess run as a shell command (as shell -c 'command') |
538 | * |
539 | * The shell to use is taken from the environment variable $SHELL, |
540 | * or /bin/sh if $SHELL is unset. |
541 | */ |
542 | // clang-format off |
543 | [[deprecated( |
544 | "Prefer not running in a shell or use `shellify`." )]] |
545 | explicit Subprocess( |
546 | const std::string& cmd, |
547 | const Options& options = Options(), |
548 | const std::vector<std::string>* env = nullptr); |
549 | // clang-format on |
550 | |
551 | //// |
552 | //// The methods below only manipulate the process state, and do not |
553 | //// affect its communication pipes. |
554 | //// |
555 | |
556 | /** |
557 | * Return the child's pid, or -1 if the child wasn't successfully spawned |
558 | * or has already been wait()ed upon. |
559 | */ |
560 | pid_t pid() const; |
561 | |
562 | /** |
563 | * Return the child's status (as per wait()) if the process has already |
564 | * been waited on, -1 if the process is still running, or -2 if the |
565 | * process hasn't been successfully started. NOTE that this does not call |
566 | * waitpid() or Subprocess::poll(), but simply returns the status stored |
567 | * in the Subprocess object. |
568 | */ |
569 | ProcessReturnCode returnCode() const { |
570 | return returnCode_; |
571 | } |
572 | |
573 | /** |
574 | * Poll the child's status and return it. Return the exit status if the |
575 | * subprocess had quit, or RUNNING otherwise. Throws an std::logic_error |
576 | * if called on a Subprocess whose status is no longer RUNNING. No other |
577 | * exceptions are possible. Aborts on egregious violations of contract, |
578 | * e.g. if you wait for the underlying process without going through this |
579 | * Subprocess instance. |
580 | */ |
581 | ProcessReturnCode poll(struct rusage* ru = nullptr); |
582 | |
583 | /** |
584 | * Poll the child's status. If the process is still running, return false. |
585 | * Otherwise, return true if the process exited with status 0 (success), |
586 | * or throw CalledProcessError if the process exited with a non-zero status. |
587 | */ |
588 | bool pollChecked(); |
589 | |
590 | /** |
591 | * Wait for the process to terminate and return its status. Like poll(), |
592 | * the only exception this can throw is std::logic_error if you call this |
593 | * on a Subprocess whose status is not RUNNING. Aborts on egregious |
594 | * violations of contract, like an out-of-band waitpid(p.pid(), 0, 0). |
595 | */ |
596 | ProcessReturnCode wait(); |
597 | |
598 | /** |
599 | * Wait for the process to terminate, throw if unsuccessful. |
600 | */ |
601 | void waitChecked(); |
602 | |
603 | /** |
604 | * Send a signal to the child. Shortcuts for the commonly used Unix |
605 | * signals are below. |
606 | */ |
607 | void sendSignal(int signal); |
608 | void terminate() { |
609 | sendSignal(SIGTERM); |
610 | } |
611 | void kill() { |
612 | sendSignal(SIGKILL); |
613 | } |
614 | |
615 | //// |
616 | //// The methods below only affect the process's communication pipes, but |
617 | //// not its return code or state (they do not poll() or wait()). |
618 | //// |
619 | |
620 | /** |
621 | * Communicate with the child until all pipes to/from the child are closed. |
622 | * |
623 | * The input buffer is written to the process' stdin pipe, and data is read |
624 | * from the stdout and stderr pipes. Non-blocking I/O is performed on all |
625 | * pipes simultaneously to avoid deadlocks. |
626 | * |
627 | * The stdin pipe will be closed after the full input buffer has been written. |
628 | * An error will be thrown if a non-empty input buffer is supplied but stdin |
629 | * was not configured as a pipe. |
630 | * |
631 | * Returns a pair of buffers containing the data read from stdout and stderr. |
632 | * If stdout or stderr is not a pipe, an empty IOBuf queue will be returned |
633 | * for the respective buffer. |
634 | * |
635 | * Note that communicate() and communicateIOBuf() both return when all |
636 | * pipes to/from the child are closed; the child might stay alive after |
637 | * that, so you must still wait(). |
638 | * |
639 | * communicateIOBuf() uses IOBufQueue for buffering (which has the |
640 | * advantage that it won't try to allocate all data at once), but it does |
641 | * store the subprocess's entire output in memory before returning. |
642 | * |
643 | * communicate() uses strings for simplicity. |
644 | */ |
645 | std::pair<IOBufQueue, IOBufQueue> communicateIOBuf( |
646 | IOBufQueue input = IOBufQueue()); |
647 | |
648 | std::pair<std::string, std::string> communicate( |
649 | StringPiece input = StringPiece()); |
650 | |
651 | /** |
652 | * Communicate with the child until all pipes to/from the child are closed. |
653 | * |
654 | * == Semantics == |
655 | * |
656 | * readCallback(pfd, cfd) will be called whenever there's data available |
657 | * on any pipe *from* the child (PIPE_OUT). pfd is the file descriptor |
658 | * in the parent (that you use to read from); cfd is the file descriptor |
659 | * in the child (used for identifying the stream; 1 = child's standard |
660 | * output, 2 = child's standard error, etc) |
661 | * |
662 | * writeCallback(pfd, cfd) will be called whenever a pipe *to* the child is |
663 | * writable (PIPE_IN). pfd is the file descriptor in the parent (that you |
664 | * use to write to); cfd is the file descriptor in the child (used for |
665 | * identifying the stream; 0 = child's standard input, etc) |
666 | * |
667 | * The read and write callbacks must read from / write to pfd and return |
668 | * false during normal operation. Return true to tell communicate() to |
669 | * close the pipe. For readCallback, this might send SIGPIPE to the |
670 | * child, or make its writes fail with EPIPE, so you should generally |
671 | * avoid returning true unless you've reached end-of-file. |
672 | * |
673 | * communicate() returns when all pipes to/from the child are closed; the |
674 | * child might stay alive after that, so you must still wait(). |
675 | * Conversely, the child may quit long before its pipes are closed, since |
676 | * its descendants can keep them alive forever. |
677 | * |
678 | * Most users won't need to use this callback version; the simpler version |
679 | * of communicate (which buffers data in memory) will probably work fine. |
680 | * |
681 | * == Things you must get correct == |
682 | * |
683 | * 1) You MUST consume all data passed to readCallback (or return true to |
684 | * close the pipe). Similarly, you MUST write to a writable pipe (or |
685 | * return true to close the pipe). To do otherwise is an error that can |
686 | * result in a deadlock. You must do this even for pipes you are not |
687 | * interested in. |
688 | * |
689 | * 2) pfd is nonblocking, so be prepared for read() / write() to return -1 |
690 | * and set errno to EAGAIN (in which case you should return false). Use |
691 | * readNoInt() from FileUtil.h to handle interrupted reads for you. |
692 | * |
693 | * 3) Your callbacks MUST NOT call any of the Subprocess methods that |
694 | * manipulate the pipe FDs. Check the docblocks, but, for example, |
695 | * neither closeParentFd (return true instead) nor takeOwnershipOfPipes |
696 | * are safe. Stick to reading/writing from pfd, as appropriate. |
697 | * |
698 | * == Good to know == |
699 | * |
700 | * 1) See ReadLinesCallback for an easy way to consume the child's output |
701 | * streams line-by-line (or tokenized by another delimiter). |
702 | * |
703 | * 2) "Wait until the descendants close the pipes" is usually the behavior |
704 | * you want, since the descendants may have something to say even if the |
705 | * immediate child is dead. If you need to be able to force-close all |
706 | * parent FDs, communicate() will NOT work for you. Do it your own way by |
707 | * using takeOwnershipOfPipes(). |
708 | * |
709 | * Why not? You can return "true" from your callbacks to sever active |
710 | * pipes, but inactive ones can remain open indefinitely. It is |
711 | * impossible to safely close inactive pipes while another thread is |
712 | * blocked in communicate(). This is BY DESIGN. Racing communicate()'s |
713 | * read/write callbacks can result in wrong I/O and data corruption. This |
714 | * class would need internal synchronization and timeouts, a poor and |
715 | * expensive implementation choice, in order to make closeParentFd() |
716 | * thread-safe. |
717 | */ |
718 | using FdCallback = folly::Function<bool(int, int)>; |
719 | void communicate(FdCallback readCallback, FdCallback writeCallback); |
720 | |
721 | /** |
722 | * A readCallback for Subprocess::communicate() that helps you consume |
723 | * lines (or other delimited pieces) from your subprocess's file |
724 | * descriptors. Use the readLinesCallback() helper to get template |
725 | * deduction. For example: |
726 | * |
727 | * subprocess.communicate( |
728 | * Subprocess::readLinesCallback( |
729 | * [](int fd, folly::StringPiece s) { |
730 | * std::cout << fd << " said: " << s; |
731 | * return false; // Keep reading from the child |
732 | * } |
733 | * ), |
734 | * [](int pfd, int cfd){ return true; } // Don't write to the child |
735 | * ); |
736 | * |
737 | * If a file line exceeds maxLineLength, your callback will get some |
738 | * initial chunks of maxLineLength with no trailing delimiters. The final |
739 | * chunk of a line is delimiter-terminated iff the delimiter was present |
740 | * in the input. In particular, the last line in a file always lacks a |
741 | * delimiter -- so if a file ends on a delimiter, the final line is empty. |
742 | * |
743 | * Like a regular communicate() callback, your fdLineCb() normally returns |
744 | * false. It may return true to tell Subprocess to close the underlying |
745 | * file descriptor. The child process may then receive SIGPIPE or get |
746 | * EPIPE errors on writes. |
747 | */ |
748 | template <class Callback> |
749 | class ReadLinesCallback { |
750 | private: |
751 | // Binds an FD to the client-provided FD+line callback |
752 | struct StreamSplitterCallback { |
753 | StreamSplitterCallback(Callback& cb, int fd) : cb_(cb), fd_(fd) {} |
754 | // The return value semantics are inverted vs StreamSplitter |
755 | bool operator()(StringPiece s) { |
756 | return !cb_(fd_, s); |
757 | } |
758 | Callback& cb_; |
759 | int fd_; |
760 | }; |
761 | typedef gen::StreamSplitter<StreamSplitterCallback> LineSplitter; |
762 | |
763 | public: |
764 | explicit ReadLinesCallback( |
765 | Callback&& fdLineCb, |
766 | uint64_t maxLineLength = 0, // No line length limit by default |
767 | char delimiter = '\n', |
768 | uint64_t bufSize = 1024) |
769 | : fdLineCb_(std::forward<Callback>(fdLineCb)), |
770 | maxLineLength_(maxLineLength), |
771 | delimiter_(delimiter), |
772 | bufSize_(bufSize) {} |
773 | |
774 | bool operator()(int pfd, int cfd) { |
775 | // Make a splitter for this cfd if it doesn't already exist |
776 | auto it = fdToSplitter_.find(cfd); |
777 | auto& splitter = (it != fdToSplitter_.end()) |
778 | ? it->second |
779 | : fdToSplitter_ |
780 | .emplace( |
781 | cfd, |
782 | LineSplitter( |
783 | delimiter_, |
784 | StreamSplitterCallback(fdLineCb_, cfd), |
785 | maxLineLength_)) |
786 | .first->second; |
787 | // Read as much as we can from this FD |
788 | char buf[bufSize_]; |
789 | while (true) { |
790 | ssize_t ret = readNoInt(pfd, buf, bufSize_); |
791 | if (ret == -1 && errno == EAGAIN) { // No more data for now |
792 | return false; |
793 | } |
794 | checkUnixError(ret, "read" ); |
795 | if (ret == 0) { // Reached end-of-file |
796 | splitter.flush(); // Ignore return since the file is over anyway |
797 | return true; |
798 | } |
799 | if (!splitter(StringPiece(buf, ret))) { |
800 | return true; // The callback told us to stop |
801 | } |
802 | } |
803 | } |
804 | |
805 | private: |
806 | Callback fdLineCb_; |
807 | const uint64_t maxLineLength_; |
808 | const char delimiter_; |
809 | const uint64_t bufSize_; |
810 | // We lazily make splitters for all cfds that get used. |
811 | std::unordered_map<int, LineSplitter> fdToSplitter_; |
812 | }; |
813 | |
814 | // Helper to enable template deduction |
815 | template <class Callback> |
816 | static auto readLinesCallback( |
817 | Callback&& fdLineCb, |
818 | uint64_t maxLineLength = 0, // No line length limit by default |
819 | char delimiter = '\n', |
820 | uint64_t bufSize = 1024) |
821 | -> ReadLinesCallback<typename std::decay<Callback>::type> { |
822 | return ReadLinesCallback<typename std::decay<Callback>::type>( |
823 | std::forward<Callback>(fdLineCb), maxLineLength, delimiter, bufSize); |
824 | } |
825 | |
/**
 * communicate() callbacks can use this to temporarily enable/disable
 * notifications (callbacks) for a pipe to/from the child. By default,
 * all are enabled. Useful for "chatty" communication -- you want to
 * disable write callbacks until you receive the expected message.
 *
 * The pipe is selected by childFd, its file descriptor number in the
 * child; pass enabled = false to disable notifications and enabled = true
 * to turn them back on.
 *
 * Disabling a pipe does not free you from the requirement to consume all
 * incoming data. Failing to do so will easily create deadlock bugs.
 *
 * Throws if the childFd is not known.
 */
void enableNotifications(int childFd, bool enabled);
838 | |
/**
 * Are notifications for one pipe to/from child enabled? The pipe is
 * selected by childFd, its file descriptor number in the child. Throws if
 * the childFd is not known.
 */
bool notificationsEnabled(int childFd) const;
844 | |
845 | //// |
846 | //// The following methods are meant for the cases when communicate() is |
847 | //// not suitable. You should not need them when you call communicate(), |
848 | //// and, in fact, it is INHERENTLY UNSAFE to use closeParentFd() or |
849 | //// takeOwnershipOfPipes() from a communicate() callback. |
850 | //// |
851 | |
/**
 * Close the parent file descriptor given a file descriptor in the child.
 * DO NOT USE from communicate() callbacks; make them return true instead
 * (a callback that returns true tells Subprocess to close the descriptor).
 */
void closeParentFd(int childFd);
857 | |
/**
 * Set all pipes from / to child to be non-blocking. communicate() does
 * this for you, so this is only of interest when you drive the pipes
 * yourself instead of calling communicate().
 */
void setAllNonBlocking();
863 | |
864 | /** |
865 | * Get parent file descriptor corresponding to the given file descriptor |
866 | * in the child. Throws if childFd isn't a pipe (PIPE_IN / PIPE_OUT). |
867 | * Do not close() the returned file descriptor; use closeParentFd, above. |
868 | */ |
869 | int parentFd(int childFd) const { |
870 | return pipes_[findByChildFd(childFd)].pipe.fd(); |
871 | } |
872 | int stdinFd() const { |
873 | return parentFd(0); |
874 | } |
875 | int stdoutFd() const { |
876 | return parentFd(1); |
877 | } |
878 | int stderrFd() const { |
879 | return parentFd(2); |
880 | } |
881 | |
/**
 * The child's pipes are logically separate from the process metadata
 * (they may even be kept alive by the child's descendants). This call
 * lets you manage the pipes' lifetime separately from the lifetime of the
 * child process.
 *
 * After this call, the Subprocess instance will have no knowledge of
 * these pipes, and the caller assumes responsibility for managing their
 * lifetimes. Pro-tip: prefer to explicitly close() the pipes, since
 * folly::File would otherwise silently suppress I/O errors.
 *
 * No, you may NOT call this from a communicate() callback.
 */
895 | struct ChildPipe { |
896 | ChildPipe(int fd, folly::File&& ppe) : childFd(fd), pipe(std::move(ppe)) {} |
897 | int childFd; |
898 | folly::File pipe; // Owns the parent FD |
899 | }; |
// Hands the pipes over to the caller as a vector of ChildPipe; the full
// contract is spelled out in the comment preceding ChildPipe.
std::vector<ChildPipe> takeOwnershipOfPipes();
901 | |
902 | private: |
// spawn() sets up a pipe to read errors from the child,
// then calls spawnInternal() to do the bulk of the work. Once
// spawnInternal() returns it reads the error pipe to see if the child
// encountered any errors.
//
// Takes ownership of argv. env is passed as a pointer.
// NOTE(review): a null env presumably means "use the parent's
// environment" -- confirm against the implementation.
void spawn(
    std::unique_ptr<const char*[]> argv,
    const char* executable,
    const Options& options,
    const std::vector<std::string>* env);
// Called by spawn() to do the bulk of the work of starting the child.
// Unlike spawn(), this takes Options by non-const reference.
// NOTE(review): errFd is presumably the end of spawn()'s error pipe that
// the child reports failures on -- confirm against the implementation.
void spawnInternal(
    std::unique_ptr<const char*[]> argv,
    const char* executable,
    Options& options,
    const std::vector<std::string>* env,
    int errFd);
918 | |
// Actions to run in child.
// Note that this runs after vfork(), so tread lightly.
// Returns 0 on success, or an errno value on failure.
//
// NOTE(review): sigmask is presumably the signal mask to install in the
// child and childDir the directory to change into -- confirm against the
// implementation.
int prepareChild(
    const Options& options,
    const sigset_t* sigmask,
    const char* childDir) const;
// Declared under the same "runs in the child after vfork()" note as
// prepareChild(), so the same caution applies.
// NOTE(review): presumably exec()s `executable` with argv / env and only
// returns (an errno value) if that fails -- confirm against the
// implementation.
int runChild(
    const char* executable,
    char** argv,
    char** env,
    const Options& options) const;
931 | |
/**
 * Read from the error pipe, and throw SubprocessSpawnError if the child
 * failed before calling exec().
 *
 * NOTE(review): pfd is presumably the parent's end of the error pipe set
 * up by spawn(), and executable is presumably only used to describe the
 * failure -- confirm against the implementation.
 */
void readChildErrorPipe(int pfd, const char* executable);
937 | |
// Returns an index into pipes_. Throws std::invalid_argument if not found.
// parentFd() uses this to locate the pipe for a given child descriptor.
size_t findByChildFd(const int childFd) const;
940 | |
// Process ID of the child; initialized to -1.
// NOTE(review): -1 presumably means "no child process" -- confirm.
pid_t pid_{-1};
// Return code state of the child; default-constructed here.
ProcessReturnCode returnCode_;
943 | |
944 | /** |
945 | * Represents a pipe between this process, and the child process (or its |
946 | * descendant). To interact with these pipes, you can use communicate(), |
947 | * or use parentFd() and related methods, or separate them from the |
948 | * Subprocess instance entirely via takeOwnershipOfPipes(). |
949 | */ |
950 | struct Pipe : private boost::totally_ordered<Pipe> { |
951 | folly::File pipe; // Our end of the pipe, wrapped in a File to auto-close. |
952 | int childFd = -1; // Identifies the pipe: what FD is this in the child? |
953 | int direction = PIPE_IN; // one of PIPE_IN / PIPE_OUT |
954 | bool enabled = true; // Are notifications enabled in communicate()? |
955 | |
956 | bool operator<(const Pipe& other) const { |
957 | return childFd < other.childFd; |
958 | } |
959 | bool operator==(const Pipe& other) const { |
960 | return childFd == other.childFd; |
961 | } |
962 | }; |
963 | |
// Populated at process start according to fdActions, empty after
// takeOwnershipOfPipes(). Sorted by childFd. Can only have elements
// erased, but not inserted, after being populated.
//
// Entries are located by child descriptor through findByChildFd(), which
// returns an index into this vector.
//
// The number of pipes between parent and child is assumed to be small,
// so we're happy with a vector here, even if it means linear erase.
std::vector<Pipe> pipes_;
971 | }; |
972 | |
973 | } // namespace folly |
974 | |