1/****************************************************************************
2**
3** Copyright (C) 2020 Intel Corporation.
4** Copyright (C) 2015 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com
5**
6** Permission is hereby granted, free of charge, to any person obtaining a copy
7** of this software and associated documentation files (the "Software"), to deal
8** in the Software without restriction, including without limitation the rights
9** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10** copies of the Software, and to permit persons to whom the Software is
11** furnished to do so, subject to the following conditions:
12**
13** The above copyright notice and this permission notice shall be included in
14** all copies or substantial portions of the Software.
15**
16** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21** OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22** THE SOFTWARE.
23**
24****************************************************************************/
25
26#ifndef _GNU_SOURCE
27# define _GNU_SOURCE
28#endif
29
30#include "forkfd.h"
31
32/* Macros fine-tuning the build: */
33//#define FORKFD_NO_FORKFD 1 /* disable the forkfd() function */
34//#define FORKFD_NO_SPAWNFD 1 /* disable the spawnfd() function */
35//#define FORKFD_DISABLE_FORK_FALLBACK 1 /* disable falling back to fork() from system_forkfd() */
36
37#include <sys/types.h>
38#if defined(__OpenBSD__) || defined(__NetBSD__)
39# include <sys/param.h>
40#endif
41#include <sys/time.h>
42#include <sys/resource.h>
43#include <sys/wait.h>
44#include <assert.h>
45#include <errno.h>
46#include <pthread.h>
47#include <signal.h>
48#include <stdlib.h>
49#include <string.h>
50#include <time.h>
51#include <unistd.h>
52
53#ifdef __linux__
54# define HAVE_WAIT4 1
55# if defined(__BIONIC__) || (defined(__GLIBC__) && (__GLIBC__ << 8) + __GLIBC_MINOR__ >= 0x208 && \
56 (!defined(__UCLIBC__) || ((__UCLIBC_MAJOR__ << 16) + (__UCLIBC_MINOR__ << 8) + __UCLIBC_SUBLEVEL__ > 0x90201)))
57# include <sys/eventfd.h>
58# ifdef EFD_CLOEXEC
59# define HAVE_EVENTFD 1
60# endif
61# endif
62# if defined(__BIONIC__) || (defined(__GLIBC__) && (__GLIBC__ << 8) + __GLIBC_MINOR__ >= 0x209 && \
63 (!defined(__UCLIBC__) || ((__UCLIBC_MAJOR__ << 16) + (__UCLIBC_MINOR__ << 8) + __UCLIBC_SUBLEVEL__ > 0x90201)))
64# define HAVE_PIPE2 1
65# endif
66#endif
67
68#if _POSIX_VERSION-0 >= 200809L || _XOPEN_VERSION-0 >= 500
69# define HAVE_WAITID 1
70#endif
71#if !defined(WEXITED) || !defined(WNOWAIT)
72# undef HAVE_WAITID
73#endif
74
75#if (defined(__FreeBSD__) && defined(__FreeBSD_version) && __FreeBSD_version >= 1000032) || \
76 (defined(__OpenBSD__) && OpenBSD >= 201505) || \
77 (defined(__NetBSD__) && __NetBSD_Version__ >= 600000000)
78# define HAVE_PIPE2 1
79#endif
80#if defined(__FreeBSD__) || defined(__DragonFly__) || defined(__FreeBSD_kernel__) || \
81 defined(__OpenBSD__) || defined(__NetBSD__) || defined(__APPLE__)
82# define HAVE_WAIT4 1
83#endif
84
85#if defined(__APPLE__)
/* Up until OS X 10.7, waitid(P_ALL, ...) will return success, but will not
 * fill in the details of the dead child. That means waitid is not useful to us.
 * Therefore, we only enable waitid() support if we're targeting OS X 10.8 or
 * later.
 */
91# include <Availability.h>
92# include <AvailabilityMacros.h>
93# if MAC_OS_X_VERSION_MIN_REQUIRED <= 1070
94# define HAVE_BROKEN_WAITID 1
95# endif
96#endif
97
98#include "forkfd_atomic.h"
99
100static int system_has_forkfd(void);
101static int system_forkfd(int flags, pid_t *ppid, int *system);
102static int system_forkfd_wait(int ffd, struct forkfd_info *info, int ffdwoptions, struct rusage *rusage);
103
/*
 * Tells the caller whether the fork()-and-pipe fallback implementation must
 * not be used.
 *
 * Returns non-zero when the fallback is disallowed and 0 when it may be used.
 * When FORKFD_DISABLE_FORK_FALLBACK is defined, the answer is whatever
 * system_has_forkfd() reports; otherwise the fallback is always allowed.
 */
static int disable_fork_fallback(void)
{
#ifdef FORKFD_DISABLE_FORK_FALLBACK
    /* if there's no system forkfd, we have to use the fallback */
    return system_has_forkfd();
#else
    /* a plain 0 keeps this int-returning function independent of <stdbool.h> */
    return 0;
#endif
}
113
/* Number of entries in the statically allocated table of children */
#define CHILDREN_IN_SMALL_ARRAY     16
/* Number of entries in each additional, heap-allocated table of children */
#define CHILDREN_IN_BIG_ARRAY       256

/* Element count of a true array; must not be applied to a pointer */
#define sizeofarray(array)          (sizeof(array) / sizeof((array)[0]))

/*
 * Evaluates `call`, stores its result in `ret` and repeats for as long as the
 * call fails with -1 and errno set to EINTR.
 */
#define EINTR_LOOP(ret, call) \
    do {                      \
        (ret) = (call);       \
    } while ((ret) == -1 && errno == EINTR)
121
/*
 * Record sent through a child's death pipe: notifyAndFreeInfo() writes one of
 * these and forkfd_wait4() reads it back.
 */
struct pipe_payload
{
    struct forkfd_info info;    /* termination code and status of the child */
    struct rusage rusage;       /* resource usage, filled in when the reaping call provides it */
};
127
128typedef struct process_info
129{
130 ffd_atomic_int pid;
131 int deathPipe;
132} ProcessInfo;
133
134struct BigArray;
135typedef struct Header
136{
137 ffd_atomic_pointer(struct BigArray) nextArray;
138 ffd_atomic_int busyCount;
139} Header;
140
141typedef struct BigArray
142{
143 Header header;
144 ProcessInfo entries[CHILDREN_IN_BIG_ARRAY];
145} BigArray;
146
147typedef struct SmallArray
148{
149 Header header;
150 ProcessInfo entries[CHILDREN_IN_SMALL_ARRAY];
151} SmallArray;
152static SmallArray children;
153
154static struct sigaction old_sigaction;
155static pthread_once_t forkfd_initialization = PTHREAD_ONCE_INIT;
156static ffd_atomic_int forkfd_status = FFD_ATOMIC_INIT(0);
157
158#ifdef HAVE_BROKEN_WAITID
159static int waitid_works = 0;
160#else
161static const int waitid_works = 1;
162#endif
163
164static ProcessInfo *tryAllocateInSection(Header *header, ProcessInfo entries[], int maxCount)
165{
166 /* we use ACQUIRE here because the signal handler might have released the PID */
167 int busyCount = ffd_atomic_add_fetch(&header->busyCount, 1, FFD_ATOMIC_ACQUIRE);
168 if (busyCount <= maxCount) {
169 /* there's an available entry in this section, find it and take it */
170 int i;
171 for (i = 0; i < maxCount; ++i) {
172 /* if the PID is 0, it's free; mark it as used by swapping it with -1 */
173 int expected_pid = 0;
174 if (ffd_atomic_compare_exchange(&entries[i].pid, &expected_pid,
175 -1, FFD_ATOMIC_RELAXED, FFD_ATOMIC_RELAXED))
176 return &entries[i];
177 }
178 }
179
180 /* there isn't an available entry, undo our increment */
181 (void)ffd_atomic_add_fetch(&header->busyCount, -1, FFD_ATOMIC_RELAXED);
182 return NULL;
183}
184
185static ProcessInfo *allocateInfo(Header **header)
186{
187 Header *currentHeader = &children.header;
188
189 /* try to find an available entry in the small array first */
190 ProcessInfo *info =
191 tryAllocateInSection(currentHeader, children.entries, sizeofarray(children.entries));
192
193 /* go on to the next arrays */
194 while (info == NULL) {
195 BigArray *array = ffd_atomic_load(&currentHeader->nextArray, FFD_ATOMIC_ACQUIRE);
196 if (array == NULL) {
197 /* allocate an array and try to use it */
198 BigArray *allocatedArray = (BigArray *)calloc(1, sizeof(BigArray));
199 if (allocatedArray == NULL)
200 return NULL;
201
202 if (ffd_atomic_compare_exchange(&currentHeader->nextArray, &array, allocatedArray,
203 FFD_ATOMIC_RELEASE, FFD_ATOMIC_ACQUIRE)) {
204 /* success */
205 array = allocatedArray;
206 } else {
207 /* failed, the atomic updated 'array' */
208 free(allocatedArray);
209 }
210 }
211
212 currentHeader = &array->header;
213 info = tryAllocateInSection(currentHeader, array->entries, sizeofarray(array->entries));
214 }
215
216 *header = currentHeader;
217 return info;
218}
219
#ifdef HAVE_WAITID
/*
 * Checks, without reaping and without blocking, whether child `pid` has
 * exited. Returns non-zero if it has, in which case *info describes it.
 */
static int isChildReady(pid_t pid, siginfo_t *info)
{
    int rc;

    /* cleared so that a successful call that found no exited child can be told apart */
    info->si_pid = 0;
    rc = waitid(P_PID, pid, info, WEXITED | WNOHANG | WNOWAIT);
    if (rc != 0)
        return 0;
    return info->si_pid == pid;
}
#endif
227
228static void convertStatusToForkfdInfo(int status, struct forkfd_info *info)
229{
230 if (WIFEXITED(status)) {
231 info->code = CLD_EXITED;
232 info->status = WEXITSTATUS(status);
233 } else if (WIFSIGNALED(status)) {
234 info->code = CLD_KILLED;
235# ifdef WCOREDUMP
236 if (WCOREDUMP(status))
237 info->code = CLD_DUMPED;
238# endif
239 info->status = WTERMSIG(status);
240 }
241}
242
243#ifdef __GNUC__
244__attribute__((unused))
245#endif
246static int convertForkfdWaitFlagsToWaitFlags(int ffdoptions)
247{
248 int woptions = WEXITED;
249 if (ffdoptions & FFDW_NOWAIT)
250 woptions |= WNOWAIT;
251 if (ffdoptions & FFDW_NOHANG)
252 woptions |= WNOHANG;
253 return woptions;
254}
255
256static int tryReaping(pid_t pid, struct pipe_payload *payload)
257{
258 /* reap the child */
259#if defined(HAVE_WAIT4)
260 int status;
261 if (wait4(pid, &status, WNOHANG, &payload->rusage) <= 0)
262 return 0;
263 convertStatusToForkfdInfo(status, &payload->info);
264#else
265# if defined(HAVE_WAITID)
266 if (waitid_works) {
267 /* we have waitid(2), which gets us some payload values on some systems */
268 siginfo_t info;
269 info.si_pid = 0;
270 int ret = waitid(P_PID, pid, &info, WEXITED | WNOHANG) == 0 && info.si_pid == pid;
271 if (!ret)
272 return ret;
273
274 payload->info.code = info.si_code;
275 payload->info.status = info.si_status;
276# ifdef __linux__
277 payload->rusage.ru_utime.tv_sec = info.si_utime / CLOCKS_PER_SEC;
278 payload->rusage.ru_utime.tv_usec = info.si_utime % CLOCKS_PER_SEC;
279 payload->rusage.ru_stime.tv_sec = info.si_stime / CLOCKS_PER_SEC;
280 payload->rusage.ru_stime.tv_usec = info.si_stime % CLOCKS_PER_SEC;
281# endif
282 return 1;
283 }
284# endif // HAVE_WAITID
285 int status;
286 if (waitpid(pid, &status, WNOHANG) <= 0)
287 return 0; // child did not change state
288 convertStatusToForkfdInfo(status, &payload->info);
289#endif // !HAVE_WAIT4
290
291 return 1;
292}
293
/*
 * Returns `entry` to the pool of free slots of the table described by
 * `header`. It does not close the death pipe; that is the caller's job.
 */
static void freeInfo(Header *header, ProcessInfo *entry)
{
    /* forget the descriptor before the slot becomes claimable again */
    entry->deathPipe = -1;
    /* a PID of 0 is what marks the slot as free */
    ffd_atomic_store(&entry->pid, 0, FFD_ATOMIC_RELEASE);

    /* only now drop the table's count of claimed slots */
    (void)ffd_atomic_add_fetch(&header->busyCount, -1, FFD_ATOMIC_RELEASE);
    assert(header->busyCount >= 0);
}
302
303static void notifyAndFreeInfo(Header *header, ProcessInfo *entry,
304 const struct pipe_payload *payload)
305{
306 ssize_t ret;
307 EINTR_LOOP(ret, write(entry->deathPipe, payload, sizeof(*payload)));
308 EINTR_LOOP(ret, close(entry->deathPipe));
309
310 freeInfo(header, entry);
311}
312
313static void reapChildProcesses();
314static void sigchld_handler(int signum, siginfo_t *handler_info, void *handler_context)
315{
316 /*
317 * This is a signal handler, so we need to be careful about which functions
318 * we can call. See the full, official listing in the POSIX.1-2008
319 * specification at:
320 * http://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_04_03
321 *
322 * The handler_info and handler_context parameters may not be valid, if
323 * we're a chained handler from another handler that did not use
324 * SA_SIGINFO. Therefore, we must obtain the siginfo ourselves directly by
325 * calling waitid.
326 *
327 * But we pass them anyway. Let's call the chained handler first, while
328 * those two arguments have a chance of being correct.
329 */
330 if (old_sigaction.sa_handler != SIG_IGN && old_sigaction.sa_handler != SIG_DFL) {
331 if (old_sigaction.sa_flags & SA_SIGINFO)
332 old_sigaction.sa_sigaction(signum, handler_info, handler_context);
333 else
334 old_sigaction.sa_handler(signum);
335 }
336
337 if (ffd_atomic_load(&forkfd_status, FFD_ATOMIC_RELAXED) == 1) {
338 int saved_errno = errno;
339 reapChildProcesses();
340 errno = saved_errno;
341 }
342}
343
344static inline void reapChildProcesses()
345{
346 /* is this one of our children? */
347 BigArray *array;
348 siginfo_t info;
349 struct pipe_payload payload;
350 int i;
351
352 memset(&info, 0, sizeof info);
353 memset(&payload, 0, sizeof payload);
354
355#ifdef HAVE_WAITID
356 if (waitid_works) {
357 /* be optimistic: try to see if we can get the child that exited */
358search_next_child:
359 /* waitid returns -1 ECHILD if there are no further children at all;
360 * it returns 0 and sets si_pid to 0 if there are children but they are not ready
361 * to be waited (we're passing WNOHANG). We should not get EINTR because
362 * we're passing WNOHANG and we should definitely not get EINVAL or anything else.
363 * That means we can actually ignore the return code and only inspect si_pid.
364 */
365 info.si_pid = 0;
366 waitid(P_ALL, 0, &info, WNOHANG | WNOWAIT | WEXITED);
367 if (info.si_pid == 0) {
368 /* there are no further un-waited-for children, so we can just exit.
369 */
370 return;
371 }
372
373 for (i = 0; i < (int)sizeofarray(children.entries); ++i) {
374 /* acquire the child first: swap the PID with -1 to indicate it's busy */
375 int pid = info.si_pid;
376 if (ffd_atomic_compare_exchange(&children.entries[i].pid, &pid, -1,
377 FFD_ATOMIC_ACQUIRE, FFD_ATOMIC_RELAXED)) {
378 /* this is our child, send notification and free up this entry */
379 /* ### FIXME: what if tryReaping returns false? */
380 if (tryReaping(pid, &payload))
381 notifyAndFreeInfo(&children.header, &children.entries[i], &payload);
382 goto search_next_child;
383 }
384 }
385
386 /* try the arrays */
387 array = ffd_atomic_load(&children.header.nextArray, FFD_ATOMIC_ACQUIRE);
388 while (array != NULL) {
389 for (i = 0; i < (int)sizeofarray(array->entries); ++i) {
390 int pid = info.si_pid;
391 if (ffd_atomic_compare_exchange(&array->entries[i].pid, &pid, -1,
392 FFD_ATOMIC_ACQUIRE, FFD_ATOMIC_RELAXED)) {
393 /* this is our child, send notification and free up this entry */
394 /* ### FIXME: what if tryReaping returns false? */
395 if (tryReaping(pid, &payload))
396 notifyAndFreeInfo(&array->header, &array->entries[i], &payload);
397 goto search_next_child;
398 }
399 }
400
401 array = ffd_atomic_load(&array->header.nextArray, FFD_ATOMIC_ACQUIRE);
402 }
403
404 /* if we got here, we couldn't find this child in our list. That means this child
405 * belongs to one of the chained SIGCHLD handlers. However, there might be another
406 * child that exited and does belong to us, so we need to check each one individually.
407 */
408 }
409#endif
410
411 for (i = 0; i < (int)sizeofarray(children.entries); ++i) {
412 int pid = ffd_atomic_load(&children.entries[i].pid, FFD_ATOMIC_ACQUIRE);
413 if (pid <= 0)
414 continue;
415#ifdef HAVE_WAITID
416 if (waitid_works) {
417 /* The child might have been reaped by the block above in another thread,
418 * so first check if it's ready and, if it is, lock it */
419 if (!isChildReady(pid, &info) ||
420 !ffd_atomic_compare_exchange(&children.entries[i].pid, &pid, -1,
421 FFD_ATOMIC_RELAXED, FFD_ATOMIC_RELAXED))
422 continue;
423 }
424#endif
425 if (tryReaping(pid, &payload)) {
426 /* this is our child, send notification and free up this entry */
427 notifyAndFreeInfo(&children.header, &children.entries[i], &payload);
428 }
429 }
430
431 /* try the arrays */
432 array = ffd_atomic_load(&children.header.nextArray, FFD_ATOMIC_ACQUIRE);
433 while (array != NULL) {
434 for (i = 0; i < (int)sizeofarray(array->entries); ++i) {
435 int pid = ffd_atomic_load(&array->entries[i].pid, FFD_ATOMIC_ACQUIRE);
436 if (pid <= 0)
437 continue;
438#ifdef HAVE_WAITID
439 if (waitid_works) {
440 /* The child might have been reaped by the block above in another thread,
441 * so first check if it's ready and, if it is, lock it */
442 if (!isChildReady(pid, &info) ||
443 !ffd_atomic_compare_exchange(&array->entries[i].pid, &pid, -1,
444 FFD_ATOMIC_RELAXED, FFD_ATOMIC_RELAXED))
445 continue;
446 }
447#endif
448 if (tryReaping(pid, &payload)) {
449 /* this is our child, send notification and free up this entry */
450 notifyAndFreeInfo(&array->header, &array->entries[i], &payload);
451 }
452 }
453
454 array = ffd_atomic_load(&array->header.nextArray, FFD_ATOMIC_ACQUIRE);
455 }
456}
457
458static void ignore_sigpipe()
459{
460#ifdef O_NOSIGPIPE
461 static ffd_atomic_int done = FFD_ATOMIC_INIT(0);
462 if (ffd_atomic_load(&done, FFD_ATOMIC_RELAXED))
463 return;
464#endif
465
466 struct sigaction action;
467 memset(&action, 0, sizeof action);
468 sigemptyset(&action.sa_mask);
469 action.sa_handler = SIG_IGN;
470 action.sa_flags = 0;
471 sigaction(SIGPIPE, &action, NULL);
472
473#ifdef O_NOSIGPIPE
474 ffd_atomic_store(&done, 1, FFD_ATOMIC_RELAXED);
475#endif
476}
477
478#if defined(__GNUC__) && (!defined(__FreeBSD__) || __FreeBSD__ < 10)
479__attribute((destructor, unused)) static void cleanup();
480#endif
481
482static void cleanup()
483{
484 BigArray *array;
485 /* This function is not thread-safe!
486 * It must only be called when the process is shutting down.
487 * At shutdown, we expect no one to be calling forkfd(), so we don't
488 * need to be thread-safe with what is done there.
489 *
490 * But SIGCHLD might be delivered to any thread, including this one.
491 * There's no way to prevent that. The correct solution would be to
492 * cooperatively delete. We don't do that.
493 */
494 if (ffd_atomic_load(&forkfd_status, FFD_ATOMIC_RELAXED) == 0)
495 return;
496
497 /* notify the handler that we're no longer in operation */
498 ffd_atomic_store(&forkfd_status, 0, FFD_ATOMIC_RELAXED);
499
500 /* free any arrays we might have */
501 array = ffd_atomic_load(&children.header.nextArray, FFD_ATOMIC_ACQUIRE);
502 while (array != NULL) {
503 BigArray *next = ffd_atomic_load(&array->header.nextArray, FFD_ATOMIC_ACQUIRE);
504 free(array);
505 array = next;
506 }
507}
508
509static void forkfd_initialize()
510{
511#if defined(HAVE_BROKEN_WAITID)
512 pid_t pid = fork();
513 if (pid == 0) {
514 _exit(0);
515 } else if (pid > 0) {
516 siginfo_t info;
517 waitid(P_ALL, 0, &info, WNOWAIT | WEXITED);
518 waitid_works = (info.si_pid != 0);
519 info.si_pid = 0;
520
521 // now really reap the child
522 waitid(P_PID, pid, &info, WEXITED);
523 waitid_works = waitid_works && (info.si_pid != 0);
524 }
525#endif
526
527 /* install our signal handler */
528 struct sigaction action;
529 memset(&action, 0, sizeof action);
530 sigemptyset(&action.sa_mask);
531 action.sa_flags = SA_NOCLDSTOP | SA_SIGINFO;
532 action.sa_sigaction = sigchld_handler;
533
534 /* ### RACE CONDITION
535 * The sigaction function does a memcpy from an internal buffer
536 * to old_sigaction, which we use in the SIGCHLD handler. If a
537 * SIGCHLD is delivered before or during that memcpy, the handler will
538 * see an inconsistent state.
539 *
540 * There is no solution. pthread_sigmask doesn't work here because the
541 * signal could be delivered to another thread.
542 */
543 sigaction(SIGCHLD, &action, &old_sigaction);
544
545#ifndef O_NOSIGPIPE
546 /* disable SIGPIPE too */
547 ignore_sigpipe();
548#endif
549
550#ifdef __GNUC__
551 (void) cleanup; /* suppress unused static function warning */
552#else
553 atexit(cleanup);
554#endif
555
556 ffd_atomic_store(&forkfd_status, 1, FFD_ATOMIC_RELAXED);
557}
558
559static int create_pipe(int filedes[], int flags)
560{
561 int ret = -1;
562#ifdef HAVE_PIPE2
563 /* use pipe2(2) whenever possible, since it can thread-safely create a
564 * cloexec pair of pipes. Without it, we have a race condition setting
565 * FD_CLOEXEC
566 */
567
568# ifdef O_NOSIGPIPE
569 /* try first with O_NOSIGPIPE */
570 ret = pipe2(filedes, O_CLOEXEC | O_NOSIGPIPE);
571 if (ret == -1) {
572 /* O_NOSIGPIPE not supported, ignore SIGPIPE */
573 ignore_sigpipe();
574 }
575# endif
576 if (ret == -1)
577 ret = pipe2(filedes, O_CLOEXEC);
578 if (ret == -1)
579 return ret;
580
581 if ((flags & FFD_CLOEXEC) == 0)
582 fcntl(filedes[0], F_SETFD, 0);
583#else
584 ret = pipe(filedes);
585 if (ret == -1)
586 return ret;
587
588 fcntl(filedes[1], F_SETFD, FD_CLOEXEC);
589 if (flags & FFD_CLOEXEC)
590 fcntl(filedes[0], F_SETFD, FD_CLOEXEC);
591#endif
592 if (flags & FFD_NONBLOCK)
593 fcntl(filedes[0], F_SETFL, fcntl(filedes[0], F_GETFL) | O_NONBLOCK);
594 return ret;
595}
596
597#ifndef FORKFD_NO_FORKFD
598/**
599 * @brief forkfd returns a file descriptor representing a child process
600 * @return a file descriptor, or -1 in case of failure
601 *
602 * forkfd() creates a file descriptor that can be used to be notified of when a
603 * child process exits. This file descriptor can be monitored using select(2),
604 * poll(2) or similar mechanisms.
605 *
606 * The @a flags parameter can contain the following values ORed to change the
607 * behaviour of forkfd():
608 *
609 * @li @c FFD_NONBLOCK Set the O_NONBLOCK file status flag on the new open file
610 * descriptor. Using this flag saves extra calls to fnctl(2) to achieve the same
611 * result.
612 *
613 * @li @c FFD_CLOEXEC Set the close-on-exec (FD_CLOEXEC) flag on the new file
614 * descriptor. You probably want to set this flag, since forkfd() does not work
615 * if the original parent process dies.
616 *
617 * @li @c FFD_USE_FORK Tell forkfd() to actually call fork() instead of a
618 * different system implementation that may be available. On systems where a
619 * different implementation is available, its behavior may differ from that of
620 * fork(), such as not calling the functions registered with pthread_atfork().
621 * If that's necessary, pass this flag.
622 *
623 * The file descriptor returned by forkfd() supports the following operations:
624 *
625 * @li read(2) When the child process exits, then the buffer supplied to
626 * read(2) is used to return information about the status of the child in the
627 * form of one @c siginfo_t structure. The buffer must be at least
628 * sizeof(siginfo_t) bytes. The return value of read(2) is the total number of
629 * bytes read.
630 *
631 * @li poll(2), select(2) (and similar) The file descriptor is readable (the
632 * select(2) readfds argument; the poll(2) POLLIN flag) if the child has exited
633 * or signalled via SIGCHLD.
634 *
635 * @li close(2) When the file descriptor is no longer required it should be closed.
636 */
637int forkfd(int flags, pid_t *ppid)
638{
639 Header *header;
640 ProcessInfo *info;
641 pid_t pid;
642 int fd = -1;
643 int death_pipe[2];
644 int sync_pipe[2];
645 int ret;
646#ifdef __linux__
647 int efd;
648#endif
649
650 if (disable_fork_fallback())
651 flags &= ~FFD_USE_FORK;
652
653 if ((flags & FFD_USE_FORK) == 0) {
654 fd = system_forkfd(flags, ppid, &ret);
655 if (ret || disable_fork_fallback())
656 return fd;
657 }
658
659 (void) pthread_once(&forkfd_initialization, forkfd_initialize);
660
661 info = allocateInfo(&header);
662 if (info == NULL) {
663 errno = ENOMEM;
664 return -1;
665 }
666
667 /* create the pipes before we fork */
668 if (create_pipe(death_pipe, flags) == -1)
669 goto err_free; /* failed to create the pipes, pass errno */
670
671#ifdef HAVE_EVENTFD
672 /* try using an eventfd, which consumes less resources */
673 efd = eventfd(0, EFD_CLOEXEC);
674 if (efd == -1)
675#endif
676 {
677 /* try a pipe */
678 if (create_pipe(sync_pipe, FFD_CLOEXEC) == -1) {
679 /* failed both at eventfd and pipe; fail and pass errno */
680 goto err_close;
681 }
682 }
683
684 /* now fork */
685 pid = fork();
686 if (pid == -1)
687 goto err_close2; /* failed to fork, pass errno */
688 if (ppid)
689 *ppid = pid;
690
691 /*
692 * We need to store the child's PID in the info structure, so
693 * the SIGCHLD handler knows that this child is present and it
694 * knows the writing end of the pipe to pass information on.
695 * However, the child process could exit before we stored the
696 * information (or the handler could run for other children exiting).
697 * We prevent that from happening by blocking the child process in
698 * a read(2) until we're finished storing the information.
699 */
700 if (pid == 0) {
701 /* this is the child process */
702 /* first, wait for the all clear */
703#ifdef HAVE_EVENTFD
704 if (efd != -1) {
705 eventfd_t val64;
706 EINTR_LOOP(ret, eventfd_read(efd, &val64));
707 EINTR_LOOP(ret, close(efd));
708 } else
709#endif
710 {
711 char c;
712 EINTR_LOOP(ret, close(sync_pipe[1]));
713 EINTR_LOOP(ret, read(sync_pipe[0], &c, sizeof c));
714 EINTR_LOOP(ret, close(sync_pipe[0]));
715 }
716
717 /* now close the pipes and return to the caller */
718 EINTR_LOOP(ret, close(death_pipe[0]));
719 EINTR_LOOP(ret, close(death_pipe[1]));
720 fd = FFD_CHILD_PROCESS;
721 } else {
722 /* parent process */
723 info->deathPipe = death_pipe[1];
724 fd = death_pipe[0];
725 ffd_atomic_store(&info->pid, pid, FFD_ATOMIC_RELEASE);
726
727 /* release the child */
728#ifdef HAVE_EVENTFD
729 if (efd != -1) {
730 eventfd_t val64 = 42;
731 EINTR_LOOP(ret, eventfd_write(efd, val64));
732 EINTR_LOOP(ret, close(efd));
733 } else
734#endif
735 {
736 /*
737 * Usually, closing would be enough to make read(2) return and the child process
738 * continue. We need to write here: another thread could be calling forkfd at the
739 * same time, which means auxpipe[1] might be open in another child process.
740 */
741 EINTR_LOOP(ret, close(sync_pipe[0]));
742 EINTR_LOOP(ret, write(sync_pipe[1], "", 1));
743 EINTR_LOOP(ret, close(sync_pipe[1]));
744 }
745 }
746
747 return fd;
748
749err_close2:
750#ifdef HAVE_EVENTFD
751 if (efd != -1) {
752 EINTR_LOOP(ret, close(efd));
753 } else
754#endif
755 {
756 EINTR_LOOP(ret, close(sync_pipe[0]));
757 EINTR_LOOP(ret, close(sync_pipe[1]));
758 }
759err_close:
760 EINTR_LOOP(ret, close(death_pipe[0]));
761 EINTR_LOOP(ret, close(death_pipe[1]));
762err_free:
763 /* free the info pointer */
764 freeInfo(header, info);
765 return -1;
766}
767#endif // FORKFD_NO_FORKFD
768
769#if _POSIX_SPAWN > 0 && !defined(FORKFD_NO_SPAWNFD)
770int spawnfd(int flags, pid_t *ppid, const char *path, const posix_spawn_file_actions_t *file_actions,
771 posix_spawnattr_t *attrp, char *const argv[], char *const envp[])
772{
773 Header *header;
774 ProcessInfo *info;
775 struct pipe_payload payload;
776 pid_t pid;
777 int death_pipe[2];
778 int ret = -1;
779 /* we can only do work if we have a way to start the child in stopped mode;
780 * otherwise, we have a major race condition. */
781
782 assert(!system_has_forkfd());
783
784 (void) pthread_once(&forkfd_initialization, forkfd_initialize);
785
786 info = allocateInfo(&header);
787 if (info == NULL) {
788 errno = ENOMEM;
789 goto out;
790 }
791
792 /* create the pipe before we spawn */
793 if (create_pipe(death_pipe, flags) == -1)
794 goto err_free; /* failed to create the pipes, pass errno */
795
796 /* start the process */
797 if (flags & FFD_SPAWN_SEARCH_PATH) {
798 /* use posix_spawnp */
799 if (posix_spawnp(&pid, path, file_actions, attrp, argv, envp) != 0)
800 goto err_close;
801 } else {
802 if (posix_spawn(&pid, path, file_actions, attrp, argv, envp) != 0)
803 goto err_close;
804 }
805
806 if (ppid)
807 *ppid = pid;
808
809 /* Store the child's PID in the info structure.
810 */
811 info->deathPipe = death_pipe[1];
812 ffd_atomic_store(&info->pid, pid, FFD_ATOMIC_RELEASE);
813
814 /* check if the child has already exited */
815 if (tryReaping(pid, &payload))
816 notifyAndFreeInfo(header, info, &payload);
817
818 ret = death_pipe[0];
819 return ret;
820
821err_close:
822 EINTR_LOOP(ret, close(death_pipe[0]));
823 EINTR_LOOP(ret, close(death_pipe[1]));
824
825err_free:
826 /* free the info pointer */
827 freeInfo(header, info);
828
829out:
830 return -1;
831}
832#endif // _POSIX_SPAWN && !FORKFD_NO_SPAWNFD
833
834int forkfd_wait4(int ffd, struct forkfd_info *info, int options, struct rusage *rusage)
835{
836 struct pipe_payload payload;
837 int ret;
838
839 if (system_has_forkfd()) {
840 /* if this is one of our pipes, not a procdesc/pidfd, we'll get an EBADF */
841 ret = system_forkfd_wait(ffd, info, options, rusage);
842 if (disable_fork_fallback() || ret != -1 || errno != EBADF)
843 return ret;
844 }
845
846 ret = read(ffd, &payload, sizeof(payload));
847 if (ret == -1)
848 return ret; /* pass errno, probably EINTR, EBADF or EWOULDBLOCK */
849
850 assert(ret == sizeof(payload));
851 if (info)
852 *info = payload.info;
853 if (rusage)
854 *rusage = payload.rusage;
855
856 return 0; /* success */
857}
858
859
/*
 * Closes a file descriptor obtained from forkfd() or spawnfd().
 * Returns what close() returns: 0 on success, -1 with errno set on failure.
 */
int forkfd_close(int ffd)
{
    const int rc = close(ffd);
    return rc;
}
864
865#if defined(__FreeBSD__) && __FreeBSD__ >= 9
866# include "forkfd_freebsd.c"
867#elif defined(__linux__)
868# include "forkfd_linux.c"
869#else
/*
 * Fallback for platforms without a system forkfd implementation: always
 * reports that none is available. Spelled `static ... (void)` like its
 * forward declaration.
 */
static int system_has_forkfd(void)
{
    return 0;
}
874
/*
 * Fallback for platforms without a system forkfd implementation: starts
 * nothing. Stores 0 in *system, to tell the caller that no system
 * implementation handled the request, and returns -1. flags and ppid are
 * ignored.
 */
static int system_forkfd(int flags, pid_t *ppid, int *system)
{
    (void)flags;
    (void)ppid;
    *system = 0;
    return -1;
}
882
/*
 * Fallback for platforms without a system forkfd implementation: there is
 * nothing to wait on, so every argument is ignored and -1 is returned.
 */
static int system_forkfd_wait(int ffd, struct forkfd_info *info, int options, struct rusage *rusage)
{
    (void)ffd;
    (void)info;
    (void)options;
    (void)rusage;
    return -1;
}
891#endif
892