1/****************************************************************************
2**
3** Copyright (C) 2020 Intel Corporation.
4**
5** Permission is hereby granted, free of charge, to any person obtaining a copy
6** of this software and associated documentation files (the "Software"), to deal
7** in the Software without restriction, including without limitation the rights
8** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9** copies of the Software, and to permit persons to whom the Software is
10** furnished to do so, subject to the following conditions:
11**
12** The above copyright notice and this permission notice shall be included in
13** all copies or substantial portions of the Software.
14**
15** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20** OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21** THE SOFTWARE.
22**
23****************************************************************************/
24
25#ifndef _GNU_SOURCE
26# define _GNU_SOURCE
27#endif
28
29#include "forkfd.h"
30
31#include <errno.h>
32#include <fcntl.h>
33#include <limits.h>
34#include <sched.h>
35#include <signal.h>
36#include <stdio.h>
37#include <stdlib.h>
38#include <string.h>
39#include <sys/resource.h>
40#include <sys/syscall.h>
41#include <sys/types.h>
42#include <sys/wait.h>
43#include <unistd.h>
44
45#include "forkfd_atomic.h"
46
/*
 * Fallback definitions for toolchains whose headers do not provide these
 * constants; each one is only defined here when it is not already defined.
 * CLONE_PIDFD is passed to the clone system call and P_PIDFD to the waitid
 * system call further down in this file.
 * NOTE(review): the numeric values are expected to match the Linux kernel
 * UAPI headers -- confirm against <linux/sched.h> and <linux/wait.h>.
 */
#ifndef CLONE_PIDFD
# define CLONE_PIDFD 0x00001000
#endif
#ifndef P_PIDFD
# define P_PIDFD 3
#endif
53
// Forward declarations of helpers that are defined in forkfd.c.
static int convertForkfdWaitFlagsToWaitFlags(int ffdoptions);
static void convertStatusToForkfdInfo(int status, struct forkfd_info *info);

/*
 * Cached result of the pidfd support probe:
 *   0  - not probed yet (initial value)
 *  > 0 - probe succeeded, the pidfd-based implementation is usable
 *  < 0 - probe failed, the pidfd-based implementation is unavailable
 */
static ffd_atomic_int system_forkfd_state = FFD_ATOMIC_INIT(0);
59
/*
 * Thin wrapper around the raw waitid system call.
 *
 * The system call is issued directly because it accepts a fifth argument
 * (a struct rusage pointer) that the glibc waitid() wrapper does not offer.
 *
 * All arguments are forwarded unchanged. The return value is whatever
 * syscall() reported, narrowed to int (-1 with errno set on failure).
 */
static int sys_waitid(int which, int pid_or_pidfd, siginfo_t *infop, int options,
                      struct rusage *ru)
{
    long result = syscall(__NR_waitid, which, pid_or_pidfd, infop, options, ru);

    return (int) result;
}
67
/*
 * Issue the raw clone system call with only the flags and the parent-tid
 * pointer supplied by the caller; the child stack, child-tid pointer and TLS
 * arguments are always passed as NULL/0.
 *
 * The order of the clone arguments differs between architectures, so each
 * supported family gets its own invocation. On an architecture that is not
 * listed, the call fails with errno set to ENOSYS.
 *
 * Returns the value reported by syscall(), narrowed to int.
 */
static int sys_clone(unsigned long cloneflags, int *ptid)
{
    /* clone arguments this fork-like use never sets */
    void *stack = NULL;
    int *child_tid = NULL;
    unsigned long tls = 0;
    long rc;

#if defined(__NR_clone2)
    size_t stack_len = 0;
    rc = syscall(__NR_clone2, cloneflags, stack, stack_len, ptid, child_tid, tls);
#elif defined(__cris__) || defined(__s390__)
    /* CONFIG_CLONE_BACKWARDS2 architectures: stack comes before the flags */
    rc = syscall(__NR_clone, stack, cloneflags, ptid, tls, child_tid);
#elif defined(__microblaze__)
    /* CONFIG_CLONE_BACKWARDS3 architectures: extra stack size argument */
    size_t stack_len = 0;
    rc = syscall(__NR_clone, cloneflags, stack, stack_len, ptid, tls, child_tid);
#elif defined(__arc__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
    defined(__nds32__) || defined(__hppa__) || defined(__powerpc__) || defined(__i386__) || \
    defined(__x86_64__) || defined(__xtensa__) || defined(__alpha__) || defined(__riscv)
    /* The child-tid and TLS arguments are swapped on CONFIG_CLONE_BACKWARDS
     * architectures, which is harmless here because both are zero. */
    rc = syscall(__NR_clone, cloneflags, stack, ptid, child_tid, tls);
#else
    (void) stack;
    (void) child_tid;
    (void) tls;
    errno = ENOSYS;
    rc = -1;
#endif

    return (int) rc;
}
97
98static int detect_clone_pidfd_support()
99{
100 /*
101 * Detect support for CLONE_PIDFD and P_PIDFD. Support was added in steps:
102 * - Linux 5.2 added CLONE_PIDFD support in clone(2) system call
103 * - Linux 5.2 added pidfd_send_signal(2)
104 * - Linux 5.3 added support for poll(2) on pidfds
105 * - Linux 5.3 added clone3(2)
106 * - Linux 5.4 added P_PIDFD support in waitid(2)
107 *
108 * We need CLONE_PIDFD and the poll(2) support. We could emulate the
109 * P_PIDFD support by reading the PID from /proc/self/fdinfo/n, which works
110 * in Linux 5.2, but without poll(2), we can't guarantee the functionality
111 * anyway.
112 *
113 * So we detect by trying to waitid(2) on a positive file descriptor that
114 * is definitely closed (INT_MAX). If P_PIDFD is supported, waitid(2) will
115 * return EBADF. If it isn't supported, it returns EINVAL (as it would for
116 * a negative file descriptor). This will succeed on Linux 5.4.
117 *
118 * We could have instead detected by the existence of the clone3(2) system
119 * call, but for that we would have needed to wait for __NR_clone3 to show
120 * up on the libcs. We choose to go via the waitid(2) route, which requires
121 * platform-independent constants only. It would have simplified the
122 * sys_clone() mess above...
123 */
124
125 sys_waitid(P_PIDFD, INT_MAX, NULL, WEXITED|WNOHANG, NULL);
126 return errno == EBADF ? 1 : -1;
127}
128
129int system_has_forkfd()
130{
131 return ffd_atomic_load(&system_forkfd_state, FFD_ATOMIC_RELAXED) > 0;
132}
133
134int system_forkfd(int flags, pid_t *ppid, int *system)
135{
136 pid_t pid;
137 int pidfd;
138
139 int state = ffd_atomic_load(&system_forkfd_state, FFD_ATOMIC_RELAXED);
140 if (state == 0) {
141 state = detect_clone_pidfd_support();
142 ffd_atomic_store(&system_forkfd_state, state, FFD_ATOMIC_RELAXED);
143 }
144 if (state < 0) {
145 *system = 0;
146 return state;
147 }
148
149 *system = 1;
150 unsigned long cloneflags = CLONE_PIDFD | SIGCHLD;
151 pid = sys_clone(cloneflags, &pidfd);
152 if (pid < 0)
153 return pid;
154 if (ppid)
155 *ppid = pid;
156
157 if (pid == 0) {
158 /* Child process */
159 return FFD_CHILD_PROCESS;
160 }
161
162 /* parent process */
163 if ((flags & FFD_CLOEXEC) == 0) {
164 /* pidfd defaults to O_CLOEXEC */
165 fcntl(pidfd, F_SETFD, 0);
166 }
167 if (flags & FFD_NONBLOCK)
168 fcntl(pidfd, F_SETFL, fcntl(pidfd, F_GETFL) | O_NONBLOCK);
169 return pidfd;
170}
171
172int system_forkfd_wait(int ffd, struct forkfd_info *info, int ffdoptions, struct rusage *rusage)
173{
174 siginfo_t si;
175 int ret;
176 int options = convertForkfdWaitFlagsToWaitFlags(ffdoptions);
177
178 if ((options & WNOHANG) == 0) {
179 /* check if the file descriptor is non-blocking */
180 ret = fcntl(ffd, F_GETFL);
181 if (ret == -1)
182 return ret;
183 if (ret & O_NONBLOCK)
184 options |= WNOHANG;
185 }
186
187 ret = sys_waitid(P_PIDFD, ffd, &si, options, rusage);
188 if (ret == -1 && errno == ECHILD) {
189 errno = EWOULDBLOCK;
190 } else if (ret == 0 && info) {
191 info->code = si.si_code;
192 info->status = si.si_status;
193 }
194 return ret;
195}
196