/*
 * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <pthread.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/uio.h>
#include <unistd.h>
#include <errno.h>
#include <poll.h>
#include "jvm.h"
#include "net_util.h"

/*
 * Stack allocated by thread when doing blocking operation
 */
typedef struct threadEntry {
    pthread_t thr;                      /* this thread */
    struct threadEntry *next;           /* next thread */
    int intr;                           /* interrupted */
} threadEntry_t;

/*
 * Heap allocated during initialization - one entry per fd
 */
typedef struct {
    pthread_mutex_t lock;               /* fd lock */
    threadEntry_t *threads;             /* threads blocked on fd */
} fdEntry_t;

/*
 * Signal to unblock thread
 */
static int sigWakeup = (__SIGRTMAX - 2);

/*
 * fdTable holds one entry per file descriptor, up to a certain
 * maximum.
 * Theoretically, the number of possible file descriptors can get
 * large, though usually it does not. Entries for small value file
 * descriptors are kept in a simple table, which covers most scenarios.
 * Entries for large value file descriptors are kept in an overflow
 * table, which is organized as a sparse two dimensional array whose
 * slabs are allocated on demand. This covers all corner cases while
 * keeping memory consumption reasonable.
 */
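
/*
 * Worked example (illustrative sketch, using the constants defined
 * below): with a base table of 0x1000 entries and slabs of 0x10000
 * entries, fd 100 lives in fdTable[100], while fd 0x11000 maps to the
 * overflow table at slab index (0x11000 - 0x1000) / 0x10000 == 1,
 * offset (0x11000 - 0x1000) % 0x10000 == 0, i.e. fdOverflowTable[1][0].
 */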

/* Base table for low value file descriptors */
static fdEntry_t* fdTable = NULL;
/* Maximum size of base table (in number of entries). */
static const int fdTableMaxSize = 0x1000; /* 4K */
/* Actual size of base table (in number of entries) */
static int fdTableLen = 0;
/* Max. theoretical number of file descriptors on system. */
static int fdLimit = 0;

/* Overflow table, should base table not be large enough. Organized as
 * an array of n slabs, each holding 64k entries.
 */
static fdEntry_t** fdOverflowTable = NULL;
/* Number of slabs in the overflow table */
static int fdOverflowTableLen = 0;
/* Number of entries in one slab */
static const int fdOverflowTableSlabSize = 0x10000; /* 64k */
pthread_mutex_t fdOverflowTableLock = PTHREAD_MUTEX_INITIALIZER;

/*
 * Null signal handler
 */
static void sig_wakeup(int sig) {
}

/*
 * Initialization routine (executed when library is loaded).
 * Allocates the fd tables and sets up the wakeup signal handler.
 */
static void __attribute__((constructor)) init() {
    struct rlimit nbr_files;
    sigset_t sigset;
    struct sigaction sa;
    int i = 0;

    /* Determine the maximum number of possible file descriptors. */
    if (-1 == getrlimit(RLIMIT_NOFILE, &nbr_files)) {
        fprintf(stderr, "library initialization failed - "
                "unable to get max # of allocated fds\n");
        abort();
    }
    if (nbr_files.rlim_max != RLIM_INFINITY) {
        /* rlim_t may be wider than int; clamp to avoid overflow. */
        fdLimit = nbr_files.rlim_max > INT_MAX ? INT_MAX : (int)nbr_files.rlim_max;
    } else {
        /* We just do not know. */
        fdLimit = INT_MAX;
    }

    /* Allocate table for low value file descriptors. */
    fdTableLen = fdLimit < fdTableMaxSize ? fdLimit : fdTableMaxSize;
    fdTable = (fdEntry_t*) calloc(fdTableLen, sizeof(fdEntry_t));
    if (fdTable == NULL) {
        fprintf(stderr, "library initialization failed - "
                "unable to allocate file descriptor table - out of memory\n");
        abort();
    } else {
        for (i = 0; i < fdTableLen; i++) {
            pthread_mutex_init(&fdTable[i].lock, NULL);
        }
    }

    /* Allocate overflow table, if needed */
    if (fdLimit > fdTableMaxSize) {
        fdOverflowTableLen = ((fdLimit - fdTableMaxSize) / fdOverflowTableSlabSize) + 1;
        fdOverflowTable = (fdEntry_t**) calloc(fdOverflowTableLen, sizeof(fdEntry_t*));
        if (fdOverflowTable == NULL) {
            fprintf(stderr, "library initialization failed - "
                    "unable to allocate file descriptor overflow table - out of memory\n");
            abort();
        }
    }
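
    /*
     * Sizing example (illustrative): with a hard RLIMIT_NOFILE of 65536,
     * fdTableLen is 0x1000 (4096) and the overflow table gets
     * ((65536 - 4096) / 65536) + 1 == 1 slab pointer; the slab itself is
     * allocated lazily in getFdEntry() only if such a high fd is used.
     */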

    /*
     * Set up the signal handler
     */
    sa.sa_handler = sig_wakeup;
    sa.sa_flags = 0;
    sigemptyset(&sa.sa_mask);
    sigaction(sigWakeup, &sa, NULL);

    sigemptyset(&sigset);
    sigaddset(&sigset, sigWakeup);
    sigprocmask(SIG_UNBLOCK, &sigset, NULL);
}

/*
 * Return the fd table entry for this fd.
 */
static inline fdEntry_t *getFdEntry(int fd)
{
    fdEntry_t* result = NULL;

    if (fd < 0) {
        return NULL;
    }

    /* This should not happen. If it does, our assumption about
     * max. fd value was wrong. */
    assert(fd < fdLimit);

    if (fd < fdTableMaxSize) {
        /* fd is in base table. */
        assert(fd < fdTableLen);
        result = &fdTable[fd];
    } else {
        /* fd is in overflow table. */
        const int indexInOverflowTable = fd - fdTableMaxSize;
        const int rootindex = indexInOverflowTable / fdOverflowTableSlabSize;
        const int slabindex = indexInOverflowTable % fdOverflowTableSlabSize;
        fdEntry_t* slab = NULL;
        assert(rootindex < fdOverflowTableLen);
        assert(slabindex < fdOverflowTableSlabSize);
        pthread_mutex_lock(&fdOverflowTableLock);
        /* Allocate new slab in overflow table if needed */
        if (fdOverflowTable[rootindex] == NULL) {
            fdEntry_t* const newSlab =
                (fdEntry_t*)calloc(fdOverflowTableSlabSize, sizeof(fdEntry_t));
            if (newSlab == NULL) {
                fprintf(stderr, "Unable to allocate file descriptor overflow"
                        " table slab - out of memory\n");
                pthread_mutex_unlock(&fdOverflowTableLock);
                abort();
            } else {
                int i;
                for (i = 0; i < fdOverflowTableSlabSize; i++) {
                    pthread_mutex_init(&newSlab[i].lock, NULL);
                }
                fdOverflowTable[rootindex] = newSlab;
            }
        }
        pthread_mutex_unlock(&fdOverflowTableLock);
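        /*
         * Reading the slab pointer without the lock is safe here: this
         * thread either installed the slab or observed it non-NULL while
         * holding the lock, and slabs are never freed or replaced.
         */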
        slab = fdOverflowTable[rootindex];
        result = &slab[slabindex];
    }

    return result;
}

/*
 * Start a blocking operation :-
 *    Insert thread onto thread list for the fd.
 */
static inline void startOp(fdEntry_t *fdEntry, threadEntry_t *self)
{
    self->thr = pthread_self();
    self->intr = 0;

    pthread_mutex_lock(&(fdEntry->lock));
    {
        self->next = fdEntry->threads;
        fdEntry->threads = self;
    }
    pthread_mutex_unlock(&(fdEntry->lock));
}

/*
 * End a blocking operation :-
 *    Remove thread from thread list for the fd.
 *    If fd has been interrupted then set errno to EBADF.
 */
static inline void endOp(fdEntry_t *fdEntry, threadEntry_t *self)
{
    int orig_errno = errno;
    pthread_mutex_lock(&(fdEntry->lock));
    {
        threadEntry_t *curr, *prev = NULL;
        curr = fdEntry->threads;
        while (curr != NULL) {
            if (curr == self) {
                if (curr->intr) {
                    orig_errno = EBADF;
                }
                if (prev == NULL) {
                    fdEntry->threads = curr->next;
                } else {
                    prev->next = curr->next;
                }
                break;
            }
            prev = curr;
            curr = curr->next;
        }
    }
    pthread_mutex_unlock(&(fdEntry->lock));
    errno = orig_errno;
}

/*
 * Close or dup2 a file descriptor ensuring that all threads blocked on
 * the file descriptor are notified via a wakeup signal.
 *
 *      fd1 < 0   => close(fd2)
 *      fd1 >= 0  => dup2(fd1, fd2)
 *
 * Returns -1 with errno set if operation fails.
 */
static int closefd(int fd1, int fd2) {
    int rv, orig_errno;
    fdEntry_t *fdEntry = getFdEntry(fd2);
    if (fdEntry == NULL) {
        errno = EBADF;
        return -1;
    }

    /*
     * Lock the fd to hold-off additional I/O on this fd.
     */
    pthread_mutex_lock(&(fdEntry->lock));

    {
        /*
         * And close/dup the file descriptor
         * (restart if interrupted by signal)
         */
        if (fd1 < 0) {
            rv = close(fd2);
        } else {
            do {
                rv = dup2(fd1, fd2);
            } while (rv == -1 && errno == EINTR);
        }

        /*
         * Send a wakeup signal to all threads blocked on this
         * file descriptor.
         */
        threadEntry_t *curr = fdEntry->threads;
        while (curr != NULL) {
            curr->intr = 1;
            pthread_kill(curr->thr, sigWakeup);
            curr = curr->next;
        }
    }

    /*
     * Unlock without destroying errno
     */
    orig_errno = errno;
    pthread_mutex_unlock(&(fdEntry->lock));
    errno = orig_errno;

    return rv;
}

/*
 * Wrapper for dup2 - same semantics as dup2 system call except
 * that any threads blocked in an I/O system call on fd2 will be
 * preempted and return -1/EBADF.
 */
int NET_Dup2(int fd, int fd2) {
    if (fd < 0) {
        errno = EBADF;
        return -1;
    }
    return closefd(fd, fd2);
}

/*
 * Wrapper for close - same semantics as close system call
 * except that any threads blocked in an I/O on fd will be
 * preempted and the I/O system call will return -1/EBADF.
 */
int NET_SocketClose(int fd) {
    return closefd(-1, fd);
}
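
/*
 * Example of the interplay (illustrative sketch, not code in this file):
 * if thread A is blocked in NET_Read(fd, ...) and thread B calls
 * NET_SocketClose(fd), then B marks A's threadEntry as interrupted and
 * signals it with sigWakeup; A's recv() fails with EINTR, endOp()
 * rewrites errno to EBADF, and NET_Read returns -1/EBADF.
 */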

/************** Basic I/O operations here ***************/

/*
 * Macro to perform a blocking IO operation. Restarts
 * automatically if interrupted by signal (other than
 * our wakeup signal)
 */
#define BLOCKING_IO_RETURN_INT(FD, FUNC) {      \
    int ret;                                    \
    threadEntry_t self;                         \
    fdEntry_t *fdEntry = getFdEntry(FD);        \
    if (fdEntry == NULL) {                      \
        errno = EBADF;                          \
        return -1;                              \
    }                                           \
    do {                                        \
        startOp(fdEntry, &self);                \
        ret = FUNC;                             \
        endOp(fdEntry, &self);                  \
    } while (ret == -1 && errno == EINTR);      \
    return ret;                                 \
}
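
/*
 * For example, NET_Read below expands roughly to (sketch):
 *
 *     int ret;
 *     threadEntry_t self;
 *     fdEntry_t *fdEntry = getFdEntry(s);
 *     if (fdEntry == NULL) { errno = EBADF; return -1; }
 *     do {
 *         startOp(fdEntry, &self);
 *         ret = recv(s, buf, len, 0);
 *         endOp(fdEntry, &self);
 *     } while (ret == -1 && errno == EINTR);
 *     return ret;
 *
 * An ordinary signal restarts the call; the wakeup signal does not,
 * because endOp() replaces EINTR with EBADF for interrupted threads,
 * which falls out of the while condition.
 */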

int NET_Read(int s, void* buf, size_t len) {
    BLOCKING_IO_RETURN_INT( s, recv(s, buf, len, 0) );
}

int NET_NonBlockingRead(int s, void* buf, size_t len) {
    BLOCKING_IO_RETURN_INT( s, recv(s, buf, len, MSG_DONTWAIT) );
}

int NET_ReadV(int s, const struct iovec * vector, int count) {
    BLOCKING_IO_RETURN_INT( s, readv(s, vector, count) );
}

int NET_RecvFrom(int s, void *buf, int len, unsigned int flags,
                 struct sockaddr *from, socklen_t *fromlen) {
    BLOCKING_IO_RETURN_INT( s, recvfrom(s, buf, len, flags, from, fromlen) );
}

int NET_Send(int s, void *msg, int len, unsigned int flags) {
    BLOCKING_IO_RETURN_INT( s, send(s, msg, len, flags) );
}

int NET_SendTo(int s, const void *msg, int len, unsigned int flags,
               const struct sockaddr *to, int tolen) {
    BLOCKING_IO_RETURN_INT( s, sendto(s, msg, len, flags, to, tolen) );
}

int NET_Accept(int s, struct sockaddr *addr, socklen_t *addrlen) {
    BLOCKING_IO_RETURN_INT( s, accept(s, addr, addrlen) );
}

int NET_Connect(int s, struct sockaddr *addr, int addrlen) {
    BLOCKING_IO_RETURN_INT( s, connect(s, addr, addrlen) );
}

int NET_Poll(struct pollfd *ufds, unsigned int nfds, int timeout) {
    BLOCKING_IO_RETURN_INT( ufds[0].fd, poll(ufds, nfds, timeout) );
}

/*
 * Wrapper for poll(s, timeout).
 * Auto restarts with adjusted timeout if interrupted by
 * signal other than our wakeup signal.
 */
int NET_Timeout(JNIEnv *env, int s, long timeout, jlong nanoTimeStamp) {
    jlong prevNanoTime = nanoTimeStamp;
    jlong nanoTimeout = (jlong)timeout * NET_NSEC_PER_MSEC;
    fdEntry_t *fdEntry = getFdEntry(s);

    /*
     * Check that fd hasn't been closed.
     */
    if (fdEntry == NULL) {
        errno = EBADF;
        return -1;
    }

    for (;;) {
        struct pollfd pfd;
        int rv;
        threadEntry_t self;

        /*
         * Poll the fd. If interrupted by our wakeup signal
         * errno will be set to EBADF.
         */
        pfd.fd = s;
        pfd.events = POLLIN | POLLERR;

        startOp(fdEntry, &self);
        rv = poll(&pfd, 1, nanoTimeout / NET_NSEC_PER_MSEC);
        endOp(fdEntry, &self);

        /*
         * If interrupted then adjust timeout. If timeout
         * has expired return 0 (indicating timeout expired).
         */
        if (rv < 0 && errno == EINTR) {
            jlong newNanoTime = JVM_NanoTime(env, 0);
            nanoTimeout -= newNanoTime - prevNanoTime;
            if (nanoTimeout < NET_NSEC_PER_MSEC) {
                return 0;
            }
            prevNanoTime = newNanoTime;
        } else {
            return rv;
        }
    }
}
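
/*
 * Typical use (illustrative sketch; the variable names are hypothetical):
 * the caller samples the clock first so that time lost to signal
 * restarts is accounted for, e.g.
 *
 *     jlong start = JVM_NanoTime(env, 0);
 *     int n = NET_Timeout(env, fd, millis, start);
 *     // n > 0: fd is readable; n == 0: timed out; n < 0: error, errno set
 */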