1 | /* |
2 | * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. |
3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 | * |
5 | * This code is free software; you can redistribute it and/or modify it |
6 | * under the terms of the GNU General Public License version 2 only, as |
7 | * published by the Free Software Foundation. Oracle designates this |
8 | * particular file as subject to the "Classpath" exception as provided |
9 | * by Oracle in the LICENSE file that accompanied this code. |
10 | * |
11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
14 | * version 2 for more details (a copy is included in the LICENSE file that |
15 | * accompanied this code). |
16 | * |
17 | * You should have received a copy of the GNU General Public License version |
18 | * 2 along with this work; if not, write to the Free Software Foundation, |
19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
20 | * |
21 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
22 | * or visit www.oracle.com if you need additional information or have any |
23 | * questions. |
24 | */ |
25 | |
26 | #include <assert.h> |
27 | #include <limits.h> |
28 | #include <stdio.h> |
29 | #include <stdlib.h> |
30 | #include <signal.h> |
31 | #include <pthread.h> |
32 | #include <sys/types.h> |
33 | #include <sys/socket.h> |
34 | #include <sys/time.h> |
35 | #include <sys/resource.h> |
36 | #include <sys/uio.h> |
37 | #include <unistd.h> |
38 | #include <errno.h> |
39 | #include <poll.h> |
40 | #include "jvm.h" |
41 | #include "net_util.h" |
42 | |
43 | /* |
44 | * Stack allocated by thread when doing blocking operation |
45 | */ |
46 | typedef struct threadEntry { |
47 | pthread_t thr; /* this thread */ |
48 | struct threadEntry *next; /* next thread */ |
49 | int intr; /* interrupted */ |
50 | } threadEntry_t; |
51 | |
52 | /* |
53 | * Heap allocated during initialized - one entry per fd |
54 | */ |
55 | typedef struct { |
56 | pthread_mutex_t lock; /* fd lock */ |
57 | threadEntry_t *threads; /* threads blocked on fd */ |
58 | } fdEntry_t; |
59 | |
60 | /* |
61 | * Signal to unblock thread |
62 | */ |
63 | static int sigWakeup = (__SIGRTMAX - 2); |
64 | |
65 | /* |
66 | * fdTable holds one entry per file descriptor, up to a certain |
67 | * maximum. |
68 | * Theoretically, the number of possible file descriptors can get |
69 | * large, though usually it does not. Entries for small value file |
70 | * descriptors are kept in a simple table, which covers most scenarios. |
71 | * Entries for large value file descriptors are kept in an overflow |
72 | * table, which is organized as a sparse two dimensional array whose |
73 | * slabs are allocated on demand. This covers all corner cases while |
74 | * keeping memory consumption reasonable. |
75 | */ |
76 | |
77 | /* Base table for low value file descriptors */ |
78 | static fdEntry_t* fdTable = NULL; |
79 | /* Maximum size of base table (in number of entries). */ |
80 | static const int fdTableMaxSize = 0x1000; /* 4K */ |
81 | /* Actual size of base table (in number of entries) */ |
82 | static int fdTableLen = 0; |
83 | /* Max. theoretical number of file descriptors on system. */ |
84 | static int fdLimit = 0; |
85 | |
/* Overflow table, used should the base table not be large enough.
 * Organized as an array of n slabs, each holding 64k entries.
 */
89 | static fdEntry_t** fdOverflowTable = NULL; |
90 | /* Number of slabs in the overflow table */ |
91 | static int fdOverflowTableLen = 0; |
92 | /* Number of entries in one slab */ |
93 | static const int fdOverflowTableSlabSize = 0x10000; /* 64k */ |
static pthread_mutex_t fdOverflowTableLock = PTHREAD_MUTEX_INITIALIZER;
95 | |
96 | /* |
97 | * Null signal handler |
98 | */ |
99 | static void sig_wakeup(int sig) { |
100 | } |
101 | |
102 | /* |
103 | * Initialization routine (executed when library is loaded) |
104 | * Allocate fd tables and sets up signal handler. |
105 | */ |
static void __attribute__((constructor)) init() {
107 | struct rlimit nbr_files; |
108 | sigset_t sigset; |
109 | struct sigaction sa; |
110 | int i = 0; |
111 | |
112 | /* Determine the maximum number of possible file descriptors. */ |
113 | if (-1 == getrlimit(RLIMIT_NOFILE, &nbr_files)) { |
114 | fprintf(stderr, "library initialization failed - " |
115 | "unable to get max # of allocated fds\n" ); |
116 | abort(); |
117 | } |
    if (nbr_files.rlim_max != RLIM_INFINITY) {
        /* Clamp to INT_MAX; rlim_t may be wider than int. */
        fdLimit = nbr_files.rlim_max > INT_MAX ? INT_MAX : (int) nbr_files.rlim_max;
    } else {
        /* We just do not know. */
        fdLimit = INT_MAX;
    }
124 | |
125 | /* Allocate table for low value file descriptors. */ |
126 | fdTableLen = fdLimit < fdTableMaxSize ? fdLimit : fdTableMaxSize; |
127 | fdTable = (fdEntry_t*) calloc(fdTableLen, sizeof(fdEntry_t)); |
128 | if (fdTable == NULL) { |
        fprintf(stderr, "library initialization failed - "
                        "unable to allocate file descriptor table - out of memory\n");
131 | abort(); |
132 | } else { |
133 | for (i = 0; i < fdTableLen; i ++) { |
134 | pthread_mutex_init(&fdTable[i].lock, NULL); |
135 | } |
136 | } |
137 | |
138 | /* Allocate overflow table, if needed */ |
139 | if (fdLimit > fdTableMaxSize) { |
140 | fdOverflowTableLen = ((fdLimit - fdTableMaxSize) / fdOverflowTableSlabSize) + 1; |
141 | fdOverflowTable = (fdEntry_t**) calloc(fdOverflowTableLen, sizeof(fdEntry_t*)); |
142 | if (fdOverflowTable == NULL) { |
            fprintf(stderr, "library initialization failed - "
                            "unable to allocate file descriptor overflow table - out of memory\n");
145 | abort(); |
146 | } |
147 | } |
148 | |
149 | /* |
150 | * Setup the signal handler |
151 | */ |
152 | sa.sa_handler = sig_wakeup; |
153 | sa.sa_flags = 0; |
154 | sigemptyset(&sa.sa_mask); |
155 | sigaction(sigWakeup, &sa, NULL); |
156 | |
157 | sigemptyset(&sigset); |
158 | sigaddset(&sigset, sigWakeup); |
159 | sigprocmask(SIG_UNBLOCK, &sigset, NULL); |
160 | } |
161 | |
162 | /* |
163 | * Return the fd table for this fd. |
164 | */ |
165 | static inline fdEntry_t *getFdEntry(int fd) |
166 | { |
167 | fdEntry_t* result = NULL; |
168 | |
169 | if (fd < 0) { |
170 | return NULL; |
171 | } |
172 | |
173 | /* This should not happen. If it does, our assumption about |
174 | * max. fd value was wrong. */ |
175 | assert(fd < fdLimit); |
176 | |
177 | if (fd < fdTableMaxSize) { |
178 | /* fd is in base table. */ |
179 | assert(fd < fdTableLen); |
180 | result = &fdTable[fd]; |
181 | } else { |
182 | /* fd is in overflow table. */ |
183 | const int indexInOverflowTable = fd - fdTableMaxSize; |
184 | const int rootindex = indexInOverflowTable / fdOverflowTableSlabSize; |
185 | const int slabindex = indexInOverflowTable % fdOverflowTableSlabSize; |
186 | fdEntry_t* slab = NULL; |
187 | assert(rootindex < fdOverflowTableLen); |
188 | assert(slabindex < fdOverflowTableSlabSize); |
189 | pthread_mutex_lock(&fdOverflowTableLock); |
190 | /* Allocate new slab in overflow table if needed */ |
191 | if (fdOverflowTable[rootindex] == NULL) { |
192 | fdEntry_t* const newSlab = |
193 | (fdEntry_t*)calloc(fdOverflowTableSlabSize, sizeof(fdEntry_t)); |
194 | if (newSlab == NULL) { |
                fprintf(stderr, "Unable to allocate file descriptor overflow"
                                " table slab - out of memory\n");
197 | pthread_mutex_unlock(&fdOverflowTableLock); |
198 | abort(); |
199 | } else { |
200 | int i; |
201 | for (i = 0; i < fdOverflowTableSlabSize; i ++) { |
202 | pthread_mutex_init(&newSlab[i].lock, NULL); |
203 | } |
204 | fdOverflowTable[rootindex] = newSlab; |
205 | } |
206 | } |
207 | pthread_mutex_unlock(&fdOverflowTableLock); |
208 | slab = fdOverflowTable[rootindex]; |
209 | result = &slab[slabindex]; |
210 | } |
211 | |
    return result;
}
215 | |
216 | /* |
217 | * Start a blocking operation :- |
218 | * Insert thread onto thread list for the fd. |
219 | */ |
220 | static inline void startOp(fdEntry_t *fdEntry, threadEntry_t *self) |
221 | { |
222 | self->thr = pthread_self(); |
223 | self->intr = 0; |
224 | |
225 | pthread_mutex_lock(&(fdEntry->lock)); |
226 | { |
227 | self->next = fdEntry->threads; |
228 | fdEntry->threads = self; |
229 | } |
230 | pthread_mutex_unlock(&(fdEntry->lock)); |
231 | } |
232 | |
233 | /* |
234 | * End a blocking operation :- |
235 | * Remove thread from thread list for the fd |
236 | * If fd has been interrupted then set errno to EBADF |
237 | */ |
static inline void endOp(fdEntry_t *fdEntry, threadEntry_t *self)
240 | { |
241 | int orig_errno = errno; |
242 | pthread_mutex_lock(&(fdEntry->lock)); |
243 | { |
244 | threadEntry_t *curr, *prev=NULL; |
245 | curr = fdEntry->threads; |
246 | while (curr != NULL) { |
247 | if (curr == self) { |
248 | if (curr->intr) { |
249 | orig_errno = EBADF; |
250 | } |
251 | if (prev == NULL) { |
252 | fdEntry->threads = curr->next; |
253 | } else { |
254 | prev->next = curr->next; |
255 | } |
256 | break; |
257 | } |
258 | prev = curr; |
259 | curr = curr->next; |
260 | } |
261 | } |
262 | pthread_mutex_unlock(&(fdEntry->lock)); |
263 | errno = orig_errno; |
264 | } |
265 | |
266 | /* |
267 | * Close or dup2 a file descriptor ensuring that all threads blocked on |
268 | * the file descriptor are notified via a wakeup signal. |
269 | * |
270 | * fd1 < 0 => close(fd2) |
271 | * fd1 >= 0 => dup2(fd1, fd2) |
272 | * |
273 | * Returns -1 with errno set if operation fails. |
274 | */ |
275 | static int closefd(int fd1, int fd2) { |
276 | int rv, orig_errno; |
277 | fdEntry_t *fdEntry = getFdEntry(fd2); |
278 | if (fdEntry == NULL) { |
279 | errno = EBADF; |
280 | return -1; |
281 | } |
282 | |
283 | /* |
284 | * Lock the fd to hold-off additional I/O on this fd. |
285 | */ |
286 | pthread_mutex_lock(&(fdEntry->lock)); |
287 | |
288 | { |
289 | /* |
290 | * And close/dup the file descriptor |
291 | * (restart if interrupted by signal) |
292 | */ |
293 | if (fd1 < 0) { |
294 | rv = close(fd2); |
295 | } else { |
296 | do { |
297 | rv = dup2(fd1, fd2); |
298 | } while (rv == -1 && errno == EINTR); |
299 | } |
300 | |
301 | /* |
302 | * Send a wakeup signal to all threads blocked on this |
303 | * file descriptor. |
304 | */ |
305 | threadEntry_t *curr = fdEntry->threads; |
306 | while (curr != NULL) { |
307 | curr->intr = 1; |
308 | pthread_kill( curr->thr, sigWakeup ); |
309 | curr = curr->next; |
310 | } |
311 | } |
312 | |
313 | /* |
314 | * Unlock without destroying errno |
315 | */ |
316 | orig_errno = errno; |
317 | pthread_mutex_unlock(&(fdEntry->lock)); |
318 | errno = orig_errno; |
319 | |
320 | return rv; |
321 | } |
322 | |
323 | /* |
324 | * Wrapper for dup2 - same semantics as dup2 system call except |
325 | * that any threads blocked in an I/O system call on fd2 will be |
326 | * preempted and return -1/EBADF; |
327 | */ |
328 | int NET_Dup2(int fd, int fd2) { |
329 | if (fd < 0) { |
330 | errno = EBADF; |
331 | return -1; |
332 | } |
333 | return closefd(fd, fd2); |
334 | } |
335 | |
336 | /* |
337 | * Wrapper for close - same semantics as close system call |
338 | * except that any threads blocked in an I/O on fd will be |
339 | * preempted and the I/O system call will return -1/EBADF. |
340 | */ |
341 | int NET_SocketClose(int fd) { |
342 | return closefd(-1, fd); |
343 | } |
344 | |
345 | /************** Basic I/O operations here ***************/ |
346 | |
347 | /* |
348 | * Macro to perform a blocking IO operation. Restarts |
349 | * automatically if interrupted by signal (other than |
350 | * our wakeup signal) |
351 | */ |
352 | #define BLOCKING_IO_RETURN_INT(FD, FUNC) { \ |
353 | int ret; \ |
354 | threadEntry_t self; \ |
355 | fdEntry_t *fdEntry = getFdEntry(FD); \ |
356 | if (fdEntry == NULL) { \ |
357 | errno = EBADF; \ |
358 | return -1; \ |
359 | } \ |
360 | do { \ |
361 | startOp(fdEntry, &self); \ |
362 | ret = FUNC; \ |
363 | endOp(fdEntry, &self); \ |
364 | } while (ret == -1 && errno == EINTR); \ |
365 | return ret; \ |
366 | } |
367 | |
368 | int NET_Read(int s, void* buf, size_t len) { |
369 | BLOCKING_IO_RETURN_INT( s, recv(s, buf, len, 0) ); |
370 | } |
371 | |
372 | int NET_NonBlockingRead(int s, void* buf, size_t len) { |
373 | BLOCKING_IO_RETURN_INT( s, recv(s, buf, len, MSG_DONTWAIT) ); |
374 | } |
375 | |
376 | int NET_ReadV(int s, const struct iovec * vector, int count) { |
377 | BLOCKING_IO_RETURN_INT( s, readv(s, vector, count) ); |
378 | } |
379 | |
380 | int NET_RecvFrom(int s, void *buf, int len, unsigned int flags, |
381 | struct sockaddr *from, socklen_t *fromlen) { |
382 | BLOCKING_IO_RETURN_INT( s, recvfrom(s, buf, len, flags, from, fromlen) ); |
383 | } |
384 | |
385 | int NET_Send(int s, void *msg, int len, unsigned int flags) { |
386 | BLOCKING_IO_RETURN_INT( s, send(s, msg, len, flags) ); |
387 | } |
388 | |
389 | int NET_SendTo(int s, const void *msg, int len, unsigned int |
390 | flags, const struct sockaddr *to, int tolen) { |
391 | BLOCKING_IO_RETURN_INT( s, sendto(s, msg, len, flags, to, tolen) ); |
392 | } |
393 | |
394 | int NET_Accept(int s, struct sockaddr *addr, socklen_t *addrlen) { |
395 | BLOCKING_IO_RETURN_INT( s, accept(s, addr, addrlen) ); |
396 | } |
397 | |
398 | int NET_Connect(int s, struct sockaddr *addr, int addrlen) { |
399 | BLOCKING_IO_RETURN_INT( s, connect(s, addr, addrlen) ); |
400 | } |
401 | |
402 | int NET_Poll(struct pollfd *ufds, unsigned int nfds, int timeout) { |
403 | BLOCKING_IO_RETURN_INT( ufds[0].fd, poll(ufds, nfds, timeout) ); |
404 | } |
405 | |
406 | /* |
407 | * Wrapper for poll(s, timeout). |
408 | * Auto restarts with adjusted timeout if interrupted by |
409 | * signal other than our wakeup signal. |
410 | */ |
411 | int NET_Timeout(JNIEnv *env, int s, long timeout, jlong nanoTimeStamp) { |
412 | jlong prevNanoTime = nanoTimeStamp; |
413 | jlong nanoTimeout = (jlong)timeout * NET_NSEC_PER_MSEC; |
414 | fdEntry_t *fdEntry = getFdEntry(s); |
415 | |
416 | /* |
417 | * Check that fd hasn't been closed. |
418 | */ |
419 | if (fdEntry == NULL) { |
420 | errno = EBADF; |
421 | return -1; |
422 | } |
423 | |
424 | for(;;) { |
425 | struct pollfd pfd; |
426 | int rv; |
427 | threadEntry_t self; |
428 | |
429 | /* |
430 | * Poll the fd. If interrupted by our wakeup signal |
431 | * errno will be set to EBADF. |
432 | */ |
433 | pfd.fd = s; |
434 | pfd.events = POLLIN | POLLERR; |
435 | |
436 | startOp(fdEntry, &self); |
437 | rv = poll(&pfd, 1, nanoTimeout / NET_NSEC_PER_MSEC); |
438 | endOp(fdEntry, &self); |
439 | /* |
440 | * If interrupted then adjust timeout. If timeout |
441 | * has expired return 0 (indicating timeout expired). |
442 | */ |
443 | if (rv < 0 && errno == EINTR) { |
444 | jlong newNanoTime = JVM_NanoTime(env, 0); |
445 | nanoTimeout -= newNanoTime - prevNanoTime; |
446 | if (nanoTimeout < NET_NSEC_PER_MSEC) { |
447 | return 0; |
448 | } |
449 | prevNanoTime = newNanoTime; |
450 | } else { |
451 | return rv; |
452 | } |
453 | } |
454 | } |
455 | |