1 | /* Copyright Joyent, Inc. and other Node contributors. All rights reserved. |
2 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
3 | * of this software and associated documentation files (the "Software"), to |
4 | * deal in the Software without restriction, including without limitation the |
5 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or |
6 | * sell copies of the Software, and to permit persons to whom the Software is |
7 | * furnished to do so, subject to the following conditions: |
8 | * |
9 | * The above copyright notice and this permission notice shall be included in |
10 | * all copies or substantial portions of the Software. |
11 | * |
12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
13 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
15 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
16 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
17 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
18 | * IN THE SOFTWARE. |
19 | */ |
20 | |
21 | /* We lean on the fact that POLL{IN,OUT,ERR,HUP} correspond with their |
22 | * EPOLL* counterparts. We use the POLL* variants in this file because that |
23 | * is what libuv uses elsewhere. |
24 | */ |
25 | |
26 | #include "uv.h" |
27 | #include "internal.h" |
28 | |
29 | #include <inttypes.h> |
30 | #include <stdint.h> |
31 | #include <stdio.h> |
32 | #include <stdlib.h> |
33 | #include <string.h> |
34 | #include <assert.h> |
35 | #include <errno.h> |
36 | |
37 | #include <net/if.h> |
38 | #include <sys/epoll.h> |
39 | #include <sys/param.h> |
40 | #include <sys/prctl.h> |
41 | #include <sys/sysinfo.h> |
42 | #include <unistd.h> |
43 | #include <fcntl.h> |
44 | #include <time.h> |
45 | |
46 | #define HAVE_IFADDRS_H 1 |
47 | |
48 | #ifdef __UCLIBC__ |
# if __UCLIBC_MAJOR__ == 0 && \
     (__UCLIBC_MINOR__ < 9 || (__UCLIBC_MINOR__ == 9 && __UCLIBC_SUBLEVEL__ < 32))
50 | # undef HAVE_IFADDRS_H |
51 | # endif |
52 | #endif |
53 | |
54 | #ifdef HAVE_IFADDRS_H |
55 | # if defined(__ANDROID__) |
56 | # include "uv/android-ifaddrs.h" |
57 | # else |
58 | # include <ifaddrs.h> |
59 | # endif |
60 | # include <sys/socket.h> |
61 | # include <net/ethernet.h> |
62 | # include <netpacket/packet.h> |
63 | #endif /* HAVE_IFADDRS_H */ |
64 | |
65 | /* Available from 2.6.32 onwards. */ |
66 | #ifndef CLOCK_MONOTONIC_COARSE |
67 | # define CLOCK_MONOTONIC_COARSE 6 |
68 | #endif |
69 | |
70 | /* This is rather annoying: CLOCK_BOOTTIME lives in <linux/time.h> but we can't |
71 | * include that file because it conflicts with <time.h>. We'll just have to |
72 | * define it ourselves. |
73 | */ |
74 | #ifndef CLOCK_BOOTTIME |
75 | # define CLOCK_BOOTTIME 7 |
76 | #endif |
77 | |
78 | static int read_models(unsigned int numcpus, uv_cpu_info_t* ci); |
79 | static int read_times(FILE* statfile_fp, |
80 | unsigned int numcpus, |
81 | uv_cpu_info_t* ci); |
82 | static void read_speeds(unsigned int numcpus, uv_cpu_info_t* ci); |
83 | static uint64_t read_cpufreq(unsigned int cpunum); |
84 | |
85 | |
86 | int uv__platform_loop_init(uv_loop_t* loop) { |
87 | int fd; |
88 | fd = epoll_create1(O_CLOEXEC); |
89 | |
90 | /* epoll_create1() can fail either because it's not implemented (old kernel) |
91 | * or because it doesn't understand the O_CLOEXEC flag. |
92 | */ |
93 | if (fd == -1 && (errno == ENOSYS || errno == EINVAL)) { |
94 | fd = epoll_create(256); |
95 | |
96 | if (fd != -1) |
97 | uv__cloexec(fd, 1); |
98 | } |
99 | |
100 | loop->backend_fd = fd; |
101 | loop->inotify_fd = -1; |
102 | loop->inotify_watchers = NULL; |
103 | |
104 | if (fd == -1) |
105 | return UV__ERR(errno); |
106 | |
107 | return 0; |
108 | } |
109 | |
110 | |
111 | int uv__io_fork(uv_loop_t* loop) { |
112 | int err; |
113 | void* old_watchers; |
114 | |
115 | old_watchers = loop->inotify_watchers; |
116 | |
117 | uv__close(loop->backend_fd); |
118 | loop->backend_fd = -1; |
119 | uv__platform_loop_delete(loop); |
120 | |
121 | err = uv__platform_loop_init(loop); |
122 | if (err) |
123 | return err; |
124 | |
125 | return uv__inotify_fork(loop, old_watchers); |
126 | } |
127 | |
128 | |
129 | void uv__platform_loop_delete(uv_loop_t* loop) { |
130 | if (loop->inotify_fd == -1) return; |
131 | uv__io_stop(loop, &loop->inotify_read_watcher, POLLIN); |
132 | uv__close(loop->inotify_fd); |
133 | loop->inotify_fd = -1; |
134 | } |
135 | |
136 | |
137 | void uv__platform_invalidate_fd(uv_loop_t* loop, int fd) { |
138 | struct epoll_event* events; |
139 | struct epoll_event dummy; |
140 | uintptr_t i; |
141 | uintptr_t nfds; |
142 | |
143 | assert(loop->watchers != NULL); |
144 | assert(fd >= 0); |
145 | |
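  /* uv__io_poll() stashes the epoll_event array and its length in the two
   * extra slots past loop->nwatchers while callbacks run; that is what lets
   * us find and invalidate pending events for this fd here.
   */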
146 | events = (struct epoll_event*) loop->watchers[loop->nwatchers]; |
147 | nfds = (uintptr_t) loop->watchers[loop->nwatchers + 1]; |
148 | if (events != NULL) |
149 | /* Invalidate events with same file descriptor */ |
150 | for (i = 0; i < nfds; i++) |
151 | if (events[i].data.fd == fd) |
152 | events[i].data.fd = -1; |
153 | |
154 | /* Remove the file descriptor from the epoll. |
155 | * This avoids a problem where the same file description remains open |
156 | * in another process, causing repeated junk epoll events. |
157 | * |
158 | * We pass in a dummy epoll_event, to work around a bug in old kernels. |
159 | */ |
160 | if (loop->backend_fd >= 0) { |
161 | /* Work around a bug in kernels 3.10 to 3.19 where passing a struct that |
162 | * has the EPOLLWAKEUP flag set generates spurious audit syslog warnings. |
163 | */ |
164 | memset(&dummy, 0, sizeof(dummy)); |
165 | epoll_ctl(loop->backend_fd, EPOLL_CTL_DEL, fd, &dummy); |
166 | } |
167 | } |
168 | |
169 | |
170 | int uv__io_check_fd(uv_loop_t* loop, int fd) { |
171 | struct epoll_event e; |
172 | int rc; |
173 | |
174 | memset(&e, 0, sizeof(e)); |
175 | e.events = POLLIN; |
176 | e.data.fd = -1; |
177 | |
178 | rc = 0; |
179 | if (epoll_ctl(loop->backend_fd, EPOLL_CTL_ADD, fd, &e)) |
180 | if (errno != EEXIST) |
181 | rc = UV__ERR(errno); |
182 | |
183 | if (rc == 0) |
184 | if (epoll_ctl(loop->backend_fd, EPOLL_CTL_DEL, fd, &e)) |
185 | abort(); |
186 | |
187 | return rc; |
188 | } |
189 | |
190 | |
191 | void uv__io_poll(uv_loop_t* loop, int timeout) { |
192 | /* A bug in kernels < 2.6.37 makes timeouts larger than ~30 minutes |
   * effectively infinite on 32-bit architectures. To avoid blocking
194 | * indefinitely, we cap the timeout and poll again if necessary. |
195 | * |
196 | * Note that "30 minutes" is a simplification because it depends on |
197 | * the value of CONFIG_HZ. The magic constant assumes CONFIG_HZ=1200, |
198 | * that being the largest value I have seen in the wild (and only once.) |
199 | */ |
200 | static const int max_safe_timeout = 1789569; |
201 | static int no_epoll_pwait_cached; |
202 | static int no_epoll_wait_cached; |
203 | int no_epoll_pwait; |
204 | int no_epoll_wait; |
205 | struct epoll_event events[1024]; |
206 | struct epoll_event* pe; |
207 | struct epoll_event e; |
208 | int real_timeout; |
209 | QUEUE* q; |
210 | uv__io_t* w; |
211 | sigset_t sigset; |
212 | uint64_t sigmask; |
213 | uint64_t base; |
214 | int have_signals; |
215 | int nevents; |
216 | int count; |
217 | int nfds; |
218 | int fd; |
219 | int op; |
220 | int i; |
221 | int user_timeout; |
222 | int reset_timeout; |
223 | |
224 | if (loop->nfds == 0) { |
225 | assert(QUEUE_EMPTY(&loop->watcher_queue)); |
226 | return; |
227 | } |
228 | |
229 | memset(&e, 0, sizeof(e)); |
230 | |
231 | while (!QUEUE_EMPTY(&loop->watcher_queue)) { |
232 | q = QUEUE_HEAD(&loop->watcher_queue); |
233 | QUEUE_REMOVE(q); |
234 | QUEUE_INIT(q); |
235 | |
236 | w = QUEUE_DATA(q, uv__io_t, watcher_queue); |
237 | assert(w->pevents != 0); |
238 | assert(w->fd >= 0); |
239 | assert(w->fd < (int) loop->nwatchers); |
240 | |
241 | e.events = w->pevents; |
242 | e.data.fd = w->fd; |
243 | |
244 | if (w->events == 0) |
245 | op = EPOLL_CTL_ADD; |
246 | else |
247 | op = EPOLL_CTL_MOD; |
248 | |
249 | /* XXX Future optimization: do EPOLL_CTL_MOD lazily if we stop watching |
250 | * events, skip the syscall and squelch the events after epoll_wait(). |
251 | */ |
252 | if (epoll_ctl(loop->backend_fd, op, w->fd, &e)) { |
253 | if (errno != EEXIST) |
254 | abort(); |
255 | |
256 | assert(op == EPOLL_CTL_ADD); |
257 | |
258 | /* We've reactivated a file descriptor that's been watched before. */ |
259 | if (epoll_ctl(loop->backend_fd, EPOLL_CTL_MOD, w->fd, &e)) |
260 | abort(); |
261 | } |
262 | |
263 | w->events = w->pevents; |
264 | } |
265 | |
266 | sigmask = 0; |
267 | if (loop->flags & UV_LOOP_BLOCK_SIGPROF) { |
268 | sigemptyset(&sigset); |
269 | sigaddset(&sigset, SIGPROF); |
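    /* Linux numbers signals from 1, so signal N maps to bit N-1 of the
     * kernel's signal mask layout.
     */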
270 | sigmask |= 1 << (SIGPROF - 1); |
271 | } |
272 | |
273 | assert(timeout >= -1); |
274 | base = loop->time; |
275 | count = 48; /* Benchmarks suggest this gives the best throughput. */ |
276 | real_timeout = timeout; |
277 | |
278 | if (uv__get_internal_fields(loop)->flags & UV_METRICS_IDLE_TIME) { |
279 | reset_timeout = 1; |
280 | user_timeout = timeout; |
281 | timeout = 0; |
282 | } else { |
283 | reset_timeout = 0; |
284 | user_timeout = 0; |
285 | } |
286 | |
287 | /* You could argue there is a dependency between these two but |
288 | * ultimately we don't care about their ordering with respect |
289 | * to one another. Worst case, we make a few system calls that |
290 | * could have been avoided because another thread already knows |
291 | * they fail with ENOSYS. Hardly the end of the world. |
292 | */ |
293 | no_epoll_pwait = uv__load_relaxed(&no_epoll_pwait_cached); |
294 | no_epoll_wait = uv__load_relaxed(&no_epoll_wait_cached); |
295 | |
296 | for (;;) { |
297 | /* Only need to set the provider_entry_time if timeout != 0. The function |
298 | * will return early if the loop isn't configured with UV_METRICS_IDLE_TIME. |
299 | */ |
300 | if (timeout != 0) |
301 | uv__metrics_set_provider_entry_time(loop); |
302 | |
303 | /* See the comment for max_safe_timeout for an explanation of why |
304 | * this is necessary. Executive summary: kernel bug workaround. |
305 | */ |
306 | if (sizeof(int32_t) == sizeof(long) && timeout >= max_safe_timeout) |
307 | timeout = max_safe_timeout; |
308 | |
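    /* If epoll_pwait() is unavailable, approximate its atomic signal masking
     * by blocking the signals around a plain epoll_wait() call, at the cost
     * of two extra system calls per iteration.
     */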
309 | if (sigmask != 0 && no_epoll_pwait != 0) |
310 | if (pthread_sigmask(SIG_BLOCK, &sigset, NULL)) |
311 | abort(); |
312 | |
313 | if (no_epoll_wait != 0 || (sigmask != 0 && no_epoll_pwait == 0)) { |
314 | nfds = epoll_pwait(loop->backend_fd, |
315 | events, |
316 | ARRAY_SIZE(events), |
317 | timeout, |
318 | &sigset); |
319 | if (nfds == -1 && errno == ENOSYS) { |
320 | uv__store_relaxed(&no_epoll_pwait_cached, 1); |
321 | no_epoll_pwait = 1; |
322 | } |
323 | } else { |
324 | nfds = epoll_wait(loop->backend_fd, |
325 | events, |
326 | ARRAY_SIZE(events), |
327 | timeout); |
328 | if (nfds == -1 && errno == ENOSYS) { |
329 | uv__store_relaxed(&no_epoll_wait_cached, 1); |
330 | no_epoll_wait = 1; |
331 | } |
332 | } |
333 | |
334 | if (sigmask != 0 && no_epoll_pwait != 0) |
335 | if (pthread_sigmask(SIG_UNBLOCK, &sigset, NULL)) |
336 | abort(); |
337 | |
338 | /* Update loop->time unconditionally. It's tempting to skip the update when |
339 | * timeout == 0 (i.e. non-blocking poll) but there is no guarantee that the |
340 | * operating system didn't reschedule our process while in the syscall. |
341 | */ |
342 | SAVE_ERRNO(uv__update_time(loop)); |
343 | |
344 | if (nfds == 0) { |
345 | assert(timeout != -1); |
346 | |
347 | if (reset_timeout != 0) { |
348 | timeout = user_timeout; |
349 | reset_timeout = 0; |
350 | } |
351 | |
352 | if (timeout == -1) |
353 | continue; |
354 | |
355 | if (timeout == 0) |
356 | return; |
357 | |
358 | /* We may have been inside the system call for longer than |timeout| |
359 | * milliseconds so we need to update the timestamp to avoid drift. |
360 | */ |
361 | goto update_timeout; |
362 | } |
363 | |
364 | if (nfds == -1) { |
365 | if (errno == ENOSYS) { |
366 | /* epoll_wait() or epoll_pwait() failed, try the other system call. */ |
367 | assert(no_epoll_wait == 0 || no_epoll_pwait == 0); |
368 | continue; |
369 | } |
370 | |
371 | if (errno != EINTR) |
372 | abort(); |
373 | |
374 | if (reset_timeout != 0) { |
375 | timeout = user_timeout; |
376 | reset_timeout = 0; |
377 | } |
378 | |
379 | if (timeout == -1) |
380 | continue; |
381 | |
382 | if (timeout == 0) |
383 | return; |
384 | |
385 | /* Interrupted by a signal. Update timeout and poll again. */ |
386 | goto update_timeout; |
387 | } |
388 | |
389 | have_signals = 0; |
390 | nevents = 0; |
391 | |
392 | { |
393 | /* Squelch a -Waddress-of-packed-member warning with gcc >= 9. */ |
394 | union { |
395 | struct epoll_event* events; |
396 | uv__io_t* watchers; |
397 | } x; |
398 | |
399 | x.events = events; |
400 | assert(loop->watchers != NULL); |
401 | loop->watchers[loop->nwatchers] = x.watchers; |
402 | loop->watchers[loop->nwatchers + 1] = (void*) (uintptr_t) nfds; |
403 | } |
404 | |
405 | for (i = 0; i < nfds; i++) { |
406 | pe = events + i; |
407 | fd = pe->data.fd; |
408 | |
409 | /* Skip invalidated events, see uv__platform_invalidate_fd */ |
410 | if (fd == -1) |
411 | continue; |
412 | |
413 | assert(fd >= 0); |
414 | assert((unsigned) fd < loop->nwatchers); |
415 | |
416 | w = loop->watchers[fd]; |
417 | |
418 | if (w == NULL) { |
419 | /* File descriptor that we've stopped watching, disarm it. |
420 | * |
421 | * Ignore all errors because we may be racing with another thread |
422 | * when the file descriptor is closed. |
423 | */ |
424 | epoll_ctl(loop->backend_fd, EPOLL_CTL_DEL, fd, pe); |
425 | continue; |
426 | } |
427 | |
428 | /* Give users only events they're interested in. Prevents spurious |
       * callbacks when a previous callback invocation in this loop has
       * stopped the current watcher. Also filters out events that the user
       * has not requested us to watch.
432 | */ |
433 | pe->events &= w->pevents | POLLERR | POLLHUP; |
434 | |
435 | /* Work around an epoll quirk where it sometimes reports just the |
436 | * EPOLLERR or EPOLLHUP event. In order to force the event loop to |
437 | * move forward, we merge in the read/write events that the watcher |
438 | * is interested in; uv__read() and uv__write() will then deal with |
439 | * the error or hangup in the usual fashion. |
440 | * |
441 | * Note to self: happens when epoll reports EPOLLIN|EPOLLHUP, the user |
442 | * reads the available data, calls uv_read_stop(), then sometime later |
443 | * calls uv_read_start() again. By then, libuv has forgotten about the |
444 | * hangup and the kernel won't report EPOLLIN again because there's |
445 | * nothing left to read. If anything, libuv is to blame here. The |
446 | * current hack is just a quick bandaid; to properly fix it, libuv |
447 | * needs to remember the error/hangup event. We should get that for |
448 | * free when we switch over to edge-triggered I/O. |
449 | */ |
450 | if (pe->events == POLLERR || pe->events == POLLHUP) |
451 | pe->events |= |
452 | w->pevents & (POLLIN | POLLOUT | UV__POLLRDHUP | UV__POLLPRI); |
453 | |
454 | if (pe->events != 0) { |
455 | /* Run signal watchers last. This also affects child process watchers |
456 | * because those are implemented in terms of signal watchers. |
457 | */ |
458 | if (w == &loop->signal_io_watcher) { |
459 | have_signals = 1; |
460 | } else { |
461 | uv__metrics_update_idle_time(loop); |
462 | w->cb(loop, w, pe->events); |
463 | } |
464 | |
465 | nevents++; |
466 | } |
467 | } |
468 | |
469 | if (reset_timeout != 0) { |
470 | timeout = user_timeout; |
471 | reset_timeout = 0; |
472 | } |
473 | |
474 | if (have_signals != 0) { |
475 | uv__metrics_update_idle_time(loop); |
476 | loop->signal_io_watcher.cb(loop, &loop->signal_io_watcher, POLLIN); |
477 | } |
478 | |
479 | loop->watchers[loop->nwatchers] = NULL; |
480 | loop->watchers[loop->nwatchers + 1] = NULL; |
481 | |
482 | if (have_signals != 0) |
483 | return; /* Event loop should cycle now so don't poll again. */ |
484 | |
485 | if (nevents != 0) { |
486 | if (nfds == ARRAY_SIZE(events) && --count != 0) { |
487 | /* Poll for more events but don't block this time. */ |
488 | timeout = 0; |
489 | continue; |
490 | } |
491 | return; |
492 | } |
493 | |
494 | if (timeout == 0) |
495 | return; |
496 | |
497 | if (timeout == -1) |
498 | continue; |
499 | |
500 | update_timeout: |
501 | assert(timeout > 0); |
502 | |
503 | real_timeout -= (loop->time - base); |
504 | if (real_timeout <= 0) |
505 | return; |
506 | |
507 | timeout = real_timeout; |
508 | } |
509 | } |
510 | |
511 | |
512 | uint64_t uv__hrtime(uv_clocktype_t type) { |
513 | static clock_t fast_clock_id = -1; |
514 | struct timespec t; |
515 | clock_t clock_id; |
516 | |
517 | /* Prefer CLOCK_MONOTONIC_COARSE if available but only when it has |
518 | * millisecond granularity or better. CLOCK_MONOTONIC_COARSE is |
519 | * serviced entirely from the vDSO, whereas CLOCK_MONOTONIC may |
520 | * decide to make a costly system call. |
521 | */ |
522 | /* TODO(bnoordhuis) Use CLOCK_MONOTONIC_COARSE for UV_CLOCK_PRECISE |
523 | * when it has microsecond granularity or better (unlikely). |
524 | */ |
525 | clock_id = CLOCK_MONOTONIC; |
526 | if (type != UV_CLOCK_FAST) |
527 | goto done; |
528 | |
529 | clock_id = uv__load_relaxed(&fast_clock_id); |
530 | if (clock_id != -1) |
531 | goto done; |
532 | |
533 | clock_id = CLOCK_MONOTONIC; |
534 | if (0 == clock_getres(CLOCK_MONOTONIC_COARSE, &t)) |
535 | if (t.tv_nsec <= 1 * 1000 * 1000) |
536 | clock_id = CLOCK_MONOTONIC_COARSE; |
537 | |
538 | uv__store_relaxed(&fast_clock_id, clock_id); |
539 | |
540 | done: |
541 | |
542 | if (clock_gettime(clock_id, &t)) |
543 | return 0; /* Not really possible. */ |
544 | |
545 | return t.tv_sec * (uint64_t) 1e9 + t.tv_nsec; |
546 | } |
547 | |
548 | |
int uv_resident_set_memory(size_t* rss) {
550 | char buf[1024]; |
551 | const char* s; |
552 | ssize_t n; |
553 | long val; |
554 | int fd; |
555 | int i; |
556 | |
557 | do |
    fd = open("/proc/self/stat", O_RDONLY);
559 | while (fd == -1 && errno == EINTR); |
560 | |
561 | if (fd == -1) |
562 | return UV__ERR(errno); |
563 | |
564 | do |
565 | n = read(fd, buf, sizeof(buf) - 1); |
566 | while (n == -1 && errno == EINTR); |
567 | |
568 | uv__close(fd); |
569 | if (n == -1) |
570 | return UV__ERR(errno); |
571 | buf[n] = '\0'; |
572 | |
573 | s = strchr(buf, ' '); |
574 | if (s == NULL) |
575 | goto err; |
576 | |
577 | s += 1; |
578 | if (*s != '(') |
579 | goto err; |
580 | |
581 | s = strchr(s, ')'); |
582 | if (s == NULL) |
583 | goto err; |
584 | |
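  /* We are at the ')' closing field 2 (comm), which may itself contain
   * spaces; skipping 22 more space separators lands on field 24 of
   * /proc/self/stat, rss, measured in pages.
   */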
585 | for (i = 1; i <= 22; i++) { |
586 | s = strchr(s + 1, ' '); |
587 | if (s == NULL) |
588 | goto err; |
589 | } |
590 | |
591 | errno = 0; |
592 | val = strtol(s, NULL, 10); |
593 | if (errno != 0) |
594 | goto err; |
595 | if (val < 0) |
596 | goto err; |
597 | |
598 | *rss = val * getpagesize(); |
599 | return 0; |
600 | |
601 | err: |
602 | return UV_EINVAL; |
603 | } |
604 | |
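
/* Read up to len-1 bytes of |filename| into |buf| and NUL-terminate the
 * result. Returns 0 on success or a negative error code. Typical use:
 *
 *   char buf[128];
 *   double uptime;
 *   if (0 == uv__slurp("/proc/uptime", buf, sizeof(buf)))
 *     sscanf(buf, "%lf", &uptime);
 */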
605 | static int uv__slurp(const char* filename, char* buf, size_t len) { |
606 | ssize_t n; |
607 | int fd; |
608 | |
609 | assert(len > 0); |
610 | |
611 | fd = uv__open_cloexec(filename, O_RDONLY); |
612 | if (fd < 0) |
613 | return fd; |
614 | |
615 | do |
616 | n = read(fd, buf, len - 1); |
617 | while (n == -1 && errno == EINTR); |
618 | |
619 | if (uv__close_nocheckstdio(fd)) |
620 | abort(); |
621 | |
622 | if (n < 0) |
623 | return UV__ERR(errno); |
624 | |
625 | buf[n] = '\0'; |
626 | |
627 | return 0; |
628 | } |
629 | |
630 | int uv_uptime(double* uptime) { |
631 | static volatile int no_clock_boottime; |
632 | char buf[128]; |
633 | struct timespec now; |
634 | int r; |
635 | |
  /* Try /proc/uptime first, then fall back to clock_gettime(). */
637 | |
  if (0 == uv__slurp("/proc/uptime", buf, sizeof(buf)))
    if (1 == sscanf(buf, "%lf", uptime))
640 | return 0; |
641 | |
642 | /* Try CLOCK_BOOTTIME first, fall back to CLOCK_MONOTONIC if not available |
643 | * (pre-2.6.39 kernels). CLOCK_MONOTONIC doesn't increase when the system |
644 | * is suspended. |
645 | */ |
646 | if (no_clock_boottime) { |
647 | retry_clock_gettime: r = clock_gettime(CLOCK_MONOTONIC, &now); |
648 | } |
649 | else if ((r = clock_gettime(CLOCK_BOOTTIME, &now)) && errno == EINVAL) { |
650 | no_clock_boottime = 1; |
651 | goto retry_clock_gettime; |
652 | } |
653 | |
654 | if (r) |
655 | return UV__ERR(errno); |
656 | |
657 | *uptime = now.tv_sec; |
658 | return 0; |
659 | } |
660 | |
661 | |
662 | static int uv__cpu_num(FILE* statfile_fp, unsigned int* numcpus) { |
663 | unsigned int num; |
664 | char buf[1024]; |
665 | |
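  /* The first line of /proc/stat is the aggregate "cpu ..." line. Skip it,
   * then count the per-CPU "cpu0".."cpuN" lines that follow.
   */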
666 | if (!fgets(buf, sizeof(buf), statfile_fp)) |
667 | return UV_EIO; |
668 | |
669 | num = 0; |
670 | while (fgets(buf, sizeof(buf), statfile_fp)) { |
    if (strncmp(buf, "cpu", 3))
672 | break; |
673 | num++; |
674 | } |
675 | |
676 | if (num == 0) |
677 | return UV_EIO; |
678 | |
679 | *numcpus = num; |
680 | return 0; |
681 | } |
682 | |
683 | |
684 | int uv_cpu_info(uv_cpu_info_t** cpu_infos, int* count) { |
685 | unsigned int numcpus; |
686 | uv_cpu_info_t* ci; |
687 | int err; |
688 | FILE* statfile_fp; |
689 | |
690 | *cpu_infos = NULL; |
691 | *count = 0; |
692 | |
  statfile_fp = uv__open_file("/proc/stat");
694 | if (statfile_fp == NULL) |
695 | return UV__ERR(errno); |
696 | |
697 | err = uv__cpu_num(statfile_fp, &numcpus); |
698 | if (err < 0) |
699 | goto out; |
700 | |
701 | err = UV_ENOMEM; |
702 | ci = uv__calloc(numcpus, sizeof(*ci)); |
703 | if (ci == NULL) |
704 | goto out; |
705 | |
706 | err = read_models(numcpus, ci); |
707 | if (err == 0) |
708 | err = read_times(statfile_fp, numcpus, ci); |
709 | |
710 | if (err) { |
711 | uv_free_cpu_info(ci, numcpus); |
712 | goto out; |
713 | } |
714 | |
715 | /* read_models() on x86 also reads the CPU speed from /proc/cpuinfo. |
716 | * We don't check for errors here. Worst case, the field is left zero. |
717 | */ |
718 | if (ci[0].speed == 0) |
719 | read_speeds(numcpus, ci); |
720 | |
721 | *cpu_infos = ci; |
722 | *count = numcpus; |
723 | err = 0; |
724 | |
725 | out: |
726 | |
727 | if (fclose(statfile_fp)) |
728 | if (errno != EINTR && errno != EINPROGRESS) |
729 | abort(); |
730 | |
731 | return err; |
732 | } |
733 | |
734 | |
735 | static void read_speeds(unsigned int numcpus, uv_cpu_info_t* ci) { |
736 | unsigned int num; |
737 | |
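  /* scaling_cur_freq reports kHz; uv_cpu_info_t.speed is in MHz. */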
738 | for (num = 0; num < numcpus; num++) |
739 | ci[num].speed = read_cpufreq(num) / 1000; |
740 | } |
741 | |
742 | |
743 | /* Also reads the CPU frequency on x86. The other architectures only have |
744 | * a BogoMIPS field, which may not be very accurate. |
745 | * |
746 | * Note: Simply returns on error, uv_cpu_info() takes care of the cleanup. |
747 | */ |
748 | static int read_models(unsigned int numcpus, uv_cpu_info_t* ci) { |
  static const char model_marker[] = "model name\t: ";
  static const char speed_marker[] = "cpu MHz\t\t: ";
751 | const char* inferred_model; |
752 | unsigned int model_idx; |
753 | unsigned int speed_idx; |
754 | char buf[1024]; |
755 | char* model; |
756 | FILE* fp; |
757 | |
758 | /* Most are unused on non-ARM, non-MIPS and non-x86 architectures. */ |
759 | (void) &model_marker; |
760 | (void) &speed_marker; |
761 | (void) &speed_idx; |
762 | (void) &model; |
763 | (void) &buf; |
764 | (void) &fp; |
765 | |
766 | model_idx = 0; |
767 | speed_idx = 0; |
768 | |
769 | #if defined(__arm__) || \ |
770 | defined(__i386__) || \ |
771 | defined(__mips__) || \ |
772 | defined(__x86_64__) |
  fp = uv__open_file("/proc/cpuinfo");
774 | if (fp == NULL) |
775 | return UV__ERR(errno); |
776 | |
777 | while (fgets(buf, sizeof(buf), fp)) { |
778 | if (model_idx < numcpus) { |
779 | if (strncmp(buf, model_marker, sizeof(model_marker) - 1) == 0) { |
780 | model = buf + sizeof(model_marker) - 1; |
781 | model = uv__strndup(model, strlen(model) - 1); /* Strip newline. */ |
782 | if (model == NULL) { |
783 | fclose(fp); |
784 | return UV_ENOMEM; |
785 | } |
786 | ci[model_idx++].model = model; |
787 | continue; |
788 | } |
789 | } |
790 | #if defined(__arm__) || defined(__mips__) |
791 | if (model_idx < numcpus) { |
792 | #if defined(__arm__) |
793 | /* Fallback for pre-3.8 kernels. */ |
      static const char model_marker[] = "Processor\t: ";
795 | #else /* defined(__mips__) */ |
      static const char model_marker[] = "cpu model\t\t: ";
797 | #endif |
798 | if (strncmp(buf, model_marker, sizeof(model_marker) - 1) == 0) { |
799 | model = buf + sizeof(model_marker) - 1; |
800 | model = uv__strndup(model, strlen(model) - 1); /* Strip newline. */ |
801 | if (model == NULL) { |
802 | fclose(fp); |
803 | return UV_ENOMEM; |
804 | } |
805 | ci[model_idx++].model = model; |
806 | continue; |
807 | } |
808 | } |
809 | #else /* !__arm__ && !__mips__ */ |
810 | if (speed_idx < numcpus) { |
811 | if (strncmp(buf, speed_marker, sizeof(speed_marker) - 1) == 0) { |
812 | ci[speed_idx++].speed = atoi(buf + sizeof(speed_marker) - 1); |
813 | continue; |
814 | } |
815 | } |
816 | #endif /* __arm__ || __mips__ */ |
817 | } |
818 | |
819 | fclose(fp); |
820 | #endif /* __arm__ || __i386__ || __mips__ || __x86_64__ */ |
821 | |
  /* Now we want to make sure that all the models contain *something* because
   * it's not safe to leave them as null. Copy the last entry unless there
   * isn't one; in that case we simply put "unknown" into everything.
   */
  inferred_model = "unknown";
827 | if (model_idx > 0) |
828 | inferred_model = ci[model_idx - 1].model; |
829 | |
830 | while (model_idx < numcpus) { |
831 | model = uv__strndup(inferred_model, strlen(inferred_model)); |
832 | if (model == NULL) |
833 | return UV_ENOMEM; |
834 | ci[model_idx++].model = model; |
835 | } |
836 | |
837 | return 0; |
838 | } |
839 | |
840 | |
841 | static int read_times(FILE* statfile_fp, |
842 | unsigned int numcpus, |
843 | uv_cpu_info_t* ci) { |
844 | struct uv_cpu_times_s ts; |
845 | unsigned int ticks; |
846 | unsigned int multiplier; |
847 | uint64_t user; |
848 | uint64_t nice; |
849 | uint64_t sys; |
850 | uint64_t idle; |
851 | uint64_t dummy; |
852 | uint64_t irq; |
853 | uint64_t num; |
854 | uint64_t len; |
855 | char buf[1024]; |
856 | |
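  /* /proc/stat expresses the times in clock ticks (_SC_CLK_TCK per second);
   * convert them to the milliseconds that uv_cpu_info_t reports.
   */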
  ticks = (unsigned int)sysconf(_SC_CLK_TCK);
  assert(ticks != (unsigned int) -1);
  assert(ticks != 0);
  multiplier = ((uint64_t)1000L / ticks);
861 | |
862 | rewind(statfile_fp); |
863 | |
864 | if (!fgets(buf, sizeof(buf), statfile_fp)) |
865 | abort(); |
866 | |
867 | num = 0; |
868 | |
869 | while (fgets(buf, sizeof(buf), statfile_fp)) { |
870 | if (num >= numcpus) |
871 | break; |
872 | |
    if (strncmp(buf, "cpu", 3))
874 | break; |
875 | |
876 | /* skip "cpu<num> " marker */ |
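    /* sizeof("cpu0") == 5 covers "cpu", one digit and the NUL, which stands
     * in for the trailing space; each extra decimal digit of the CPU number
     * widens the prefix by one.
     */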
877 | { |
878 | unsigned int n; |
      int r = sscanf(buf, "cpu%u ", &n);
880 | assert(r == 1); |
881 | (void) r; /* silence build warning */ |
      for (len = sizeof("cpu0"); n /= 10; len++);
883 | } |
884 | |
885 | /* Line contains user, nice, system, idle, iowait, irq, softirq, steal, |
886 | * guest, guest_nice but we're only interested in the first four + irq. |
887 | * |
888 | * Don't use %*s to skip fields or %ll to read straight into the uint64_t |
889 | * fields, they're not allowed in C89 mode. |
890 | */ |
891 | if (6 != sscanf(buf + len, |
892 | "%" PRIu64 " %" PRIu64 " %" PRIu64 |
893 | "%" PRIu64 " %" PRIu64 " %" PRIu64, |
894 | &user, |
895 | &nice, |
896 | &sys, |
897 | &idle, |
898 | &dummy, |
899 | &irq)) |
900 | abort(); |
901 | |
902 | ts.user = user * multiplier; |
903 | ts.nice = nice * multiplier; |
904 | ts.sys = sys * multiplier; |
905 | ts.idle = idle * multiplier; |
906 | ts.irq = irq * multiplier; |
907 | ci[num++].cpu_times = ts; |
908 | } |
909 | assert(num == numcpus); |
910 | |
911 | return 0; |
912 | } |
913 | |
914 | |
915 | static uint64_t read_cpufreq(unsigned int cpunum) { |
916 | uint64_t val; |
917 | char buf[1024]; |
918 | FILE* fp; |
919 | |
  snprintf(buf,
           sizeof(buf),
           "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_cur_freq",
           cpunum);
924 | |
925 | fp = uv__open_file(buf); |
926 | if (fp == NULL) |
927 | return 0; |
928 | |
929 | if (fscanf(fp, "%" PRIu64, &val) != 1) |
930 | val = 0; |
931 | |
932 | fclose(fp); |
933 | |
934 | return val; |
935 | } |
936 | |
937 | |
938 | static int uv__ifaddr_exclude(struct ifaddrs *ent, int exclude_type) { |
939 | if (!((ent->ifa_flags & IFF_UP) && (ent->ifa_flags & IFF_RUNNING))) |
940 | return 1; |
941 | if (ent->ifa_addr == NULL) |
942 | return 1; |
943 | /* |
944 | * On Linux getifaddrs returns information related to the raw underlying |
945 | * devices. We're not interested in this information yet. |
946 | */ |
947 | if (ent->ifa_addr->sa_family == PF_PACKET) |
948 | return exclude_type; |
949 | return !exclude_type; |
950 | } |
951 | |
952 | int uv_interface_addresses(uv_interface_address_t** addresses, int* count) { |
953 | #ifndef HAVE_IFADDRS_H |
954 | *count = 0; |
955 | *addresses = NULL; |
956 | return UV_ENOSYS; |
957 | #else |
958 | struct ifaddrs *addrs, *ent; |
959 | uv_interface_address_t* address; |
960 | int i; |
961 | struct sockaddr_ll *sll; |
962 | |
963 | *count = 0; |
964 | *addresses = NULL; |
965 | |
966 | if (getifaddrs(&addrs)) |
967 | return UV__ERR(errno); |
968 | |
969 | /* Count the number of interfaces */ |
970 | for (ent = addrs; ent != NULL; ent = ent->ifa_next) { |
971 | if (uv__ifaddr_exclude(ent, UV__EXCLUDE_IFADDR)) |
972 | continue; |
973 | |
974 | (*count)++; |
975 | } |
976 | |
977 | if (*count == 0) { |
978 | freeifaddrs(addrs); |
979 | return 0; |
980 | } |
981 | |
  /* Make sure the memory is initialized to zero using calloc() */
983 | *addresses = uv__calloc(*count, sizeof(**addresses)); |
984 | if (!(*addresses)) { |
985 | freeifaddrs(addrs); |
986 | return UV_ENOMEM; |
987 | } |
988 | |
989 | address = *addresses; |
990 | |
991 | for (ent = addrs; ent != NULL; ent = ent->ifa_next) { |
992 | if (uv__ifaddr_exclude(ent, UV__EXCLUDE_IFADDR)) |
993 | continue; |
994 | |
995 | address->name = uv__strdup(ent->ifa_name); |
996 | |
997 | if (ent->ifa_addr->sa_family == AF_INET6) { |
998 | address->address.address6 = *((struct sockaddr_in6*) ent->ifa_addr); |
999 | } else { |
1000 | address->address.address4 = *((struct sockaddr_in*) ent->ifa_addr); |
1001 | } |
1002 | |
1003 | if (ent->ifa_netmask->sa_family == AF_INET6) { |
1004 | address->netmask.netmask6 = *((struct sockaddr_in6*) ent->ifa_netmask); |
1005 | } else { |
1006 | address->netmask.netmask4 = *((struct sockaddr_in*) ent->ifa_netmask); |
1007 | } |
1008 | |
1009 | address->is_internal = !!(ent->ifa_flags & IFF_LOOPBACK); |
1010 | |
1011 | address++; |
1012 | } |
1013 | |
1014 | /* Fill in physical addresses for each interface */ |
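  /* The link-layer (AF_PACKET) entries carry the hardware (MAC) address in
   * the sll_addr member of struct sockaddr_ll.
   */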
1015 | for (ent = addrs; ent != NULL; ent = ent->ifa_next) { |
1016 | if (uv__ifaddr_exclude(ent, UV__EXCLUDE_IFPHYS)) |
1017 | continue; |
1018 | |
1019 | address = *addresses; |
1020 | |
1021 | for (i = 0; i < (*count); i++) { |
1022 | size_t namelen = strlen(ent->ifa_name); |
      /* Alias interfaces share the same physical address */
1024 | if (strncmp(address->name, ent->ifa_name, namelen) == 0 && |
1025 | (address->name[namelen] == 0 || address->name[namelen] == ':')) { |
1026 | sll = (struct sockaddr_ll*)ent->ifa_addr; |
1027 | memcpy(address->phys_addr, sll->sll_addr, sizeof(address->phys_addr)); |
1028 | } |
1029 | address++; |
1030 | } |
1031 | } |
1032 | |
1033 | freeifaddrs(addrs); |
1034 | |
1035 | return 0; |
1036 | #endif |
1037 | } |
1038 | |
1039 | |
1040 | void uv_free_interface_addresses(uv_interface_address_t* addresses, |
1041 | int count) { |
1042 | int i; |
1043 | |
1044 | for (i = 0; i < count; i++) { |
1045 | uv__free(addresses[i].name); |
1046 | } |
1047 | |
1048 | uv__free(addresses); |
1049 | } |
1050 | |
1051 | |
1052 | void uv__set_process_title(const char* title) { |
1053 | #if defined(PR_SET_NAME) |
1054 | prctl(PR_SET_NAME, title); /* Only copies first 16 characters. */ |
1055 | #endif |
1056 | } |
1057 | |
1058 | |
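/* Extract the value of a "<what>   NNNN kB" line from /proc/meminfo, e.g.
 * "MemTotal:       16316268 kB", and return it in bytes; 0 means failure.
 */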
1059 | static uint64_t uv__read_proc_meminfo(const char* what) { |
1060 | uint64_t rc; |
1061 | char* p; |
1062 | char buf[4096]; /* Large enough to hold all of /proc/meminfo. */ |
1063 | |
  if (uv__slurp("/proc/meminfo", buf, sizeof(buf)))
1065 | return 0; |
1066 | |
1067 | p = strstr(buf, what); |
1068 | |
1069 | if (p == NULL) |
1070 | return 0; |
1071 | |
1072 | p += strlen(what); |
1073 | |
1074 | rc = 0; |
  sscanf(p, "%" PRIu64 " kB", &rc);
1076 | |
1077 | return rc * 1024; |
1078 | } |
1079 | |
1080 | |
1081 | uint64_t uv_get_free_memory(void) { |
1082 | struct sysinfo info; |
1083 | uint64_t rc; |
1084 | |
  rc = uv__read_proc_meminfo("MemFree:");
1086 | |
1087 | if (rc != 0) |
1088 | return rc; |
1089 | |
1090 | if (0 == sysinfo(&info)) |
1091 | return (uint64_t) info.freeram * info.mem_unit; |
1092 | |
1093 | return 0; |
1094 | } |
1095 | |
1096 | |
1097 | uint64_t uv_get_total_memory(void) { |
1098 | struct sysinfo info; |
1099 | uint64_t rc; |
1100 | |
  rc = uv__read_proc_meminfo("MemTotal:");
1102 | |
1103 | if (rc != 0) |
1104 | return rc; |
1105 | |
1106 | if (0 == sysinfo(&info)) |
1107 | return (uint64_t) info.totalram * info.mem_unit; |
1108 | |
1109 | return 0; |
1110 | } |
1111 | |
1112 | |
1113 | static uint64_t uv__read_cgroups_uint64(const char* cgroup, const char* param) { |
1114 | char filename[256]; |
1115 | char buf[32]; /* Large enough to hold an encoded uint64_t. */ |
1116 | uint64_t rc; |
1117 | |
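  /* E.g. ("memory", "memory.limit_in_bytes") reads
   * /sys/fs/cgroup/memory/memory.limit_in_bytes (the cgroup v1 layout).
   */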
1118 | rc = 0; |
  snprintf(filename, sizeof(filename), "/sys/fs/cgroup/%s/%s", cgroup, param);
1120 | if (0 == uv__slurp(filename, buf, sizeof(buf))) |
1121 | sscanf(buf, "%" PRIu64, &rc); |
1122 | |
1123 | return rc; |
1124 | } |
1125 | |
1126 | |
1127 | uint64_t uv_get_constrained_memory(void) { |
1128 | /* |
1129 | * This might return 0 if there was a problem getting the memory limit from |
1130 | * cgroups. This is OK because a return value of 0 signifies that the memory |
1131 | * limit is unknown. |
1132 | */ |
  return uv__read_cgroups_uint64("memory", "memory.limit_in_bytes");
1134 | } |
1135 | |
1136 | |
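/* The first three fields of /proc/loadavg are the 1, 5 and 15 minute load
 * averages, e.g. "0.44 0.39 0.58 1/748 33427". The sysinfo() fallback
 * reports them as fixed-point values scaled by 65536 (1 << SI_LOAD_SHIFT).
 */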
1137 | void uv_loadavg(double avg[3]) { |
1138 | struct sysinfo info; |
1139 | char buf[128]; /* Large enough to hold all of /proc/loadavg. */ |
1140 | |
  if (0 == uv__slurp("/proc/loadavg", buf, sizeof(buf)))
    if (3 == sscanf(buf, "%lf %lf %lf", &avg[0], &avg[1], &avg[2]))
1143 | return; |
1144 | |
1145 | if (sysinfo(&info) < 0) |
1146 | return; |
1147 | |
1148 | avg[0] = (double) info.loads[0] / 65536.0; |
1149 | avg[1] = (double) info.loads[1] / 65536.0; |
1150 | avg[2] = (double) info.loads[2] / 65536.0; |
1151 | } |
1152 | |