1/*
2 * OS includes and handling of OS dependencies
3 *
4 * This header exists to pull in some common system headers that
5 * most code in QEMU will want, and to fix up some possible issues with
6 * it (missing defines, Windows weirdness, and so on).
7 *
8 * To avoid getting into possible circular include dependencies, this
9 * file should not include any other QEMU headers, with the exceptions
10 * of config-host.h, config-target.h, qemu/compiler.h,
11 * sysemu/os-posix.h, sysemu/os-win32.h, glib-compat.h and
12 * qemu/typedefs.h, all of which are doing a similar job to this file
13 * and are under similar constraints.
14 *
15 * This header also contains prototypes for functions defined in
16 * os-*.c and util/oslib-*.c; those would probably be better split
17 * out into separate header files.
18 *
19 * In an ideal world this header would contain only:
20 * (1) things which everybody needs
21 * (2) things without which code would work on most platforms but
22 * fail to compile or misbehave on a minority of host OSes
23 *
24 * This work is licensed under the terms of the GNU GPL, version 2 or later.
25 * See the COPYING file in the top-level directory.
26 */
27#ifndef QEMU_OSDEP_H
28#define QEMU_OSDEP_H
29
30#include "config-host.h"
31#ifdef NEED_CPU_H
32#include "config-target.h"
33#else
34#include "exec/poison.h"
35#endif
#ifdef __COVERITY__
/*
 * Coverity does not like the new _Float* types that are used by
 * recent glibc, and croaks on every single file that includes
 * stdlib.h.  These typedefs are enough to please it.
 *
 * Note that these fix parse errors so they cannot be placed in
 * scripts/coverity-model.c.
 */
typedef float _Float32;
typedef double _Float32x;
typedef double _Float64;
typedef __float80 _Float64x;
typedef __float128 _Float128;
#endif
50
51#include "qemu/compiler.h"
52
53/* Older versions of C++ don't get definitions of various macros from
54 * stdlib.h unless we define these macros before first inclusion of
55 * that system header.
56 */
57#ifndef __STDC_CONSTANT_MACROS
58#define __STDC_CONSTANT_MACROS
59#endif
60#ifndef __STDC_LIMIT_MACROS
61#define __STDC_LIMIT_MACROS
62#endif
63#ifndef __STDC_FORMAT_MACROS
64#define __STDC_FORMAT_MACROS
65#endif
66
/*
 * Temporarily rename the daemon() function while stdlib.h is included so
 * that the compiler does not see the warning attached to its declaration
 * on OSX, then declare the real daemon() ourselves.
 */
#ifdef __APPLE__
#define daemon qemu_fake_daemon_function
#include <stdlib.h>
#undef daemon
extern int daemon(int, int);
#endif

#ifdef _WIN32
/* as defined in sdkddkver.h */
#ifndef _WIN32_WINNT
#define _WIN32_WINNT 0x0600 /* Vista */
#endif
/* reduces the number of implicitly included headers */
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#endif

/* enable C99/POSIX format strings (needs mingw32-runtime 3.15 or later) */
#ifdef __MINGW32__
#define __USE_MINGW_ANSI_STDIO 1
#endif
92
93#include <stdarg.h>
94#include <stddef.h>
95#include <stdbool.h>
96#include <stdint.h>
97#include <sys/types.h>
98#include <stdlib.h>
99#include <stdio.h>
100
101#include <string.h>
102#include <strings.h>
103#include <inttypes.h>
104#include <limits.h>
105/* Put unistd.h before time.h as that triggers localtime_r/gmtime_r
106 * function availability on recentish Mingw-w64 platforms. */
107#include <unistd.h>
108#include <time.h>
109#include <ctype.h>
110#include <errno.h>
111#include <fcntl.h>
112#include <getopt.h>
113#include <sys/stat.h>
114#include <sys/time.h>
115#include <assert.h>
116/* setjmp must be declared before sysemu/os-win32.h
117 * because it is redefined there. */
118#include <setjmp.h>
119#include <signal.h>
120
121#ifdef __OpenBSD__
122#include <sys/signal.h>
123#endif
124
#ifndef _WIN32
#include <sys/wait.h>
#else
/*
 * No <sys/wait.h> on Windows: treat every status as a normal exit and
 * use the status value itself as the exit code.
 */
#define WIFEXITED(x)   1
#define WEXITSTATUS(x) (x)
#endif
131
132#ifdef _WIN32
133#include "sysemu/os-win32.h"
134#endif
135
136#ifdef CONFIG_POSIX
137#include "sysemu/os-posix.h"
138#endif
139
140#include "glib-compat.h"
141#include "qemu/typedefs.h"
142
/*
 * For mingw, as of v6.0.0, the function implementing the assert macro is
 * not marked as noreturn, so the compiler cannot delete code following an
 * assert(false) as unused.  We rely on this within the code base to delete
 * code that is unreachable when features are disabled.
 * All supported versions of Glib's g_assert() satisfy this requirement.
 */
#ifdef __MINGW32__
#undef assert
#define assert(x)  g_assert(x)
#endif
154
/*
 * According to the waitpid man page, WCOREDUMP is not specified in
 * POSIX.1-2001 and is not available on some UNIX implementations
 * (e.g., AIX, SunOS).  Instead of enclosing every use inside
 * #ifdef WCOREDUMP ... #endif as the man page suggests, provide a
 * fallback here that always evaluates to 0 (no core dump reported).
 */
#ifndef WCOREDUMP
#define WCOREDUMP(status) 0
#endif
/*
 * We have a lot of unaudited code that may fail in strange ways, or
 * even be a security risk during migration, if you disable assertions
 * at compile-time.  You may comment out these safety checks if you
 * absolutely want to disable assertion overhead, but it is not
 * supported upstream so the risk is all yours.  Meanwhile, please
 * submit patches to remove any side-effects inside an assertion, or
 * fixing error handling that should use Error instead of assert.
 */
#ifdef NDEBUG
#error building with NDEBUG is not supported
#endif
#ifdef G_DISABLE_ASSERT
#error building with G_DISABLE_ASSERT is not supported
#endif
180
/*
 * Fallbacks for flags and errno values that some hosts do not define.
 * Missing open() flags become 0 so they can be OR-ed in harmlessly;
 * missing errno values get private numbers starting at 4096.
 */
#ifndef O_LARGEFILE
#define O_LARGEFILE 0
#endif
#ifndef O_BINARY
#define O_BINARY 0
#endif
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif
#ifndef ENOMEDIUM
#define ENOMEDIUM ENODEV
#endif
#ifndef ENOTSUP
#define ENOTSUP 4096
#endif
#ifndef ECANCELED
#define ECANCELED 4097
#endif
#ifndef EMEDIUMTYPE
#define EMEDIUMTYPE 4098
#endif
#ifndef ESHUTDOWN
#define ESHUTDOWN 4099
#endif
205
/*
 * The macros TYPE_SIGNED, TYPE_WIDTH, and TYPE_MAXIMUM are from
 * Gnulib, and are under the LGPL v2.1 or (at your option) any
 * later version.
 */

/* True if the real type T is signed. */
#define TYPE_SIGNED(t) (!((t)0 < (t)-1))

/*
 * The width in bits of the integer type or expression T.
 * Padding bits are not supported.
 */
#define TYPE_WIDTH(t) (sizeof(t) * CHAR_BIT)

/*
 * The maximum value for the integer type T.  For signed types the value
 * is built as (2^(width-2) - 1) * 2 + 1 so that no intermediate result
 * overflows.
 */
#define TYPE_MAXIMUM(t)                                                \
    ((t) (!TYPE_SIGNED(t)                                              \
          ? (t)-1                                                      \
          : ((((t)1 << (TYPE_WIDTH(t) - 2)) - 1) * 2 + 1)))

/*
 * time_t may be either 32 or 64 bits depending on the host OS, and
 * can be either signed or unsigned, so we can't just hardcode a
 * specific maximum value.  This is not a C preprocessor constant,
 * so you can't use TIME_MAX in an #ifdef, but for our purposes
 * this isn't a problem.
 */
#ifndef TIME_MAX
#define TIME_MAX TYPE_MAXIMUM(time_t)
#endif
235
/*
 * HOST_LONG_BITS is the size of a native pointer in bits.  Despite the
 * name it is derived from UINTPTR_MAX, not from the width of 'long'.
 */
#if UINTPTR_MAX == UINT32_MAX
# define HOST_LONG_BITS 32
#elif UINTPTR_MAX == UINT64_MAX
# define HOST_LONG_BITS 64
#else
# error Unknown pointer size
#endif
244
/*
 * Mac OSX has a <stdint.h> bug that incorrectly defines SIZE_MAX with
 * the wrong type.  Our replacement isn't usable in preprocessor
 * expressions, but it is sufficient for our needs.
 */
#if defined(HAVE_BROKEN_SIZE_MAX) && HAVE_BROKEN_SIZE_MAX
#undef SIZE_MAX
#define SIZE_MAX ((size_t)-1)
#endif
252
/*
 * Plain expression macros: they stay usable in constant expressions,
 * but each argument may be evaluated more than once, so do not pass
 * expressions with side effects.
 */
#ifndef MIN
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#endif
#ifndef MAX
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#endif

/*
 * Minimum that ignores zero operands: the result is zero if and only if
 * both values are zero.  Intended for use with unsigned values only.
 */
#ifndef MIN_NON_ZERO
#define MIN_NON_ZERO(a, b) ((a) == 0 ? (b) : \
                                ((b) == 0 ? (a) : (MIN(a, b))))
#endif
266
/* Round number down to multiple */
#define QEMU_ALIGN_DOWN(n, m) ((n) / (m) * (m))

/*
 * Round number up to multiple.  Safe when m is not a power of 2 (see
 * ROUND_UP for a faster version when a power of 2 is guaranteed).
 * Note that m is evaluated more than once.
 */
#define QEMU_ALIGN_UP(n, m) QEMU_ALIGN_DOWN((n) + (m) - 1, (m))

/* Check if n is a multiple of m */
#define QEMU_IS_ALIGNED(n, m) (((n) % (m)) == 0)

/* n-byte align pointer down; the result keeps the type of p */
#define QEMU_ALIGN_PTR_DOWN(p, n) \
    ((typeof(p))QEMU_ALIGN_DOWN((uintptr_t)(p), (n)))

/* n-byte align pointer up; the result keeps the type of p */
#define QEMU_ALIGN_PTR_UP(p, n) \
    ((typeof(p))QEMU_ALIGN_UP((uintptr_t)(p), (n)))

/* Check if pointer p is n-bytes aligned */
#define QEMU_PTR_IS_ALIGNED(p, n) QEMU_IS_ALIGNED((uintptr_t)(p), (n))
287
/*
 * Round number up to multiple.  Requires that d be a power of 2 (see
 * QEMU_ALIGN_UP for a safer but slower version on arbitrary
 * numbers); works even if d is a smaller type than n.
 *
 * The "0 ? (n) : (d)" never evaluates n; it only gives the negated mask
 * the common type of n and d, so the mask is not truncated when d is
 * narrower than n.
 */
#ifndef ROUND_UP
#define ROUND_UP(n, d) (((n) + (d) - 1) & -(0 ? (n) : (d)))
#endif

/* Integer division of n by d, rounding the quotient up. */
#ifndef DIV_ROUND_UP
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
#endif
298
/*
 * &(x)[0] is always a pointer - if it's same type as x then the argument is a
 * pointer, not an array.
 */
#define QEMU_IS_ARRAY(x) (!__builtin_types_compatible_p(typeof(x), \
                                                        typeof(&(x)[0])))

/*
 * Number of elements in array x.  The QEMU_BUILD_BUG_ON_ZERO() term is
 * there to reject a pointer argument at build time.
 */
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) ((sizeof(x) / sizeof((x)[0])) + \
                       QEMU_BUILD_BUG_ON_ZERO(!QEMU_IS_ARRAY(x)))
#endif
309
/*
 * Daemonization and aligned/anonymous memory helpers.  Only prototypes
 * live here; the definitions are in the OS-specific source files.
 */
int qemu_daemon(int nochdir, int noclose);
void *qemu_try_memalign(size_t alignment, size_t size);
void *qemu_memalign(size_t alignment, size_t size);
void *qemu_anon_ram_alloc(size_t size, uint64_t *align, bool shared);
void qemu_vfree(void *ptr);
void qemu_anon_ram_free(void *ptr, size_t size);
316
/*
 * Marker for advice values the host cannot express.  Parenthesized so
 * that it expands safely inside larger expressions.
 */
#define QEMU_MADV_INVALID (-1)

/*
 * Map each QEMU_MADV_* name to the host's value where one exists.
 * With CONFIG_MADVISE the MADV_* constants are used (the optional ones
 * only if the host headers define them); with CONFIG_POSIX_MADVISE only
 * WILLNEED and DONTNEED have POSIX_MADV_* equivalents.
 */
#if defined(CONFIG_MADVISE)

#define QEMU_MADV_WILLNEED MADV_WILLNEED
#define QEMU_MADV_DONTNEED MADV_DONTNEED
#ifdef MADV_DONTFORK
#define QEMU_MADV_DONTFORK MADV_DONTFORK
#endif
#ifdef MADV_MERGEABLE
#define QEMU_MADV_MERGEABLE MADV_MERGEABLE
#endif
#ifdef MADV_UNMERGEABLE
#define QEMU_MADV_UNMERGEABLE MADV_UNMERGEABLE
#endif
#ifdef MADV_DODUMP
#define QEMU_MADV_DODUMP MADV_DODUMP
#endif
#ifdef MADV_DONTDUMP
#define QEMU_MADV_DONTDUMP MADV_DONTDUMP
#endif
#ifdef MADV_HUGEPAGE
#define QEMU_MADV_HUGEPAGE MADV_HUGEPAGE
#endif
#ifdef MADV_NOHUGEPAGE
#define QEMU_MADV_NOHUGEPAGE MADV_NOHUGEPAGE
#endif
#ifdef MADV_REMOVE
#define QEMU_MADV_REMOVE MADV_REMOVE
#endif

#elif defined(CONFIG_POSIX_MADVISE)

#define QEMU_MADV_WILLNEED POSIX_MADV_WILLNEED
#define QEMU_MADV_DONTNEED POSIX_MADV_DONTNEED

#endif

/* Anything the host did not provide above becomes QEMU_MADV_INVALID. */
#ifndef QEMU_MADV_WILLNEED
#define QEMU_MADV_WILLNEED QEMU_MADV_INVALID
#endif
#ifndef QEMU_MADV_DONTNEED
#define QEMU_MADV_DONTNEED QEMU_MADV_INVALID
#endif
#ifndef QEMU_MADV_DONTFORK
#define QEMU_MADV_DONTFORK QEMU_MADV_INVALID
#endif
#ifndef QEMU_MADV_MERGEABLE
#define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID
#endif
#ifndef QEMU_MADV_UNMERGEABLE
#define QEMU_MADV_UNMERGEABLE QEMU_MADV_INVALID
#endif
#ifndef QEMU_MADV_DODUMP
#define QEMU_MADV_DODUMP QEMU_MADV_INVALID
#endif
#ifndef QEMU_MADV_DONTDUMP
#define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID
#endif
#ifndef QEMU_MADV_HUGEPAGE
#define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID
#endif
#ifndef QEMU_MADV_NOHUGEPAGE
#define QEMU_MADV_NOHUGEPAGE QEMU_MADV_INVALID
#endif
#ifndef QEMU_MADV_REMOVE
#define QEMU_MADV_REMOVE QEMU_MADV_INVALID
#endif
391
/* Hosts on which the serial character device backend is available. */
#if defined(_WIN32) || defined(__linux__) || defined(__sun__) \
    || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) \
    || defined(__DragonFly__) || defined(__GLIBC__)
#define HAVE_CHARDEV_SERIAL 1
#endif

/* Hosts on which the parallel port character device backend is available. */
#if defined(__linux__) || defined(__FreeBSD__) || \
    defined(__FreeBSD_kernel__) || defined(__DragonFly__)
#define HAVE_CHARDEV_PARPORT 1
#endif

/* Fallback values for SIGBUS si_code constants missing from older headers. */
#if defined(CONFIG_LINUX)
#ifndef BUS_MCEERR_AR
#define BUS_MCEERR_AR 4
#endif
#ifndef BUS_MCEERR_AO
#define BUS_MCEERR_AO 5
#endif
#endif
413
/*
 * QEMU_VMALLOC_ALIGN: per-host alignment value.  Only the first two
 * branches yield compile-time constants; the others call getpagesize().
 */
#if defined(__linux__) && \
    (defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) \
     || defined(__powerpc64__))
   /* Use 2 MiB alignment so transparent hugepages can be used by KVM.
      Valgrind does not support alignments larger than 1 MiB,
      therefore we need special code which handles running on Valgrind. */
#  define QEMU_VMALLOC_ALIGN (512 * 4096)
#elif defined(__linux__) && defined(__s390x__)
   /* Use 1 MiB (segment size) alignment so gmap can be used by KVM. */
#  define QEMU_VMALLOC_ALIGN (256 * 4096)
#elif defined(__linux__) && defined(__sparc__)
#include <sys/shm.h>
#  define QEMU_VMALLOC_ALIGN MAX(getpagesize(), SHMLBA)
#else
#  define QEMU_VMALLOC_ALIGN getpagesize()
#endif
430
#ifdef CONFIG_POSIX
/*
 * Fixed-layout signal description: twelve 32-bit fields, four 64-bit
 * fields and 48 bytes of padding, 128 bytes in total.
 */
struct qemu_signalfd_siginfo {
    uint32_t ssi_signo;   /* Signal number */
    int32_t  ssi_errno;   /* Error number (unused) */
    int32_t  ssi_code;    /* Signal code */
    uint32_t ssi_pid;     /* PID of sender */
    uint32_t ssi_uid;     /* Real UID of sender */
    int32_t  ssi_fd;      /* File descriptor (SIGIO) */
    uint32_t ssi_tid;     /* Kernel timer ID (POSIX timers) */
    uint32_t ssi_band;    /* Band event (SIGIO) */
    uint32_t ssi_overrun; /* POSIX timer overrun count */
    uint32_t ssi_trapno;  /* Trap number that caused signal */
    int32_t  ssi_status;  /* Exit status or signal (SIGCHLD) */
    int32_t  ssi_int;     /* Integer sent by sigqueue(2) */
    uint64_t ssi_ptr;     /* Pointer sent by sigqueue(2) */
    uint64_t ssi_utime;   /* User CPU time consumed (SIGCHLD) */
    uint64_t ssi_stime;   /* System CPU time consumed (SIGCHLD) */
    uint64_t ssi_addr;    /* Address that generated signal
                             (for hardware-generated signals) */
    uint8_t  pad[48];     /* Pad size to 128 bytes (allow for
                             additional fields in the future) */
};

int qemu_signalfd(const sigset_t *mask);
void sigaction_invoke(struct sigaction *action,
                      struct qemu_signalfd_siginfo *info);
#endif
458
/* Memory advice / protection helpers. */
int qemu_madvise(void *addr, size_t len, int advice);
int qemu_mprotect_rwx(void *addr, size_t size);
int qemu_mprotect_none(void *addr, size_t size);

/* File descriptor helpers; qemu_dup() is not provided on Windows. */
int qemu_open(const char *name, int flags, ...);
int qemu_close(int fd);
#ifndef _WIN32
int qemu_dup(int fd);
#endif
int qemu_lock_fd(int fd, int64_t start, int64_t len, bool exclusive);
int qemu_unlock_fd(int fd, int64_t start, int64_t len);
int qemu_lock_fd_test(int fd, int64_t start, int64_t len, bool exclusive);
bool qemu_has_ofd_lock(void);
472
/*
 * printf() conversion specification selected per host: "%ld" on
 * Haiku/i386, "%" PRId64 on WIN64 and "%d" everywhere else.
 * Presumably meant for printing pid_t values - confirm against callers.
 */
#if defined(__HAIKU__) && defined(__i386__)
#define FMT_pid "%ld"
#elif defined(WIN64)
#define FMT_pid "%" PRId64
#else
#define FMT_pid "%d"
#endif
480
481bool qemu_write_pidfile(const char *pidfile, Error **errp);
482
483int qemu_get_thread_id(void);
484
/*
 * Hosts without CONFIG_IOVEC get a local struct iovec, an IOV_MAX value
 * and readv()/writev() prototypes; everyone else uses <sys/uio.h>.
 */
#ifndef CONFIG_IOVEC
struct iovec {
    void *iov_base;
    size_t iov_len;
};
/*
 * Use the same value as Linux for now.
 */
#define IOV_MAX 1024

ssize_t readv(int fd, const struct iovec *iov, int iov_cnt);
ssize_t writev(int fd, const struct iovec *iov, int iov_cnt);
#else
#include <sys/uio.h>
#endif
500
#ifdef _WIN32
/*
 * Compute *res = *val1 - *val2.
 *
 * When the microsecond field of val1 is smaller than that of val2, one
 * second is borrowed so that res->tv_usec stays non-negative.  Each
 * input field is read before the matching output field is written.
 */
static inline void qemu_timersub(const struct timeval *val1,
                                 const struct timeval *val2,
                                 struct timeval *res)
{
    res->tv_sec = val1->tv_sec - val2->tv_sec;
    if (val1->tv_usec < val2->tv_usec) {
        /* Borrow one second, expressed as 1000 * 1000 microseconds. */
        res->tv_sec--;
        res->tv_usec = val1->tv_usec - val2->tv_usec + 1000 * 1000;
    } else {
        res->tv_usec = val1->tv_usec - val2->tv_usec;
    }
}
#else
/* Other hosts use the C library's timersub() directly. */
#define qemu_timersub timersub
#endif
517
void qemu_set_cloexec(int fd);

/*
 * Starting with QEMU 2.5, qemu_hw_version() returns "2.5+" by default
 * instead of QEMU_VERSION, so setting hw_version on MachineClass
 * is no longer mandatory.
 *
 * Do NOT change this string, or it will break compatibility on all
 * machine classes that don't set hw_version.
 */
#define QEMU_HW_VERSION "2.5+"

/*
 * QEMU "hardware version" setting.  Used to replace code that exposed
 * QEMU_VERSION to guests in the past and needs to keep compatibility.
 * Do not use qemu_hw_version() in new code.
 */
void qemu_set_hw_version(const char *);
const char *qemu_hw_version(void);

void fips_set_state(bool requested);
bool fips_get_state(void);
538
/*
 * Return a dynamically allocated pathname denoting a file or directory that is
 * appropriate for storing local state.
 *
 * @relative_pathname need not start with a directory separator; one will be
 * added automatically.
 *
 * The caller is responsible for releasing the value returned with g_free()
 * after use.
 */
char *qemu_get_local_state_pathname(const char *relative_pathname);

/*
 * Find program directory, and save it for later usage with
 * qemu_get_exec_dir().
 * Try OS specific API first, if not working, parse from argv0.
 */
void qemu_init_exec_dir(const char *argv0);

/*
 * Get the saved exec dir.
 * Caller needs to release the returned string by g_free().
 */
char *qemu_get_exec_dir(void);

/**
 * qemu_getauxval:
 * @type: the auxiliary vector key to lookup
 *
 * Search the auxiliary vector for @type, returning the value
 * or 0 if @type is not present.
 */
unsigned long qemu_getauxval(unsigned long type);

void qemu_set_tty_echo(int fd, bool echo);
569
570void os_mem_prealloc(int fd, char *area, size_t sz, int smp_cpus,
571 Error **errp);
572
573/**
574 * qemu_get_pmem_size:
575 * @filename: path to a pmem file
576 * @errp: pointer to a NULL-initialized error object
577 *
578 * Determine the size of a persistent memory file. Besides supporting files on
579 * DAX file systems, this function also supports Linux devdax character
580 * devices.
581 *
582 * Returns: the size or 0 on failure
583 */
584uint64_t qemu_get_pmem_size(const char *filename, Error **errp);
585
586/**
587 * qemu_get_pid_name:
588 * @pid: pid of a process
589 *
590 * For given @pid fetch its name. Caller is responsible for
591 * freeing the string when no longer needed.
592 * Returns allocated string on success, NULL on failure.
593 */
594char *qemu_get_pid_name(pid_t pid);
595
596/**
597 * qemu_fork:
598 *
599 * A version of fork that avoids signal handler race
600 * conditions that can lead to child process getting
601 * signals that are otherwise only expected by the
602 * parent. It also resets all signal handlers to the
603 * default settings.
604 *
605 * Returns 0 to child process, pid number to parent
606 * or -1 on failure.
607 */
608pid_t qemu_fork(Error **errp);
609
/*
 * Using intptr_t ensures that qemu_*_page_mask is sign-extended even
 * when intptr_t is 32-bit and we are aligning a long long.
 */
extern uintptr_t qemu_real_host_page_size;
extern intptr_t qemu_real_host_page_mask;

/* Instruction and data cache line sizes, plus their base-2 logarithms. */
extern int qemu_icache_linesize;
extern int qemu_icache_linesize_log;
extern int qemu_dcache_linesize;
extern int qemu_dcache_linesize_log;
620
/*
 * After using getopt or getopt_long, if you need to parse another set
 * of options, then you must reset optind.  Unfortunately the way to
 * do this varies between implementations of getopt.
 *
 * Hosts with HAVE_OPTRESET get optind = 1 together with optreset = 1;
 * all other hosts get optind = 0.
 */
static inline void qemu_reset_optind(void)
{
#ifdef HAVE_OPTRESET
    optind = 1;
    optreset = 1;
#else
    optind = 0;
#endif
}
635
636#endif
637