1 | /* |
2 | * QEMU low level functions |
3 | * |
4 | * Copyright (c) 2003 Fabrice Bellard |
5 | * |
6 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
7 | * of this software and associated documentation files (the "Software"), to deal |
8 | * in the Software without restriction, including without limitation the rights |
9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
10 | * copies of the Software, and to permit persons to whom the Software is |
11 | * furnished to do so, subject to the following conditions: |
12 | * |
13 | * The above copyright notice and this permission notice shall be included in |
14 | * all copies or substantial portions of the Software. |
15 | * |
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
22 | * THE SOFTWARE. |
23 | */ |
24 | #include "qemu/osdep.h" |
25 | |
/* "qemu/osdep.h" (included above) is needed early for CONFIG_BSD etc. */
27 | |
28 | #ifdef CONFIG_SOLARIS |
29 | #include <sys/statvfs.h> |
30 | /* See MySQL bug #7156 (http://bugs.mysql.com/bug.php?id=7156) for |
31 | discussion about Solaris header problems */ |
32 | extern int madvise(char *, size_t, int); |
33 | #endif |
34 | |
35 | #include "qemu-common.h" |
36 | #include "qemu/cutils.h" |
37 | #include "qemu/sockets.h" |
38 | #include "qemu/error-report.h" |
39 | #include "monitor/monitor.h" |
40 | |
/* FIPS state: written by fips_set_state(), reported by fips_get_state(). */
static bool fips_enabled = false;

/*
 * Hardware version string returned by qemu_hw_version(); it can be
 * replaced at run time through qemu_set_hw_version().
 */
static const char *hw_version = QEMU_HW_VERSION;
44 | |
/*
 * Pass @v as the TCP_CORK option value for socket @fd.
 *
 * Returns the result of qemu_setsockopt().  On hosts where SOL_TCP or
 * TCP_CORK is not defined nothing is done and 0 is returned.
 */
int socket_set_cork(int fd, int v)
{
#if !defined(SOL_TCP) || !defined(TCP_CORK)
    return 0;
#else
    return qemu_setsockopt(fd, SOL_TCP, TCP_CORK, &v, sizeof(v));
#endif
}
53 | |
54 | int socket_set_nodelay(int fd) |
55 | { |
56 | int v = 1; |
57 | return qemu_setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &v, sizeof(v)); |
58 | } |
59 | |
60 | int qemu_madvise(void *addr, size_t len, int advice) |
61 | { |
62 | if (advice == QEMU_MADV_INVALID) { |
63 | errno = EINVAL; |
64 | return -1; |
65 | } |
66 | #if defined(CONFIG_MADVISE) |
67 | return madvise(addr, len, advice); |
68 | #elif defined(CONFIG_POSIX_MADVISE) |
69 | return posix_madvise(addr, len, advice); |
70 | #else |
71 | errno = EINVAL; |
72 | return -1; |
73 | #endif |
74 | } |
75 | |
76 | static int qemu_mprotect__osdep(void *addr, size_t size, int prot) |
77 | { |
78 | g_assert(!((uintptr_t)addr & ~qemu_real_host_page_mask)); |
79 | g_assert(!(size & ~qemu_real_host_page_mask)); |
80 | |
81 | #ifdef _WIN32 |
82 | DWORD old_protect; |
83 | |
84 | if (!VirtualProtect(addr, size, prot, &old_protect)) { |
85 | error_report("%s: VirtualProtect failed with error code %ld" , |
86 | __func__, GetLastError()); |
87 | return -1; |
88 | } |
89 | return 0; |
90 | #else |
91 | if (mprotect(addr, size, prot)) { |
92 | error_report("%s: mprotect failed: %s" , __func__, strerror(errno)); |
93 | return -1; |
94 | } |
95 | return 0; |
96 | #endif |
97 | } |
98 | |
/*
 * Make [@addr, @addr + @size) readable, writable and executable.
 *
 * Returns whatever qemu_mprotect__osdep() returns (0 or -1).
 */
int qemu_mprotect_rwx(void *addr, size_t size)
{
#ifdef _WIN32
    const int prot = PAGE_EXECUTE_READWRITE;
#else
    const int prot = PROT_READ | PROT_WRITE | PROT_EXEC;
#endif

    return qemu_mprotect__osdep(addr, size, prot);
}
107 | |
/*
 * Remove all access rights from [@addr, @addr + @size).
 *
 * Returns whatever qemu_mprotect__osdep() returns (0 or -1).
 */
int qemu_mprotect_none(void *addr, size_t size)
{
#ifdef _WIN32
    const int prot = PAGE_NOACCESS;
#else
    const int prot = PROT_NONE;
#endif

    return qemu_mprotect__osdep(addr, size, prot);
}
116 | |
117 | #ifndef _WIN32 |
118 | |
/*
 * fcntl() commands used by the locking helpers below.  Both start out as
 * -1, which qemu_probe_lock_ops() treats as "not probed yet"; the probe
 * then stores either the F_OFD_* commands or plain F_SETLK/F_GETLK.
 */
static int fcntl_op_setlk = -1;
static int fcntl_op_getlk = -1;
121 | |
/*
 * Duplicate @fd with qemu_dup() and apply @flags to the duplicate.
 *
 * The O_SYNC bits of @flags must already match those of the duplicate,
 * otherwise the call fails with EINVAL.  The remaining flags are applied
 * with fcntl(F_SETFL).  The file is truncated to length 0 when @flags
 * contains O_TRUNC, or both O_CREAT and O_EXCL.
 *
 * Returns the new descriptor, or -1 with errno set; on failure the
 * duplicate (if any) is closed again and errno is preserved.
 */
static int qemu_dup_flags(int fd, int flags)
{
    const int creat_excl = O_CREAT | O_EXCL;
    int saved_errno;
    int cur_flags;
    int newfd = qemu_dup(fd);

    if (newfd == -1) {
        goto fail;
    }

    cur_flags = fcntl(newfd, F_GETFL);
    if (cur_flags == -1) {
        goto fail;
    }

    /* Any difference in the O_SYNC bits is rejected */
    if ((flags ^ cur_flags) & O_SYNC) {
        errno = EINVAL;
        goto fail;
    }

    /* Set/unset flags that we can with fcntl */
    if (fcntl(newfd, F_SETFL, flags) == -1) {
        goto fail;
    }

    /* Truncate the file in the cases that open() would truncate it */
    if ((flags & O_TRUNC) || (flags & creat_excl) == creat_excl) {
        if (ftruncate(newfd, 0) == -1) {
            goto fail;
        }
    }

    return newfd;

fail:
    /* close() may overwrite errno; keep the original failure reason */
    saved_errno = errno;
    if (newfd != -1) {
        close(newfd);
    }
    errno = saved_errno;
    return -1;
}
169 | |
/*
 * Duplicate @fd.
 *
 * Uses fcntl(F_DUPFD_CLOEXEC) where available; otherwise falls back to
 * dup() followed by qemu_set_cloexec() on the new descriptor.
 *
 * Returns the new descriptor, or -1 on failure.
 */
int qemu_dup(int fd)
{
#ifdef F_DUPFD_CLOEXEC
    return fcntl(fd, F_DUPFD_CLOEXEC, 0);
#else
    int newfd = dup(fd);

    if (newfd == -1) {
        return newfd;
    }
    qemu_set_cloexec(newfd);
    return newfd;
#endif
}
183 | |
/*
 * Parse the fd set id in @param.
 *
 * Simply forwards to qemu_parse_fd() and returns its result.
 */
static int qemu_parse_fdset(const char *param)
{
    int id = qemu_parse_fd(param);

    return id;
}
188 | |
189 | static void qemu_probe_lock_ops(void) |
190 | { |
191 | if (fcntl_op_setlk == -1) { |
192 | #ifdef F_OFD_SETLK |
193 | int fd; |
194 | int ret; |
195 | struct flock fl = { |
196 | .l_whence = SEEK_SET, |
197 | .l_start = 0, |
198 | .l_len = 0, |
199 | .l_type = F_WRLCK, |
200 | }; |
201 | |
202 | fd = open("/dev/null" , O_RDWR); |
203 | if (fd < 0) { |
204 | fprintf(stderr, |
205 | "Failed to open /dev/null for OFD lock probing: %s\n" , |
206 | strerror(errno)); |
207 | fcntl_op_setlk = F_SETLK; |
208 | fcntl_op_getlk = F_GETLK; |
209 | return; |
210 | } |
211 | ret = fcntl(fd, F_OFD_GETLK, &fl); |
212 | close(fd); |
213 | if (!ret) { |
214 | fcntl_op_setlk = F_OFD_SETLK; |
215 | fcntl_op_getlk = F_OFD_GETLK; |
216 | } else { |
217 | fcntl_op_setlk = F_SETLK; |
218 | fcntl_op_getlk = F_GETLK; |
219 | } |
220 | #else |
221 | fcntl_op_setlk = F_SETLK; |
222 | fcntl_op_getlk = F_GETLK; |
223 | #endif |
224 | } |
225 | } |
226 | |
/*
 * Report whether the lock probe selected the F_OFD_* commands.
 *
 * Runs qemu_probe_lock_ops() first.  Always false when F_OFD_SETLK is
 * not defined at build time.
 */
bool qemu_has_ofd_lock(void)
{
    bool has_ofd = false;

    qemu_probe_lock_ops();
#ifdef F_OFD_SETLK
    has_ofd = (fcntl_op_setlk == F_OFD_SETLK);
#endif
    return has_ofd;
}
236 | |
237 | static int qemu_lock_fcntl(int fd, int64_t start, int64_t len, int fl_type) |
238 | { |
239 | int ret; |
240 | struct flock fl = { |
241 | .l_whence = SEEK_SET, |
242 | .l_start = start, |
243 | .l_len = len, |
244 | .l_type = fl_type, |
245 | }; |
246 | qemu_probe_lock_ops(); |
247 | do { |
248 | ret = fcntl(fd, fcntl_op_setlk, &fl); |
249 | } while (ret == -1 && errno == EINTR); |
250 | return ret == -1 ? -errno : 0; |
251 | } |
252 | |
/*
 * Lock the range (@start, @len) of @fd: a write lock (F_WRLCK) when
 * @exclusive is true, a read lock (F_RDLCK) otherwise.
 *
 * Returns 0 on success, -errno on failure.
 */
int qemu_lock_fd(int fd, int64_t start, int64_t len, bool exclusive)
{
    int lock_type = exclusive ? F_WRLCK : F_RDLCK;

    return qemu_lock_fcntl(fd, start, len, lock_type);
}
257 | |
/*
 * Release the lock on the range (@start, @len) of @fd (F_UNLCK).
 *
 * Returns 0 on success, -errno on failure.
 */
int qemu_unlock_fd(int fd, int64_t start, int64_t len)
{
    const int lock_type = F_UNLCK;

    return qemu_lock_fcntl(fd, start, len, lock_type);
}
262 | |
263 | int qemu_lock_fd_test(int fd, int64_t start, int64_t len, bool exclusive) |
264 | { |
265 | int ret; |
266 | struct flock fl = { |
267 | .l_whence = SEEK_SET, |
268 | .l_start = start, |
269 | .l_len = len, |
270 | .l_type = exclusive ? F_WRLCK : F_RDLCK, |
271 | }; |
272 | qemu_probe_lock_ops(); |
273 | ret = fcntl(fd, fcntl_op_getlk, &fl); |
274 | if (ret == -1) { |
275 | return -errno; |
276 | } else { |
277 | return fl.l_type == F_UNLCK ? 0 : -EAGAIN; |
278 | } |
279 | } |
280 | #endif |
281 | |
/*
 * Open @name with @flags, asking for close-on-exec on the result.
 *
 * A third (mode) argument is read only when @flags contains O_CREAT.
 *
 * On non-Windows hosts a name starting with "/dev/fdset/" is not opened
 * as a path: the remainder is parsed as an fd set id, a descriptor is
 * obtained from monitor_fdset_get_fd(), duplicated with qemu_dup_flags()
 * and the duplicate is registered via monitor_fdset_dup_fd_add().
 *
 * Returns the descriptor, or -1 with errno set.
 */
int qemu_open(const char *name, int flags, ...)
{
    int fd;
    int mode = 0;

#ifndef _WIN32
    const char *id_str;

    /* Attempt dup of fd from fd set */
    if (strstart(name, "/dev/fdset/", &id_str)) {
        int64_t fdset_id = qemu_parse_fdset(id_str);
        int src_fd;
        int new_fd;

        if (fdset_id == -1) {
            errno = EINVAL;
            return -1;
        }

        /* A negative result carries the error as -errno */
        src_fd = monitor_fdset_get_fd(fdset_id, flags);
        if (src_fd < 0) {
            errno = -src_fd;
            return -1;
        }

        new_fd = qemu_dup_flags(src_fd, flags);
        if (new_fd == -1) {
            return -1;
        }

        if (monitor_fdset_dup_fd_add(fdset_id, new_fd) == -1) {
            close(new_fd);
            errno = EINVAL;
            return -1;
        }

        return new_fd;
    }
#endif

    if (flags & O_CREAT) {
        va_list args;

        va_start(args, flags);
        mode = va_arg(args, int);
        va_end(args);
    }

#ifdef O_CLOEXEC
    fd = open(name, flags | O_CLOEXEC, mode);
#else
    fd = open(name, flags, mode);
    if (fd >= 0) {
        qemu_set_cloexec(fd);
    }
#endif

#ifdef O_DIRECT
    if (fd == -1 && errno == EINVAL && (flags & O_DIRECT)) {
        error_report("file system may not support O_DIRECT");
        errno = EINVAL; /* in case it was clobbered */
    }
#endif /* O_DIRECT */

    return fd;
}
352 | |
/*
 * Close @fd.
 *
 * If monitor_fdset_dup_fd_find() knows @fd (i.e. it was dup'd from an
 * fd set), the descriptor is also removed from that bookkeeping with
 * monitor_fdset_dup_fd_remove(), but only when close() succeeded.
 *
 * Returns the result of close().
 */
int qemu_close(int fd)
{
    /* Look the fd up before closing it */
    bool from_fdset = monitor_fdset_dup_fd_find(fd) != -1;
    int rc = close(fd);

    if (from_fdset && rc == 0) {
        monitor_fdset_dup_fd_remove(fd);
    }

    return rc;
}
372 | |
/*
 * A variant of write(2) which handles partial writes.
 *
 * Returns the number of bytes transferred.
 * errno is set (by the failing write()) if fewer than `count' bytes
 * are written.
 *
 * This function doesn't work with non-blocking fds.
 * Either way of dealing with EAGAIN on a non-blocking fd is bad:
 *  - returning a short write (then the name is wrong)
 *  - busy waiting by adding (errno == EAGAIN) to the loop
 */
ssize_t qemu_write_full(int fd, const void *buf, size_t count)
{
    /* Byte pointer: arithmetic on `void *' is a GNU extension, not ISO C */
    const char *pos = buf;
    ssize_t total = 0;

    while (count) {
        ssize_t ret = write(fd, pos, count);

        if (ret < 0) {
            /* Interrupted before anything was written: just retry */
            if (errno == EINTR) {
                continue;
            }
            break;
        }

        count -= ret;
        pos += ret;
        total += ret;
    }

    return total;
}
404 | |
/*
 * Create a socket with close-on-exec requested.
 *
 * Where SOCK_CLOEXEC is defined it is tried first; only if that attempt
 * fails with EINVAL is the socket created without it and marked through
 * qemu_set_cloexec() afterwards.
 *
 * Returns the result of socket().
 */
int qemu_socket(int domain, int type, int protocol)
{
    int sock;

#ifdef SOCK_CLOEXEC
    sock = socket(domain, type | SOCK_CLOEXEC, protocol);
    if (!(sock == -1 && errno == EINVAL)) {
        return sock;
    }
#endif

    sock = socket(domain, type, protocol);
    if (sock >= 0) {
        qemu_set_cloexec(sock);
    }

    return sock;
}
425 | |
/*
 * Accept a connection on @s with close-on-exec requested.
 *
 * With CONFIG_ACCEPT4, accept4(SOCK_CLOEXEC) is tried first; only when
 * it fails with ENOSYS is plain accept() used, followed by
 * qemu_set_cloexec() on the accepted descriptor.
 *
 * Returns the result of accept4()/accept().
 */
int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen)
{
    int conn;

#ifdef CONFIG_ACCEPT4
    conn = accept4(s, addr, addrlen, SOCK_CLOEXEC);
    if (!(conn == -1 && errno == ENOSYS)) {
        return conn;
    }
#endif

    conn = accept(s, addr, addrlen);
    if (conn >= 0) {
        qemu_set_cloexec(conn);
    }

    return conn;
}
446 | |
447 | void qemu_set_hw_version(const char *version) |
448 | { |
449 | hw_version = version; |
450 | } |
451 | |
452 | const char *qemu_hw_version(void) |
453 | { |
454 | return hw_version; |
455 | } |
456 | |
457 | void fips_set_state(bool requested) |
458 | { |
459 | #ifdef __linux__ |
460 | if (requested) { |
461 | FILE *fds = fopen("/proc/sys/crypto/fips_enabled" , "r" ); |
462 | if (fds != NULL) { |
463 | fips_enabled = (fgetc(fds) == '1'); |
464 | fclose(fds); |
465 | } |
466 | } |
467 | #else |
468 | fips_enabled = false; |
469 | #endif /* __linux__ */ |
470 | |
471 | #ifdef _FIPS_DEBUG |
472 | fprintf(stderr, "FIPS mode %s (requested %s)\n" , |
473 | (fips_enabled ? "enabled" : "disabled" ), |
474 | (requested ? "enabled" : "disabled" )); |
475 | #endif |
476 | } |
477 | |
478 | bool fips_get_state(void) |
479 | { |
480 | return fips_enabled; |
481 | } |
482 | |
#ifdef _WIN32
/*
 * Windows only: calls WSACleanup().  socket_init() registers this with
 * atexit() after a successful WSAStartup().
 */
static void socket_cleanup(void)
{
    WSACleanup();
}
#endif
489 | |
/*
 * One-time socket layer initialisation.
 *
 * On Windows this starts Winsock 2.2 and registers socket_cleanup() to
 * run at exit; on other hosts there is nothing to do.
 *
 * Returns 0 on success, -1 if WSAStartup() fails (the error code is
 * printed to stderr).
 */
int socket_init(void)
{
#ifdef _WIN32
    WSADATA Data;
    int ret;

    ret = WSAStartup(MAKEWORD(2, 2), &Data);
    if (ret != 0) {
        /*
         * WSAStartup() returns its error code directly;
         * WSAGetLastError() must not be used when it fails.
         */
        fprintf(stderr, "WSAStartup: %d\n", ret);
        return -1;
    }
    atexit(socket_cleanup);
#endif
    return 0;
}
506 | |
507 | |
508 | #ifndef CONFIG_IOVEC |
/*
 * Helper for the readv()/writev() replacements below: transfer the
 * @iov_cnt elements of @iov one at a time with read() or write()
 * (@do_write selects which).
 *
 * The transfer stops at the first element that is moved only partially
 * (including end-of-file on read), so the bytes counted always form one
 * contiguous run starting at the first element.  Zero-length elements
 * are stepped over.  A call interrupted by EINTR is retried.
 *
 * Returns the total number of bytes transferred.  If an error occurs
 * before any byte was transferred, returns -1 with errno set by the
 * failing call; an error after some data was moved yields the byte
 * count so far.  Returns 0 when @iov_cnt is not positive.
 */
static ssize_t
readv_writev(int fd, const struct iovec *iov, int iov_cnt, bool do_write)
{
    ssize_t total = 0;
    int i = 0;      /* signed, so a negative iov_cnt simply skips the loop */

    while (i < iov_cnt) {
        size_t len = iov[i].iov_len;
        ssize_t r = do_write
            ? write(fd, iov[i].iov_base, len)
            : read(fd, iov[i].iov_base, len);

        if (r < 0) {
            if (errno == EINTR) {
                continue;
            }
            /* some "other" error: only report it if no data was processed */
            if (total == 0) {
                total = -1;
            }
            break;
        }

        total += r;
        if ((size_t)r < len) {
            /*
             * Short transfer or EOF: moving on to the next element
             * would leave a gap in the data, so stop here.
             */
            break;
        }
        i++;
    }
    return total;
}
537 | |
/*
 * Replacement readv() for hosts without CONFIG_IOVEC: fills the
 * @iov_cnt elements of @iov from @fd via readv_writev().
 */
ssize_t
readv(int fd, const struct iovec *iov, int iov_cnt)
{
    const bool do_write = false;

    return readv_writev(fd, iov, iov_cnt, do_write);
}
543 | |
/*
 * Replacement writev() for hosts without CONFIG_IOVEC: writes the
 * @iov_cnt elements of @iov to @fd via readv_writev().
 */
ssize_t
writev(int fd, const struct iovec *iov, int iov_cnt)
{
    const bool do_write = true;

    return readv_writev(fd, iov, iov_cnt, do_write);
}
549 | #endif |
550 | |