1 | /* |
2 | * QEMU System Emulator |
3 | * |
4 | * Copyright (c) 2003-2008 Fabrice Bellard |
5 | * Copyright (c) 2009 Red Hat, Inc. |
6 | * |
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
8 | * of this software and associated documentation files (the "Software"), to deal |
9 | * in the Software without restriction, including without limitation the rights |
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
11 | * copies of the Software, and to permit persons to whom the Software is |
12 | * furnished to do so, subject to the following conditions: |
13 | * |
14 | * The above copyright notice and this permission notice shall be included in |
15 | * all copies or substantial portions of the Software. |
16 | * |
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
20 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
23 | * THE SOFTWARE. |
24 | */ |
25 | |
26 | #include "qemu/osdep.h" |
27 | #include "tap_int.h" |
28 | |
29 | |
30 | #include <sys/ioctl.h> |
31 | #include <sys/wait.h> |
32 | #include <sys/socket.h> |
33 | #include <net/if.h> |
34 | |
35 | #include "net/net.h" |
36 | #include "clients.h" |
37 | #include "monitor/monitor.h" |
38 | #include "sysemu/sysemu.h" |
39 | #include "qapi/error.h" |
40 | #include "qemu-common.h" |
41 | #include "qemu/cutils.h" |
42 | #include "qemu/error-report.h" |
43 | #include "qemu/main-loop.h" |
44 | #include "qemu/sockets.h" |
45 | |
46 | #include "net/tap.h" |
47 | |
48 | #include "net/vhost_net.h" |
49 | |
/* State of one TAP network client (one tap file descriptor). */
typedef struct TAPState {
    /* Embedded generic client; DO_UPCAST() recovers the TAPState from it. */
    NetClientState nc;
    /* Tap file descriptor; tap_cleanup() closes it and stores -1. */
    int fd;
    /* Script run by tap_exit_notify(); an empty string means "none". */
    char down_script[1024];
    /* Argument handed to down_script (filled from the interface name). */
    char down_script_arg[128];
    /* Buffer tap_send() reads incoming packets into. */
    uint8_t buf[NET_BUFSIZE];
    /* When set (and enabled), tap_send is installed as the read handler. */
    bool read_poll;
    /* When set (and enabled), tap_writable is installed as the write handler. */
    bool write_poll;
    /*
     * Set through tap_using_vnet_hdr().  While false and host_vnet_hdr_len
     * is non-zero, this file prepends/strips the host header itself.
     */
    bool using_vnet_hdr;
    /* Result of tap_probe_has_ufo() taken in net_tap_fd_init(). */
    bool has_ufo;
    /* Toggled by tap_enable()/tap_disable(); gates both fd handlers. */
    bool enabled;
    /* vhost-net state, or NULL when vhost is not in use. */
    VHostNetState *vhost_net;
    /* Length of the vnet header carried on the fd; 0 when there is none. */
    unsigned host_vnet_hdr_len;
    /* Exit notifier whose callback is tap_exit_notify(). */
    Notifier exit;
} TAPState;
65 | |
/* Forward declarations for functions that are used before their definition. */
static void launch_script(const char *setup_script, const char *ifname,
                          int fd, Error **errp);

static void tap_send(void *opaque);
static void tap_writable(void *opaque);
71 | |
72 | static void tap_update_fd_handler(TAPState *s) |
73 | { |
74 | qemu_set_fd_handler(s->fd, |
75 | s->read_poll && s->enabled ? tap_send : NULL, |
76 | s->write_poll && s->enabled ? tap_writable : NULL, |
77 | s); |
78 | } |
79 | |
/* Turn read polling on @s on or off and refresh the fd handlers. */
static void tap_read_poll(TAPState *s, bool enable)
{
    s->read_poll = enable;
    tap_update_fd_handler(s);
}
85 | |
/* Turn write polling on @s on or off and refresh the fd handlers. */
static void tap_write_poll(TAPState *s, bool enable)
{
    s->write_poll = enable;
    tap_update_fd_handler(s);
}
91 | |
92 | static void tap_writable(void *opaque) |
93 | { |
94 | TAPState *s = opaque; |
95 | |
96 | tap_write_poll(s, false); |
97 | |
98 | qemu_flush_queued_packets(&s->nc); |
99 | } |
100 | |
101 | static ssize_t tap_write_packet(TAPState *s, const struct iovec *iov, int iovcnt) |
102 | { |
103 | ssize_t len; |
104 | |
105 | do { |
106 | len = writev(s->fd, iov, iovcnt); |
107 | } while (len == -1 && errno == EINTR); |
108 | |
109 | if (len == -1 && errno == EAGAIN) { |
110 | tap_write_poll(s, true); |
111 | return 0; |
112 | } |
113 | |
114 | return len; |
115 | } |
116 | |
117 | static ssize_t tap_receive_iov(NetClientState *nc, const struct iovec *iov, |
118 | int iovcnt) |
119 | { |
120 | TAPState *s = DO_UPCAST(TAPState, nc, nc); |
121 | const struct iovec *iovp = iov; |
122 | struct iovec iov_copy[iovcnt + 1]; |
123 | struct virtio_net_hdr_mrg_rxbuf hdr = { }; |
124 | |
125 | if (s->host_vnet_hdr_len && !s->using_vnet_hdr) { |
126 | iov_copy[0].iov_base = &hdr; |
127 | iov_copy[0].iov_len = s->host_vnet_hdr_len; |
128 | memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov)); |
129 | iovp = iov_copy; |
130 | iovcnt++; |
131 | } |
132 | |
133 | return tap_write_packet(s, iovp, iovcnt); |
134 | } |
135 | |
136 | static ssize_t tap_receive_raw(NetClientState *nc, const uint8_t *buf, size_t size) |
137 | { |
138 | TAPState *s = DO_UPCAST(TAPState, nc, nc); |
139 | struct iovec iov[2]; |
140 | int iovcnt = 0; |
141 | struct virtio_net_hdr_mrg_rxbuf hdr = { }; |
142 | |
143 | if (s->host_vnet_hdr_len) { |
144 | iov[iovcnt].iov_base = &hdr; |
145 | iov[iovcnt].iov_len = s->host_vnet_hdr_len; |
146 | iovcnt++; |
147 | } |
148 | |
149 | iov[iovcnt].iov_base = (char *)buf; |
150 | iov[iovcnt].iov_len = size; |
151 | iovcnt++; |
152 | |
153 | return tap_write_packet(s, iov, iovcnt); |
154 | } |
155 | |
156 | static ssize_t tap_receive(NetClientState *nc, const uint8_t *buf, size_t size) |
157 | { |
158 | TAPState *s = DO_UPCAST(TAPState, nc, nc); |
159 | struct iovec iov[1]; |
160 | |
161 | if (s->host_vnet_hdr_len && !s->using_vnet_hdr) { |
162 | return tap_receive_raw(nc, buf, size); |
163 | } |
164 | |
165 | iov[0].iov_base = (char *)buf; |
166 | iov[0].iov_len = size; |
167 | |
168 | return tap_write_packet(s, iov, 1); |
169 | } |
170 | |
#ifndef __sun__
/*
 * Read one packet of at most @maxlen bytes from @tapfd into @buf.
 * Returns whatever read() returns.
 */
ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen)
{
    ssize_t nread = read(tapfd, buf, maxlen);

    return nread;
}
#endif
177 | |
178 | static void tap_send_completed(NetClientState *nc, ssize_t len) |
179 | { |
180 | TAPState *s = DO_UPCAST(TAPState, nc, nc); |
181 | tap_read_poll(s, true); |
182 | } |
183 | |
184 | static void tap_send(void *opaque) |
185 | { |
186 | TAPState *s = opaque; |
187 | int size; |
188 | int packets = 0; |
189 | |
190 | while (true) { |
191 | uint8_t *buf = s->buf; |
192 | |
193 | size = tap_read_packet(s->fd, s->buf, sizeof(s->buf)); |
194 | if (size <= 0) { |
195 | break; |
196 | } |
197 | |
198 | if (s->host_vnet_hdr_len && !s->using_vnet_hdr) { |
199 | buf += s->host_vnet_hdr_len; |
200 | size -= s->host_vnet_hdr_len; |
201 | } |
202 | |
203 | size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed); |
204 | if (size == 0) { |
205 | tap_read_poll(s, false); |
206 | break; |
207 | } else if (size < 0) { |
208 | break; |
209 | } |
210 | |
211 | /* |
212 | * When the host keeps receiving more packets while tap_send() is |
213 | * running we can hog the QEMU global mutex. Limit the number of |
214 | * packets that are processed per tap_send() callback to prevent |
215 | * stalling the guest. |
216 | */ |
217 | packets++; |
218 | if (packets >= 50) { |
219 | break; |
220 | } |
221 | } |
222 | } |
223 | |
224 | static bool tap_has_ufo(NetClientState *nc) |
225 | { |
226 | TAPState *s = DO_UPCAST(TAPState, nc, nc); |
227 | |
228 | assert(nc->info->type == NET_CLIENT_DRIVER_TAP); |
229 | |
230 | return s->has_ufo; |
231 | } |
232 | |
233 | static bool tap_has_vnet_hdr(NetClientState *nc) |
234 | { |
235 | TAPState *s = DO_UPCAST(TAPState, nc, nc); |
236 | |
237 | assert(nc->info->type == NET_CLIENT_DRIVER_TAP); |
238 | |
239 | return !!s->host_vnet_hdr_len; |
240 | } |
241 | |
242 | static bool tap_has_vnet_hdr_len(NetClientState *nc, int len) |
243 | { |
244 | TAPState *s = DO_UPCAST(TAPState, nc, nc); |
245 | |
246 | assert(nc->info->type == NET_CLIENT_DRIVER_TAP); |
247 | |
248 | return !!tap_probe_vnet_hdr_len(s->fd, len); |
249 | } |
250 | |
251 | static void tap_set_vnet_hdr_len(NetClientState *nc, int len) |
252 | { |
253 | TAPState *s = DO_UPCAST(TAPState, nc, nc); |
254 | |
255 | assert(nc->info->type == NET_CLIENT_DRIVER_TAP); |
256 | assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) || |
257 | len == sizeof(struct virtio_net_hdr)); |
258 | |
259 | tap_fd_set_vnet_hdr_len(s->fd, len); |
260 | s->host_vnet_hdr_len = len; |
261 | } |
262 | |
263 | static void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr) |
264 | { |
265 | TAPState *s = DO_UPCAST(TAPState, nc, nc); |
266 | |
267 | assert(nc->info->type == NET_CLIENT_DRIVER_TAP); |
268 | assert(!!s->host_vnet_hdr_len == using_vnet_hdr); |
269 | |
270 | s->using_vnet_hdr = using_vnet_hdr; |
271 | } |
272 | |
273 | static int tap_set_vnet_le(NetClientState *nc, bool is_le) |
274 | { |
275 | TAPState *s = DO_UPCAST(TAPState, nc, nc); |
276 | |
277 | return tap_fd_set_vnet_le(s->fd, is_le); |
278 | } |
279 | |
280 | static int tap_set_vnet_be(NetClientState *nc, bool is_be) |
281 | { |
282 | TAPState *s = DO_UPCAST(TAPState, nc, nc); |
283 | |
284 | return tap_fd_set_vnet_be(s->fd, is_be); |
285 | } |
286 | |
287 | static void tap_set_offload(NetClientState *nc, int csum, int tso4, |
288 | int tso6, int ecn, int ufo) |
289 | { |
290 | TAPState *s = DO_UPCAST(TAPState, nc, nc); |
291 | if (s->fd < 0) { |
292 | return; |
293 | } |
294 | |
295 | tap_fd_set_offload(s->fd, csum, tso4, tso6, ecn, ufo); |
296 | } |
297 | |
298 | static void tap_exit_notify(Notifier *notifier, void *data) |
299 | { |
300 | TAPState *s = container_of(notifier, TAPState, exit); |
301 | Error *err = NULL; |
302 | |
303 | if (s->down_script[0]) { |
304 | launch_script(s->down_script, s->down_script_arg, s->fd, &err); |
305 | if (err) { |
306 | error_report_err(err); |
307 | } |
308 | } |
309 | } |
310 | |
311 | static void tap_cleanup(NetClientState *nc) |
312 | { |
313 | TAPState *s = DO_UPCAST(TAPState, nc, nc); |
314 | |
315 | if (s->vhost_net) { |
316 | vhost_net_cleanup(s->vhost_net); |
317 | g_free(s->vhost_net); |
318 | s->vhost_net = NULL; |
319 | } |
320 | |
321 | qemu_purge_queued_packets(nc); |
322 | |
323 | tap_exit_notify(&s->exit, NULL); |
324 | qemu_remove_exit_notifier(&s->exit); |
325 | |
326 | tap_read_poll(s, false); |
327 | tap_write_poll(s, false); |
328 | close(s->fd); |
329 | s->fd = -1; |
330 | } |
331 | |
/* Poll callback: switch both read and write polling of the fd to @enable. */
static void tap_poll(NetClientState *nc, bool enable)
{
    TAPState *s = DO_UPCAST(TAPState, nc, nc);
    tap_read_poll(s, enable);
    tap_write_poll(s, enable);
}
338 | |
339 | int tap_get_fd(NetClientState *nc) |
340 | { |
341 | TAPState *s = DO_UPCAST(TAPState, nc, nc); |
342 | assert(nc->info->type == NET_CLIENT_DRIVER_TAP); |
343 | return s->fd; |
344 | } |
345 | |
346 | /* fd support */ |
347 | |
/*
 * Callback table for TAP clients.  net_tap_fd_init() passes it to
 * qemu_new_net_client(); .size makes the allocation large enough for the
 * whole TAPState.
 */
static NetClientInfo net_tap_info = {
    .type = NET_CLIENT_DRIVER_TAP,
    .size = sizeof(TAPState),
    .receive = tap_receive,
    .receive_raw = tap_receive_raw,
    .receive_iov = tap_receive_iov,
    .poll = tap_poll,
    .cleanup = tap_cleanup,
    .has_ufo = tap_has_ufo,
    .has_vnet_hdr = tap_has_vnet_hdr,
    .has_vnet_hdr_len = tap_has_vnet_hdr_len,
    .using_vnet_hdr = tap_using_vnet_hdr,
    .set_offload = tap_set_offload,
    .set_vnet_hdr_len = tap_set_vnet_hdr_len,
    .set_vnet_le = tap_set_vnet_le,
    .set_vnet_be = tap_set_vnet_be,
};
365 | |
366 | static TAPState *net_tap_fd_init(NetClientState *peer, |
367 | const char *model, |
368 | const char *name, |
369 | int fd, |
370 | int vnet_hdr) |
371 | { |
372 | NetClientState *nc; |
373 | TAPState *s; |
374 | |
375 | nc = qemu_new_net_client(&net_tap_info, peer, model, name); |
376 | |
377 | s = DO_UPCAST(TAPState, nc, nc); |
378 | |
379 | s->fd = fd; |
380 | s->host_vnet_hdr_len = vnet_hdr ? sizeof(struct virtio_net_hdr) : 0; |
381 | s->using_vnet_hdr = false; |
382 | s->has_ufo = tap_probe_has_ufo(s->fd); |
383 | s->enabled = true; |
384 | tap_set_offload(&s->nc, 0, 0, 0, 0, 0); |
385 | /* |
386 | * Make sure host header length is set correctly in tap: |
387 | * it might have been modified by another instance of qemu. |
388 | */ |
389 | if (tap_probe_vnet_hdr_len(s->fd, s->host_vnet_hdr_len)) { |
390 | tap_fd_set_vnet_hdr_len(s->fd, s->host_vnet_hdr_len); |
391 | } |
392 | tap_read_poll(s, true); |
393 | s->vhost_net = NULL; |
394 | |
395 | s->exit.notify = tap_exit_notify; |
396 | qemu_add_exit_notifier(&s->exit); |
397 | |
398 | return s; |
399 | } |
400 | |
401 | static void launch_script(const char *setup_script, const char *ifname, |
402 | int fd, Error **errp) |
403 | { |
404 | int pid, status; |
405 | char *args[3]; |
406 | char **parg; |
407 | |
408 | /* try to launch network script */ |
409 | pid = fork(); |
410 | if (pid < 0) { |
411 | error_setg_errno(errp, errno, "could not launch network script %s" , |
412 | setup_script); |
413 | return; |
414 | } |
415 | if (pid == 0) { |
416 | int open_max = sysconf(_SC_OPEN_MAX), i; |
417 | |
418 | for (i = 3; i < open_max; i++) { |
419 | if (i != fd) { |
420 | close(i); |
421 | } |
422 | } |
423 | parg = args; |
424 | *parg++ = (char *)setup_script; |
425 | *parg++ = (char *)ifname; |
426 | *parg = NULL; |
427 | execv(setup_script, args); |
428 | _exit(1); |
429 | } else { |
430 | while (waitpid(pid, &status, 0) != pid) { |
431 | /* loop */ |
432 | } |
433 | |
434 | if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { |
435 | return; |
436 | } |
437 | error_setg(errp, "network script %s failed with status %d" , |
438 | setup_script, status); |
439 | } |
440 | } |
441 | |
/*
 * Receive one file descriptor sent as SCM_RIGHTS ancillary data over the
 * UNIX socket @c.
 *
 * Returns the received descriptor on success.  On failure returns -1 with
 * errno set: to the recvmsg() error when that call fails, or to EPROTO
 * when the peer closed the socket or sent data without a descriptor
 * attached.
 */
static int recv_fd(int c)
{
    int fd;
    /* The union keeps the control buffer aligned for struct cmsghdr. */
    union {
        struct cmsghdr align;
        uint8_t buf[CMSG_SPACE(sizeof(fd))];
    } control;
    struct msghdr msg = {
        .msg_control = control.buf,
        .msg_controllen = sizeof(control.buf),
    };
    struct cmsghdr *cmsg;
    struct iovec iov;
    uint8_t req[1];
    ssize_t len;

    iov.iov_base = req;
    iov.iov_len = sizeof(req);

    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;

    len = recvmsg(c, &msg, 0);
    if (len < 0) {
        return -1;
    }
    if (len == 0) {
        /* Peer closed the socket without sending anything. */
        errno = EPROTO;
        return -1;
    }

    /*
     * Only trust the control data if the kernel actually delivered an
     * SCM_RIGHTS message large enough to hold one descriptor.
     */
    cmsg = CMSG_FIRSTHDR(&msg);
    if (cmsg == NULL ||
        cmsg->cmsg_level != SOL_SOCKET ||
        cmsg->cmsg_type != SCM_RIGHTS ||
        cmsg->cmsg_len < CMSG_LEN(sizeof(fd))) {
        errno = EPROTO;
        return -1;
    }

    memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd));
    return fd;
}
475 | |
476 | static int net_bridge_run_helper(const char *helper, const char *bridge, |
477 | Error **errp) |
478 | { |
479 | sigset_t oldmask, mask; |
480 | int pid, status; |
481 | char *args[5]; |
482 | char **parg; |
483 | int sv[2]; |
484 | |
485 | sigemptyset(&mask); |
486 | sigaddset(&mask, SIGCHLD); |
487 | sigprocmask(SIG_BLOCK, &mask, &oldmask); |
488 | |
489 | if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) { |
490 | error_setg_errno(errp, errno, "socketpair() failed" ); |
491 | return -1; |
492 | } |
493 | |
494 | /* try to launch bridge helper */ |
495 | pid = fork(); |
496 | if (pid < 0) { |
497 | error_setg_errno(errp, errno, "Can't fork bridge helper" ); |
498 | return -1; |
499 | } |
500 | if (pid == 0) { |
501 | int open_max = sysconf(_SC_OPEN_MAX), i; |
502 | char *fd_buf = NULL; |
503 | char *br_buf = NULL; |
504 | char *helper_cmd = NULL; |
505 | |
506 | for (i = 3; i < open_max; i++) { |
507 | if (i != sv[1]) { |
508 | close(i); |
509 | } |
510 | } |
511 | |
512 | fd_buf = g_strdup_printf("%s%d" , "--fd=" , sv[1]); |
513 | |
514 | if (strrchr(helper, ' ') || strrchr(helper, '\t')) { |
515 | /* assume helper is a command */ |
516 | |
517 | if (strstr(helper, "--br=" ) == NULL) { |
518 | br_buf = g_strdup_printf("%s%s" , "--br=" , bridge); |
519 | } |
520 | |
521 | helper_cmd = g_strdup_printf("%s %s %s %s" , helper, |
522 | "--use-vnet" , fd_buf, br_buf ? br_buf : "" ); |
523 | |
524 | parg = args; |
525 | *parg++ = (char *)"sh" ; |
526 | *parg++ = (char *)"-c" ; |
527 | *parg++ = helper_cmd; |
528 | *parg++ = NULL; |
529 | |
530 | execv("/bin/sh" , args); |
531 | g_free(helper_cmd); |
532 | } else { |
533 | /* assume helper is just the executable path name */ |
534 | |
535 | br_buf = g_strdup_printf("%s%s" , "--br=" , bridge); |
536 | |
537 | parg = args; |
538 | *parg++ = (char *)helper; |
539 | *parg++ = (char *)"--use-vnet" ; |
540 | *parg++ = fd_buf; |
541 | *parg++ = br_buf; |
542 | *parg++ = NULL; |
543 | |
544 | execv(helper, args); |
545 | } |
546 | g_free(fd_buf); |
547 | g_free(br_buf); |
548 | _exit(1); |
549 | |
550 | } else { |
551 | int fd; |
552 | int saved_errno; |
553 | |
554 | close(sv[1]); |
555 | |
556 | do { |
557 | fd = recv_fd(sv[0]); |
558 | } while (fd == -1 && errno == EINTR); |
559 | saved_errno = errno; |
560 | |
561 | close(sv[0]); |
562 | |
563 | while (waitpid(pid, &status, 0) != pid) { |
564 | /* loop */ |
565 | } |
566 | sigprocmask(SIG_SETMASK, &oldmask, NULL); |
567 | if (fd < 0) { |
568 | error_setg_errno(errp, saved_errno, |
569 | "failed to recv file descriptor" ); |
570 | return -1; |
571 | } |
572 | if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { |
573 | error_setg(errp, "bridge helper failed" ); |
574 | return -1; |
575 | } |
576 | return fd; |
577 | } |
578 | } |
579 | |
580 | int net_init_bridge(const Netdev *netdev, const char *name, |
581 | NetClientState *peer, Error **errp) |
582 | { |
583 | const NetdevBridgeOptions *bridge; |
584 | const char *helper, *br; |
585 | TAPState *s; |
586 | int fd, vnet_hdr; |
587 | |
588 | assert(netdev->type == NET_CLIENT_DRIVER_BRIDGE); |
589 | bridge = &netdev->u.bridge; |
590 | |
591 | helper = bridge->has_helper ? bridge->helper : DEFAULT_BRIDGE_HELPER; |
592 | br = bridge->has_br ? bridge->br : DEFAULT_BRIDGE_INTERFACE; |
593 | |
594 | fd = net_bridge_run_helper(helper, br, errp); |
595 | if (fd == -1) { |
596 | return -1; |
597 | } |
598 | |
599 | qemu_set_nonblock(fd); |
600 | vnet_hdr = tap_probe_vnet_hdr(fd); |
601 | s = net_tap_fd_init(peer, "bridge" , name, fd, vnet_hdr); |
602 | |
603 | snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s,br=%s" , helper, |
604 | br); |
605 | |
606 | return 0; |
607 | } |
608 | |
609 | static int net_tap_init(const NetdevTapOptions *tap, int *vnet_hdr, |
610 | const char *setup_script, char *ifname, |
611 | size_t ifname_sz, int mq_required, Error **errp) |
612 | { |
613 | Error *err = NULL; |
614 | int fd, vnet_hdr_required; |
615 | |
616 | if (tap->has_vnet_hdr) { |
617 | *vnet_hdr = tap->vnet_hdr; |
618 | vnet_hdr_required = *vnet_hdr; |
619 | } else { |
620 | *vnet_hdr = 1; |
621 | vnet_hdr_required = 0; |
622 | } |
623 | |
624 | TFR(fd = tap_open(ifname, ifname_sz, vnet_hdr, vnet_hdr_required, |
625 | mq_required, errp)); |
626 | if (fd < 0) { |
627 | return -1; |
628 | } |
629 | |
630 | if (setup_script && |
631 | setup_script[0] != '\0' && |
632 | strcmp(setup_script, "no" ) != 0) { |
633 | launch_script(setup_script, ifname, fd, &err); |
634 | if (err) { |
635 | error_propagate(errp, err); |
636 | close(fd); |
637 | return -1; |
638 | } |
639 | } |
640 | |
641 | return fd; |
642 | } |
643 | |
644 | #define MAX_TAP_QUEUES 1024 |
645 | |
/*
 * Set up one TAP client (one queue) on the already opened descriptor @fd.
 *
 * Creates the client with net_tap_fd_init(), applies the send buffer
 * option, fills in the client's info string and, for the ifname/script
 * case, records the down script.  When vhost is requested it obtains a
 * vhost descriptor (from @vhostfdname via the monitor, or by opening
 * /dev/vhost-net) and initialises vhost-net.
 *
 * Failures are reported through @errp.  vhost failures are only errors
 * when vhostforce is set; otherwise they produce a warning and the
 * function returns without vhost.
 *
 * NOTE(review): the error returns below do not tear down the client that
 * net_tap_fd_init() created -- confirm how callers are expected to handle
 * that.
 */
static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
                             const char *model, const char *name,
                             const char *ifname, const char *script,
                             const char *downscript, const char *vhostfdname,
                             int vnet_hdr, int fd, Error **errp)
{
    Error *err = NULL;
    TAPState *s = net_tap_fd_init(peer, model, name, fd, vnet_hdr);
    int vhostfd;

    tap_set_sndbuf(s->fd, tap, &err);
    if (err) {
        error_propagate(errp, err);
        return;
    }

    /* Describe where this tap came from; only the last case uses ifname. */
    if (tap->has_fd || tap->has_fds) {
        snprintf(s->nc.info_str, sizeof(s->nc.info_str), "fd=%d", fd);
    } else if (tap->has_helper) {
        snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s",
                 tap->helper);
    } else {
        snprintf(s->nc.info_str, sizeof(s->nc.info_str),
                 "ifname=%s,script=%s,downscript=%s", ifname, script,
                 downscript);

        /* Remember the down script so tap_exit_notify() can run it. */
        if (strcmp(downscript, "no") != 0) {
            snprintf(s->down_script, sizeof(s->down_script), "%s", downscript);
            snprintf(s->down_script_arg, sizeof(s->down_script_arg),
                     "%s", ifname);
        }
    }

    /*
     * An explicit vhost= option decides; without it, vhost is used when a
     * vhost fd was supplied or vhostforce is set.
     */
    if (tap->has_vhost ? tap->vhost :
        vhostfdname || (tap->has_vhostforce && tap->vhostforce)) {
        VhostNetOptions options;

        options.backend_type = VHOST_BACKEND_TYPE_KERNEL;
        options.net_backend = &s->nc;
        if (tap->has_poll_us) {
            options.busyloop_timeout = tap->poll_us;
        } else {
            options.busyloop_timeout = 0;
        }

        if (vhostfdname) {
            vhostfd = monitor_fd_param(cur_mon, vhostfdname, &err);
            if (vhostfd == -1) {
                if (tap->has_vhostforce && tap->vhostforce) {
                    error_propagate(errp, err);
                } else {
                    warn_report_err(err);
                }
                return;
            }
            qemu_set_nonblock(vhostfd);
        } else {
            vhostfd = open("/dev/vhost-net", O_RDWR);
            if (vhostfd < 0) {
                if (tap->has_vhostforce && tap->vhostforce) {
                    error_setg_errno(errp, errno,
                                     "tap: open vhost char device failed");
                } else {
                    warn_report("tap: open vhost char device failed: %s",
                                strerror(errno));
                }
                return;
            }
            qemu_set_nonblock(vhostfd);
        }
        /* The vhost fd is handed over packed into the opaque pointer. */
        options.opaque = (void *)(uintptr_t)vhostfd;

        s->vhost_net = vhost_net_init(&options);
        if (!s->vhost_net) {
            if (tap->has_vhostforce && tap->vhostforce) {
                error_setg(errp, VHOST_NET_INIT_FAILED);
            } else {
                warn_report(VHOST_NET_INIT_FAILED);
            }
            return;
        }
    } else if (vhostfdname) {
        error_setg(errp, "vhostfd(s)= is not valid without vhost");
    }
}
731 | |
/*
 * Split the ':'-separated list @str into newly allocated strings stored
 * in @fds, writing at most @max entries.  Returns the number of entries
 * stored; the caller frees each of them.
 */
static int get_fds(char *str, char *fds[], int max)
{
    const char *end = str + strlen(str);
    char *cursor = str;
    int count = 0;

    while (count < max && cursor < end) {
        char *sep = strchr(cursor, ':');

        if (sep == NULL) {
            /* Last element: take the rest of the string. */
            fds[count++] = g_strdup(cursor);
            break;
        }

        fds[count++] = g_strndup(cursor, sep - cursor);
        cursor = sep + 1;
    }

    return count;
}
757 | |
/*
 * Create the TAP client(s) described by @netdev.
 *
 * Four mutually exclusive ways of obtaining the tap descriptor(s) are
 * handled, in this order: fd= (one descriptor from the monitor), fds=
 * (a ':'-separated list, one client per descriptor), helper= (descriptor
 * delivered by the bridge helper) and the default case, which opens
 * "queues" tap devices itself and runs the setup script for the first one.
 * Options that do not belong to the chosen mode are rejected.
 *
 * Returns 0 on success, -1 with @errp set on failure.
 */
int net_init_tap(const Netdev *netdev, const char *name,
                 NetClientState *peer, Error **errp)
{
    const NetdevTapOptions *tap;
    int fd, vnet_hdr = 0, i = 0, queues;
    /* for the no-fd, no-helper case */
    const char *script = NULL; /* suppress wrong "uninit'd use" gcc warning */
    const char *downscript = NULL;
    Error *err = NULL;
    const char *vhostfdname;
    char ifname[128];

    assert(netdev->type == NET_CLIENT_DRIVER_TAP);
    tap = &netdev->u.tap;
    queues = tap->has_queues ? tap->queues : 1;
    vhostfdname = tap->has_vhostfd ? tap->vhostfd : NULL;

    /* QEMU hubs do not support multiqueue tap, in this case peer is set.
     * For -netdev, peer is always NULL. */
    if (peer && (tap->has_queues || tap->has_fds || tap->has_vhostfds)) {
        error_setg(errp, "Multiqueue tap cannot be used with hubs");
        return -1;
    }

    if (tap->has_fd) {
        /* Mode 1: a single descriptor named by fd=. */
        if (tap->has_ifname || tap->has_script || tap->has_downscript ||
            tap->has_vnet_hdr || tap->has_helper || tap->has_queues ||
            tap->has_fds || tap->has_vhostfds) {
            error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, "
                       "helper=, queues=, fds=, and vhostfds= "
                       "are invalid with fd=");
            return -1;
        }

        fd = monitor_fd_param(cur_mon, tap->fd, &err);
        if (fd == -1) {
            error_propagate(errp, err);
            return -1;
        }

        qemu_set_nonblock(fd);

        vnet_hdr = tap_probe_vnet_hdr(fd);

        net_init_tap_one(tap, peer, "tap", name, NULL,
                         script, downscript,
                         vhostfdname, vnet_hdr, fd, &err);
        if (err) {
            error_propagate(errp, err);
            return -1;
        }
    } else if (tap->has_fds) {
        /* Mode 2: one client per entry of the fds= list. */
        char **fds;
        char **vhost_fds;
        int nfds = 0, nvhosts = 0;
        int ret = 0;

        if (tap->has_ifname || tap->has_script || tap->has_downscript ||
            tap->has_vnet_hdr || tap->has_helper || tap->has_queues ||
            tap->has_vhostfd) {
            error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, "
                       "helper=, queues=, and vhostfd= "
                       "are invalid with fds=");
            return -1;
        }

        fds = g_new0(char *, MAX_TAP_QUEUES);
        vhost_fds = g_new0(char *, MAX_TAP_QUEUES);

        nfds = get_fds(tap->fds, fds, MAX_TAP_QUEUES);
        if (tap->has_vhostfds) {
            nvhosts = get_fds(tap->vhostfds, vhost_fds, MAX_TAP_QUEUES);
            if (nfds != nvhosts) {
                error_setg(errp, "The number of fds passed does not match "
                           "the number of vhostfds passed");
                ret = -1;
                goto free_fail;
            }
        }

        for (i = 0; i < nfds; i++) {
            fd = monitor_fd_param(cur_mon, fds[i], &err);
            if (fd == -1) {
                error_propagate(errp, err);
                ret = -1;
                goto free_fail;
            }

            qemu_set_nonblock(fd);

            /* All descriptors must agree with the first one on vnet_hdr. */
            if (i == 0) {
                vnet_hdr = tap_probe_vnet_hdr(fd);
            } else if (vnet_hdr != tap_probe_vnet_hdr(fd)) {
                error_setg(errp,
                           "vnet_hdr not consistent across given tap fds");
                ret = -1;
                goto free_fail;
            }

            /*
             * ifname has not been filled in on this path; with fds= set,
             * net_init_tap_one() takes its "fd=" branch and does not read it.
             */
            net_init_tap_one(tap, peer, "tap", name, ifname,
                             script, downscript,
                             tap->has_vhostfds ? vhost_fds[i] : NULL,
                             vnet_hdr, fd, &err);
            if (err) {
                error_propagate(errp, err);
                ret = -1;
                goto free_fail;
            }
        }

        /* Reached on success as well: release the parsed name lists. */
free_fail:
        for (i = 0; i < nvhosts; i++) {
            g_free(vhost_fds[i]);
        }
        for (i = 0; i < nfds; i++) {
            g_free(fds[i]);
        }
        g_free(fds);
        g_free(vhost_fds);
        return ret;
    } else if (tap->has_helper) {
        /* Mode 3: descriptor delivered by the bridge helper. */
        if (tap->has_ifname || tap->has_script || tap->has_downscript ||
            tap->has_vnet_hdr || tap->has_queues || tap->has_vhostfds) {
            error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, "
                       "queues=, and vhostfds= are invalid with helper=");
            return -1;
        }

        fd = net_bridge_run_helper(tap->helper,
                                   tap->has_br ?
                                   tap->br : DEFAULT_BRIDGE_INTERFACE,
                                   errp);
        if (fd == -1) {
            return -1;
        }

        qemu_set_nonblock(fd);
        vnet_hdr = tap_probe_vnet_hdr(fd);

        /*
         * ifname has not been filled in on this path; with helper= set,
         * net_init_tap_one() takes its "helper=" branch and does not read it.
         */
        net_init_tap_one(tap, peer, "bridge", name, ifname,
                         script, downscript, vhostfdname,
                         vnet_hdr, fd, &err);
        if (err) {
            error_propagate(errp, err);
            close(fd);
            return -1;
        }
    } else {
        /* Mode 4: open the tap device(s) ourselves. */
        if (tap->has_vhostfds) {
            error_setg(errp, "vhostfds= is invalid if fds= wasn't specified");
            return -1;
        }
        script = tap->has_script ? tap->script : DEFAULT_NETWORK_SCRIPT;
        downscript = tap->has_downscript ? tap->downscript :
            DEFAULT_NETWORK_DOWN_SCRIPT;

        if (tap->has_ifname) {
            pstrcpy(ifname, sizeof ifname, tap->ifname);
        } else {
            ifname[0] = '\0';
        }

        for (i = 0; i < queues; i++) {
            /* Only the first queue runs the setup script. */
            fd = net_tap_init(tap, &vnet_hdr, i >= 1 ? "no" : script,
                              ifname, sizeof ifname, queues > 1, errp);
            if (fd == -1) {
                return -1;
            }

            /*
             * With several queues and no user-supplied name, fetch the
             * name of the first queue's device into ifname so that it is
             * passed to the net_tap_init() calls for the remaining queues.
             */
            if (queues > 1 && i == 0 && !tap->has_ifname) {
                if (tap_fd_get_ifname(fd, ifname)) {
                    error_setg(errp, "Fail to get ifname");
                    close(fd);
                    return -1;
                }
            }

            /* Likewise, only the first queue records script/downscript. */
            net_init_tap_one(tap, peer, "tap", name, ifname,
                             i >= 1 ? "no" : script,
                             i >= 1 ? "no" : downscript,
                             vhostfdname, vnet_hdr, fd, &err);
            if (err) {
                error_propagate(errp, err);
                close(fd);
                return -1;
            }
        }
    }

    return 0;
}
949 | |
950 | VHostNetState *tap_get_vhost_net(NetClientState *nc) |
951 | { |
952 | TAPState *s = DO_UPCAST(TAPState, nc, nc); |
953 | assert(nc->info->type == NET_CLIENT_DRIVER_TAP); |
954 | return s->vhost_net; |
955 | } |
956 | |
957 | int tap_enable(NetClientState *nc) |
958 | { |
959 | TAPState *s = DO_UPCAST(TAPState, nc, nc); |
960 | int ret; |
961 | |
962 | if (s->enabled) { |
963 | return 0; |
964 | } else { |
965 | ret = tap_fd_enable(s->fd); |
966 | if (ret == 0) { |
967 | s->enabled = true; |
968 | tap_update_fd_handler(s); |
969 | } |
970 | return ret; |
971 | } |
972 | } |
973 | |
974 | int tap_disable(NetClientState *nc) |
975 | { |
976 | TAPState *s = DO_UPCAST(TAPState, nc, nc); |
977 | int ret; |
978 | |
979 | if (s->enabled == 0) { |
980 | return 0; |
981 | } else { |
982 | ret = tap_fd_disable(s->fd); |
983 | if (ret == 0) { |
984 | qemu_purge_queued_packets(nc); |
985 | s->enabled = false; |
986 | tap_update_fd_handler(s); |
987 | } |
988 | return ret; |
989 | } |
990 | } |
991 | |