/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/iov.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/announce.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/qdev-properties.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"
#include "sysemu/sysemu.h"
#include "trace.h"

#define VIRTIO_NET_VM_VERSION 11

#define MAC_TABLE_ENTRIES 64
#define MAX_VLAN (1 << 12) /* Per 802.1Q definition */

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queues; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE 8 /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG 0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH 0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
/* value of the header length field (in 32-bit words) for an IP header
   without options */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE 32 /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/* Purge coalesced packets timer interval. This value affects performance
   a lot and should be tuned carefully: 300000 (300us) is the recommended
   value for passing the WHQL test, while 50000 gains 2x netperf throughput
   with tso/gso/gro disabled. */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

/* temporary until the standard header includes it */
#if !defined(VIRTIO_NET_HDR_F_RSC_INFO)

#define VIRTIO_NET_HDR_F_RSC_INFO 4 /* rsc_ext data in csum_ fields */
#define VIRTIO_NET_F_RSC_EXT 61

static inline __virtio16 *virtio_net_rsc_ext_num_packets(
    struct virtio_net_hdr *hdr)
{
    return &hdr->csum_start;
}

static inline __virtio16 *virtio_net_rsc_ext_num_dupacks(
    struct virtio_net_hdr *hdr)
{
    return &hdr->csum_offset;
}

#endif

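/*
 * The device config space ends at a different offset depending on which
 * features were negotiated; this table maps each feature bit to the end
 * of the last config field it exposes.
 */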
static VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = virtio_endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = virtio_endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = virtio_endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = virtio_endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = virtio_endof(struct virtio_net_config, duplex)},
    {}
};

static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}

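/* Virtqueues come in RX/TX pairs: even indexes are RX, odd indexes are TX. */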
static int vq2q(int queue_index)
{
    return queue_index / 2;
}

/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;

    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    memcpy(config, &netcfg, n->config_size);
}

static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }
}

static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}

static void virtio_net_announce_notify(VirtIONet *net)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(net);
    trace_virtio_net_announce_notify();

    net->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}

static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    trace_virtio_net_announce_timer(n->announce_timer.round);

    n->announce_timer.round--;
    virtio_net_announce_notify(n);
}

static void virtio_net_announce(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /*
     * Make sure the virtio migration announcement timer isn't running.
     * If it is, let it trigger the announcement instead, so that we do
     * not cause confusion.
     */
    if (n->announce_timer.round) {
        return;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_net_announce_notify(n);
    }
}

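/*
 * Start or stop the vhost backend so that it matches the device status:
 * purge any pending packets first, apply the negotiated MTU, and fall
 * back to userspace virtio if vhost cannot be started.
 */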
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queues; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%u-byte MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queues);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queues);
        n->vhost_started = 0;
    }
}

static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    if (virtio_is_big_endian(vdev)) {
        return qemu_set_vnet_be(peer, enable);
    } else {
        return qemu_set_vnet_le(peer, enable);
    }
}

static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queues, bool enable)
{
    int i;

    for (i = 0; i < queues; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}

static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fall back to fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queues, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
    }
}

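/* Drop everything pending on a TX virtqueue and let the guest reclaim it. */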
static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    unsigned int dropped = virtqueue_drop_all(vq);
    if (dropped) {
        virtio_notify(vdev, vq);
    }
}

static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* If tx is waiting, we likely have some packets in the
                 * tx queue and have disabled notification */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}

static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down)
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    else
        n->status |= VIRTIO_NET_S_LINK_UP;

    if (n->status != old_status)
        virtio_notify_config(vdev);

    virtio_net_set_status(vdev, vdev->status);
}

static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        gchar *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
                                              n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}

static intList *get_vlan_table(VirtIONet *n)
{
    intList *list, *entry;
    int i, j;

    list = NULL;
    for (i = 0; i < MAX_VLAN >> 5; i++) {
        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                entry = g_malloc0(sizeof(*entry));
                entry->value = (i << 5) + j;
                entry->next = list;
                list = entry;
            }
        }
    }

    return list;
}

static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list, *entry;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    info->broadcast_allowed = n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->unicast_table = str_list;

    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}

static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queues = 1;
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (nc->peer) {
            qemu_flush_or_purge_queued_packets(nc->peer, true);
            assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
        }
    }
}

static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
}

static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n))
        return 0;

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}

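/*
 * Pick the header layout the guest will see: virtio-1 always uses the
 * mergeable layout, legacy guests only when VIRTIO_NET_F_MRG_RXBUF was
 * negotiated. If the peer accepts the same header length, use it for the
 * host as well so the header can be passed through unchanged.
 */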
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queues; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}

static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];

    /*
     * Backends other than vhost-user don't support a tx queue size
     * larger than the default.
     */
    if (!peer) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    return VIRTQUEUE_MAX_SIZE;
}

static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queues == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

static void virtio_net_set_queues(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queues; i++) {
        if (i < n->curr_queues) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);

static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* First, sync all of virtio-net's possible supported features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
        (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    return features;
}

static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25. It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}

static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
}

static uint64_t virtio_net_guest_offloads_by_features(uint64_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN) |
        (1ULL << VIRTIO_NET_F_GUEST_UFO);

    return guest_offloads_mask & features;
}

static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}

static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    if (n->mtu_bypass_backend &&
        !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1));

    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }
}

static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}

static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = virtio_lduw_p(vdev, &vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN)
        return VIRTIO_NET_ERR;

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    else
        return VIRTIO_NET_ERR;

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    trace_virtio_net_handle_announce(n->announce_timer.round);
    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
        n->status & VIRTIO_NET_S_ANNOUNCE) {
        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
        if (n->announce_timer.round) {
            qemu_announce_timer_step(&n->announce_timer);
        }
        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mq mq;
    size_t s;
    uint16_t queues;

    s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
    if (s != sizeof(mq)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        return VIRTIO_NET_ERR;
    }

    queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queues > n->max_queues ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queues = queues;
    /* stop the backend before changing the number of queues to avoid handling a
     * disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queues(n);

    return VIRTIO_NET_OK;
}

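/*
 * Control virtqueue handler: each request carries a virtio_net_ctrl_hdr
 * (class + command) followed by command data, and a one-byte ack status
 * is written back into the request's in-buffer.
 */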
static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    VirtQueueElement *elem;
    size_t s;
    struct iovec *iov, *iov2;
    unsigned int iov_cnt;

    for (;;) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }
        if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
            iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
            virtio_error(vdev, "virtio-net ctrl missing headers");
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }

        iov_cnt = elem->out_num;
        iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
        if (s != sizeof(ctrl)) {
            status = VIRTIO_NET_ERR;
        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
            status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
            status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
        }

        s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
        assert(s == sizeof(status));

        virtqueue_push(vq, elem, sizeof(status));
        virtio_notify(vdev, vq);
        g_free(iov2);
        g_free(elem);
    }
}

/* RX */

static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

static int virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running) {
        return 0;
    }

    if (nc->queue_index >= n->curr_queues) {
        return 0;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return 0;
    }

    return 1;
}

static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}

static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}

/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums. This
 * causes dhclient to get upset. Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums. This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}

static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}

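/*
 * Returns 1 if the packet passes the RX filter: promiscuous mode short-
 * circuits everything, then the VLAN table is checked, and finally the
 * destination MAC is matched against the broadcast/multicast/unicast
 * mode flags and MAC tables.
 */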
static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    uint8_t *ptr = (uint8_t *)buf;
    int i;

    if (n->promisc)
        return 1;

    ptr += n->host_hdr_len;

    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
        int vid = lduw_be_p(ptr + 14) & 0xfff;
        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
            return 0;
    }

    if (ptr[0] & 1) { // multicast
        if (!memcmp(ptr, bcast, sizeof(bcast))) {
            return !n->nobcast;
        } else if (n->nomulti) {
            return 0;
        } else if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    } else { // unicast
        if (n->nouni) {
            return 0;
        } else if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
            return 1;
        }

        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    }

    return 0;
}

static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size))
        return size;

    offset = i = 0;

    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            return -1;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            return -1;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem->in_num, buf, size);
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet. ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            return size;
        }

        /* signal other side */
        virtqueue_fill(q->rx_vq, elem, total, i++);
        g_free(elem);
    }

    if (mhdr_cnt) {
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;
}

static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
                                     size_t size)
{
    ssize_t r;

    rcu_read_lock();
    r = virtio_net_receive_rcu(nc, buf, size);
    rcu_read_unlock();
    return r;
}

static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    uint16_t ip_hdrlen;
    struct ip_header *ip;

    ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
                              + sizeof(struct eth_header));
    unit->ip = (void *)ip;
    ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
    unit->ip_plen = &ip->ip_len;
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
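    /* The TCP data offset is the top 4 bits, in units of 32-bit words:
     * (flags & 0xF000) >> 12 gives words, << 2 gives bytes, hence >> 10. */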
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
    unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
}

static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    struct ip6_header *ip6;

    ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
                                + sizeof(struct eth_header));
    unit->ip = ip6;
    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
                                      + sizeof(struct ip6_header));
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;

    /* There is a difference between the payload length in ipv4 and v6:
       the ip header is excluded in ipv6 */
    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
}

static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
                                       VirtioNetRscSeg *seg)
{
    int ret;
    struct virtio_net_hdr *h;

    h = (struct virtio_net_hdr *)seg->buf;
    h->flags = 0;
    h->gso_type = VIRTIO_NET_HDR_GSO_NONE;

    if (seg->is_coalesced) {
        *virtio_net_rsc_ext_num_packets(h) = seg->packets;
        *virtio_net_rsc_ext_num_dupacks(h) = seg->dup_ack;
        h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
        if (chain->proto == ETH_P_IP) {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
        } else {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
        }
    }

    ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
    QTAILQ_REMOVE(&chain->buffers, seg, next);
    g_free(seg->buf);
    g_free(seg);

    return ret;
}

static void virtio_net_rsc_purge(void *opq)
{
    VirtioNetRscSeg *seg, *rn;
    VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.purge_failed++;
            continue;
        }
    }

    chain->stat.timer++;
    if (!QTAILQ_EMPTY(&chain->buffers)) {
        timer_mod(chain->drain_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
    }
}

static void virtio_net_rsc_cleanup(VirtIONet *n)
{
    VirtioNetRscChain *chain, *rn_chain;
    VirtioNetRscSeg *seg, *rn_seg;

    QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
        QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
            QTAILQ_REMOVE(&chain->buffers, seg, next);
            g_free(seg->buf);
            g_free(seg);
        }

        timer_del(chain->drain_timer);
        timer_free(chain->drain_timer);
        QTAILQ_REMOVE(&n->rsc_chains, chain, next);
        g_free(chain);
    }
}

static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
                                     NetClientState *nc,
                                     const uint8_t *buf, size_t size)
{
    uint16_t hdr_len;
    VirtioNetRscSeg *seg;

    hdr_len = chain->n->guest_hdr_len;
    seg = g_malloc(sizeof(VirtioNetRscSeg));
    seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
                        + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
    memcpy(seg->buf, buf, size);
    seg->size = size;
    seg->packets = 1;
    seg->dup_ack = 0;
    seg->is_coalesced = 0;
    seg->nc = nc;

    QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
    chain->stat.cache++;

    switch (chain->proto) {
    case ETH_P_IP:
        virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
        break;
    case ETH_P_IPV6:
        virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
        break;
    default:
        g_assert_not_reached();
    }
}

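/*
 * Decide what to do with a segment whose sequence number matches a cached
 * one: out-of-window ACKs, duplicate ACKs and pure ACKs finalize the flow,
 * while a window update is folded into the cached segment.
 */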
static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
                                         VirtioNetRscSeg *seg,
                                         const uint8_t *buf,
                                         struct tcp_header *n_tcp,
                                         struct tcp_header *o_tcp)
{
    uint32_t nack, oack;
    uint16_t nwin, owin;

    nack = htonl(n_tcp->th_ack);
    nwin = htons(n_tcp->th_win);
    oack = htonl(o_tcp->th_ack);
    owin = htons(o_tcp->th_win);

    if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.ack_out_of_win++;
        return RSC_FINAL;
    } else if (nack == oack) {
        /* duplicated ack or window probe */
        if (nwin == owin) {
            /* duplicated ack; count it, as the whql test allows up to
             * 1 dup ack */
            chain->stat.dup_ack++;
            return RSC_FINAL;
        } else {
            /* Coalesce window update */
            o_tcp->th_win = n_tcp->th_win;
            chain->stat.win_update++;
            return RSC_COALESCE;
        }
    } else {
        /* pure ack, finalize */
        chain->stat.pure_ack++;
        return RSC_FINAL;
    }
}

static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
                                            VirtioNetRscSeg *seg,
                                            const uint8_t *buf,
                                            VirtioNetRscUnit *n_unit)
{
    void *data;
    uint16_t o_ip_len;
    uint32_t nseq, oseq;
    VirtioNetRscUnit *o_unit;

    o_unit = &seg->unit;
    o_ip_len = htons(*o_unit->ip_plen);
    nseq = htonl(n_unit->tcp->th_seq);
    oseq = htonl(o_unit->tcp->th_seq);

    /* out of order or retransmitted. */
    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.data_out_of_win++;
        return RSC_FINAL;
    }

    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
    if (nseq == oseq) {
        if ((o_unit->payload == 0) && n_unit->payload) {
            /* Going from no payload to payload is the normal case, not a
               dup ack etc. */
            chain->stat.data_after_pure_ack++;
            goto coalesce;
        } else {
            return virtio_net_rsc_handle_ack(chain, seg, buf,
                                             n_unit->tcp, o_unit->tcp);
        }
    } else if ((nseq - oseq) != o_unit->payload) {
        /* Not a consistent packet, out of order */
        chain->stat.data_out_of_order++;
        return RSC_FINAL;
    } else {
coalesce:
        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
            chain->stat.over_size++;
            return RSC_FINAL;
        }

        /* This is the right data: the payload length field differs between
           v4 and v6, so use the unit's value to update and record the new
           data length */
        o_unit->payload += n_unit->payload; /* update new data len */

        /* update field in ip header */
        *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);

        /* Carry over the 'PUSH' flag: the whql test guide says 'PUSH' can be
           coalesced for a windows guest, while this may change the behavior
           for a linux guest (only if it uses the RSC feature). */
        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;

        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
        o_unit->tcp->th_win = n_unit->tcp->th_win;

        memmove(seg->buf + seg->size, data, n_unit->payload);
        seg->size += n_unit->payload;
        seg->packets++;
        chain->stat.coalesced++;
        return RSC_COALESCE;
    }
}

static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
                                        VirtioNetRscSeg *seg,
                                        const uint8_t *buf, size_t size,
                                        VirtioNetRscUnit *unit)
{
    struct ip_header *ip1, *ip2;

    ip1 = (struct ip_header *)(unit->ip);
    ip2 = (struct ip_header *)(seg->unit.ip);
    if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
        chain->stat.no_match++;
        return RSC_NO_MATCH;
    }

    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
}

static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
                                        VirtioNetRscSeg *seg,
                                        const uint8_t *buf, size_t size,
                                        VirtioNetRscUnit *unit)
{
    struct ip6_header *ip1, *ip2;

    ip1 = (struct ip6_header *)(unit->ip);
    ip2 = (struct ip6_header *)(seg->unit.ip);
    if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
        || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
        chain->stat.no_match++;
        return RSC_NO_MATCH;
    }

    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
}

/* Packets with 'SYN' should bypass; packets with other control flags should
 * be sent out only after the flow has been drained, to prevent reordering */
static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
                                         struct tcp_header *tcp)
{
    uint16_t tcp_hdr;
    uint16_t tcp_flag;

    tcp_flag = htons(tcp->th_offset_flags);
    tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
    tcp_flag &= VIRTIO_NET_TCP_FLAG;
    if (tcp_flag & TH_SYN) {
        chain->stat.tcp_syn++;
        return RSC_BYPASS;
    }

    if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
        chain->stat.tcp_ctrl_drain++;
        return RSC_FINAL;
    }

    if (tcp_hdr > sizeof(struct tcp_header)) {
        chain->stat.tcp_all_opt++;
        return RSC_FINAL;
    }

    return RSC_CANDIDATE;
}

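/*
 * Try to coalesce an incoming candidate with a cached segment of the same
 * flow: on RSC_FINAL drain the cached segment and send the new packet
 * through; on RSC_NO_MATCH keep looking; otherwise the data was merged.
 * If nothing matches, cache the packet as the start of a new flow.
 */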
static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
                                         NetClientState *nc,
                                         const uint8_t *buf, size_t size,
                                         VirtioNetRscUnit *unit)
{
    int ret;
    VirtioNetRscSeg *seg, *nseg;

    if (QTAILQ_EMPTY(&chain->buffers)) {
        chain->stat.empty_cache++;
        virtio_net_rsc_cache_buf(chain, nc, buf, size);
        timer_mod(chain->drain_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
        return size;
    }

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        if (chain->proto == ETH_P_IP) {
            ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
        } else {
            ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
        }

        if (ret == RSC_FINAL) {
            if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
                /* Send failed */
                chain->stat.final_failed++;
                return 0;
            }

            /* Send current packet */
            return virtio_net_do_receive(nc, buf, size);
        } else if (ret == RSC_NO_MATCH) {
            continue;
        } else {
            /* Coalesced: mark the flag so the checksum is recalculated
             * for ipv4 */
            seg->is_coalesced = 1;
            return size;
        }
    }

    chain->stat.no_match_cache++;
    virtio_net_rsc_cache_buf(chain, nc, buf, size);
    return size;
}

/* Drain a connection's data; this is to avoid out-of-order segments */
static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
                                        NetClientState *nc,
                                        const uint8_t *buf, size_t size,
                                        uint16_t ip_start, uint16_t ip_size,
                                        uint16_t tcp_port)
{
    VirtioNetRscSeg *seg, *nseg;
    uint32_t ppair1, ppair2;

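    /* ip_start/ip_size locate the address pair and tcp_port the 4-byte
     * source/destination port pair that together identify the flow. */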
    ppair1 = *(uint32_t *)(buf + tcp_port);
    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        ppair2 = *(uint32_t *)(seg->buf + tcp_port);
        if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
            || (ppair1 != ppair2)) {
            continue;
        }
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.drain_failed++;
        }

        break;
    }

    return virtio_net_do_receive(nc, buf, size);
}

static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
                                            struct ip_header *ip,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    /* Not an ipv4 packet */
    if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip option */
    if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    if (ip->ip_p != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip fragment */
    if (!(htons(ip->ip_off) & IP_DF)) {
        chain->stat.ip_frag++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IPTOS_ECN(ip->ip_tos)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    ip_len = htons(ip->ip_len);
    if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
        || ip_len > (size - chain->n->guest_hdr_len -
                     sizeof(struct eth_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}

static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
                                      NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    int32_t ret;
    uint16_t hdr_len;
    VirtioNetRscUnit unit;

    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;

    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
                + sizeof(struct tcp_header))) {
        chain->stat.bypass_not_tcp++;
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit4(chain, buf, &unit);
    if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
        != RSC_CANDIDATE) {
        return virtio_net_do_receive(nc, buf, size);
    }

    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (ret == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    } else if (ret == RSC_FINAL) {
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                ((hdr_len + sizeof(struct eth_header)) + 12),
                VIRTIO_NET_IP4_ADDR_SIZE,
                hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}

static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
                                            struct ip6_header *ip6,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
        != IP_HEADER_VERSION_6) {
        return RSC_BYPASS;
    }

    /* Both options and protocol are checked by this test */
    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    if (ip_len < sizeof(struct tcp_header) ||
        ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
                  - sizeof(struct ip6_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}

static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    int32_t ret;
    uint16_t hdr_len;
    VirtioNetRscChain *chain;
    VirtioNetRscUnit unit;

    chain = (VirtioNetRscChain *)opq;
    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;

    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
                + sizeof(struct tcp_header))) {
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit6(chain, buf, &unit);
    if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
                                                      unit.ip, buf, size)) {
        return virtio_net_do_receive(nc, buf, size);
    }

    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (ret == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    } else if (ret == RSC_FINAL) {
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                ((hdr_len + sizeof(struct eth_header)) + 8),
                VIRTIO_NET_IP6_ADDR_SIZE,
                hdr_len + sizeof(struct eth_header)
                + sizeof(struct ip6_header));
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}

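/*
 * Find (or create on demand) the per-protocol coalescing chain; only
 * ETH_P_IP and ETH_P_IPV6 packets are coalesced.
 */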
1910static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
1911 NetClientState *nc,
1912 uint16_t proto)
1913{
1914 VirtioNetRscChain *chain;
1915
1916 if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
1917 return NULL;
1918 }
1919
1920 QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
1921 if (chain->proto == proto) {
1922 return chain;
1923 }
1924 }
1925
1926 chain = g_malloc(sizeof(*chain));
1927 chain->n = n;
1928 chain->proto = proto;
1929 if (proto == (uint16_t)ETH_P_IP) {
1930 chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
1931 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1932 } else {
1933 chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
1934 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1935 }
1936 chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
1937 virtio_net_rsc_purge, chain);
1938 memset(&chain->stat, 0, sizeof(chain->stat));
1939
1940 QTAILQ_INIT(&chain->buffers);
1941 QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
1942
1943 return chain;
1944}
1945
1946static ssize_t virtio_net_rsc_receive(NetClientState *nc,
1947 const uint8_t *buf,
1948 size_t size)
1949{
1950 uint16_t proto;
1951 VirtioNetRscChain *chain;
1952 struct eth_header *eth;
1953 VirtIONet *n;
1954
1955 n = qemu_get_nic_opaque(nc);
1956 if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
1957 return virtio_net_do_receive(nc, buf, size);
1958 }
1959
1960 eth = (struct eth_header *)(buf + n->guest_hdr_len);
1961 proto = htons(eth->h_proto);
1962
    chain = virtio_net_rsc_lookup_chain(n, nc, proto);
    if (chain) {
        chain->stat.received++;
        if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
            return virtio_net_rsc_receive4(chain, nc, buf, size);
        } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
            return virtio_net_rsc_receive6(chain, nc, buf, size);
        }
    }
    return virtio_net_do_receive(nc, buf, size);
}

static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    if (n->rsc4_enabled || n->rsc6_enabled) {
        return virtio_net_rsc_receive(nc, buf, size);
    } else {
        return virtio_net_do_receive(nc, buf, size);
    }
}

static int32_t virtio_net_flush_tx(VirtIONetQueue *q);

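/*
 * Completion callback for a packet submitted with qemu_sendv_packet_async():
 * complete the element, re-enable guest notifications and resume flushing.
 */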
static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
    virtio_notify(vdev, q->tx_vq);

    g_free(q->async_tx.elem);
    q->async_tx.elem = NULL;

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}

/* TX */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elem;
    int32_t num_packets = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

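    /* An earlier async transmit is still in flight; wait for it to finish */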
    if (q->async_tx.elem) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return num_packets;
    }

    for (;;) {
        ssize_t ret;
        unsigned int out_num;
        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1],
                     *out_sg;
        struct virtio_net_hdr_mrg_rxbuf mhdr;

        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        out_num = elem->out_num;
        out_sg = elem->out_sg;
        if (out_num < 1) {
            virtio_error(vdev, "virtio-net header not in first element");
            virtqueue_detach_element(q->tx_vq, elem, 0);
            g_free(elem);
            return -EINVAL;
        }

        if (n->has_vnet_hdr) {
            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
                n->guest_hdr_len) {
                virtio_error(vdev, "virtio-net header incorrect");
                virtqueue_detach_element(q->tx_vq, elem, 0);
                g_free(elem);
                return -EINVAL;
            }
            if (n->needs_vnet_hdr_swap) {
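                /*
                 * Byteswap a local copy of the header for the backend and
                 * splice it in front of the remaining buffers via sg2.
                 */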
                virtio_net_hdr_swap(vdev, (void *) &mhdr);
                sg2[0].iov_base = &mhdr;
                sg2[0].iov_len = n->guest_hdr_len;
                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
                                   out_sg, out_num,
                                   n->guest_hdr_len, -1);
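                /* sg2 is full: the copy may be truncated, drop the packet */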
                if (out_num == VIRTQUEUE_MAX_SIZE) {
                    goto drop;
                }
                out_num += 1;
                out_sg = sg2;
            }
        }
        /*
         * If host wants to see the guest header as is, we can
         * pass it on unchanged. Otherwise, copy just the parts
         * that host is interested in.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
                                       out_sg, out_num,
                                       0, n->host_hdr_len);
            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
                               out_sg, out_num,
                               n->guest_hdr_len, -1);
            out_num = sg_num;
            out_sg = sg;
        }

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
                                      out_sg, out_num, virtio_net_tx_complete);
        if (ret == 0) {
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            return -EBUSY;
        }

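/* Sent and dropped packets alike end up here and count against the burst */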
drop:
        virtqueue_push(q->tx_vq, elem, 0);
        virtio_notify(vdev, q->tx_vq);
        g_free(elem);

        if (++num_packets >= n->tx_burst) {
            break;
        }
    }
    return num_packets;
}

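/*
 * Timer-based TX mitigation: the first kick arms tx_timer and turns guest
 * notifications off; a second kick before the timer fires cancels it and
 * flushes immediately.
 */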
static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        q->tx_waiting = 1;
        return;
    }

    if (q->tx_waiting) {
        virtio_queue_set_notification(vq, 1);
        timer_del(q->tx_timer);
        q->tx_waiting = 0;
        if (virtio_net_flush_tx(q) == -EINVAL) {
            return;
        }
    } else {
        timer_mod(q->tx_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        q->tx_waiting = 1;
        virtio_queue_set_notification(vq, 0);
    }
}

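/*
 * Bottom-half based TX: disable further notifications and defer the actual
 * flush to a bottom half so the vCPU thread returns from the kick quickly.
 */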
static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    if (unlikely(q->tx_waiting)) {
        return;
    }
    q->tx_waiting = 1;
    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        return;
    }
    virtio_queue_set_notification(vq, 0);
    qemu_bh_schedule(q->tx_bh);
}

static void virtio_net_tx_timer(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    /* This happens when the device was stopped but the timer wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return;
    }

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}

static void virtio_net_tx_bh(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int32_t ret;

    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
        return;
    }

    ret = virtio_net_flush_tx(q);
    if (ret == -EBUSY || ret == -EINVAL) {
        return; /* Notification re-enable handled by tx_complete or device
                 * broken */
    }

    /* If we flush a full burst of packets, assume there are
     * more coming and immediately reschedule */
    if (ret >= n->tx_burst) {
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
        return;
    }

    /* If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking. If
     * we find something, assume the guest is still active and reschedule */
    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret == -EINVAL) {
        return;
    } else if (ret > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
    }
}

static void virtio_net_add_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
                                           virtio_net_handle_rx);

    if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_timer);
        n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                              virtio_net_tx_timer,
                                              &n->vqs[index]);
    } else {
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_bh);
        n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
    }

    n->vqs[index].tx_waiting = 0;
    n->vqs[index].n = n;
}

static void virtio_net_del_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = &n->vqs[index];
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    qemu_purge_queued_packets(nc);

    virtio_del_queue(vdev, index * 2);
    if (q->tx_timer) {
        timer_del(q->tx_timer);
        timer_free(q->tx_timer);
        q->tx_timer = NULL;
    } else {
        qemu_bh_delete(q->tx_bh);
        q->tx_bh = NULL;
    }
    q->tx_waiting = 0;
    virtio_del_queue(vdev, index * 2 + 1);
}

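/*
 * Virtqueue layout: the RX queue of pair i lives at index 2 * i, its TX
 * queue at 2 * i + 1, and the control queue always comes last, so a device
 * with new_max_queues pairs exposes 2 * new_max_queues + 1 virtqueues.
 */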
static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int old_num_queues = virtio_get_num_queues(vdev);
    int new_num_queues = new_max_queues * 2 + 1;
    int i;

    assert(old_num_queues >= 3);
    assert(old_num_queues % 2 == 1);

    if (old_num_queues == new_num_queues) {
        return;
    }

    /*
     * We always need to remove and add ctrl vq if
     * old_num_queues != new_num_queues. Remove ctrl_vq first,
     * and then we only enter one of the following two loops.
     */
    virtio_del_queue(vdev, old_num_queues - 1);

    for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
        /* new_num_queues < old_num_queues */
        virtio_net_del_queue(n, i / 2);
    }

    for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
        /* new_num_queues > old_num_queues */
        virtio_net_add_queue(n, i / 2);
    }

    /* add ctrl_vq last */
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
{
    int max = multiqueue ? n->max_queues : 1;

    n->multiqueue = multiqueue;
    virtio_net_change_num_queues(n, max);

    virtio_net_set_queues(n);
}

static int virtio_net_post_load_device(void *opaque, int version_id)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int i, link_down;

    trace_virtio_net_post_load_device();
    virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_F_VERSION_1));

    /* MAC_TABLE_ENTRIES may be different from the saved image */
    if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
        n->mac_table.in_use = 0;
    }

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
    }

    if (peer_has_vnet_hdr(n)) {
        virtio_net_apply_guest_offloads(n);
    }

    virtio_net_set_queues(n);

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    n->mac_table.first_multi = i;

    /* nc.link_down can't be migrated, so infer it from the link status
     * bit in n->status */
    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
    for (i = 0; i < n->max_queues; i++) {
        qemu_get_subqueue(n->nic, i)->link_down = link_down;
    }

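    /* Resume any guest self-announcement that was interrupted by migration */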
    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                                  QEMU_CLOCK_VIRTUAL,
                                  virtio_net_announce_timer, n);
        if (n->announce_timer.round) {
            timer_mod(n->announce_timer.tm,
                      qemu_clock_get_ms(n->announce_timer.type));
        } else {
            qemu_announce_timer_del(&n->announce_timer, false);
        }
    }

    return 0;
}

/* tx_waiting field of a VirtIONetQueue */
static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
    .name = "virtio-net-queue-tx_waiting",
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};

static bool max_queues_gt_1(void *opaque, int version_id)
{
    return VIRTIO_NET(opaque)->max_queues > 1;
}

static bool has_ctrl_guest_offloads(void *opaque, int version_id)
{
    return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
                                   VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
}

static bool mac_table_fits(void *opaque, int version_id)
{
    return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
}

static bool mac_table_doesnt_fit(void *opaque, int version_id)
{
    return !mac_table_fits(opaque, version_id);
}

/* This temporary type is shared by all the WITH_TMP methods
 * although only some fields are used by each.
 */
struct VirtIONetMigTmp {
    VirtIONet *parent;
    VirtIONetQueue *vqs_1;
    uint16_t curr_queues_1;
    uint8_t has_ufo;
    uint32_t has_vnet_hdr;
};

/* The 2nd and subsequent tx_waiting flags are loaded later than
 * the 1st entry in the queues and only if there's more than one
 * entry. We use the tmp mechanism to calculate a temporary
 * pointer and count and also validate the count.
 */

static int virtio_net_tx_waiting_pre_save(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    tmp->vqs_1 = tmp->parent->vqs + 1;
    tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
    if (tmp->parent->curr_queues == 0) {
        tmp->curr_queues_1 = 0;
    }

    return 0;
}

static int virtio_net_tx_waiting_pre_load(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    /* Reuse the pointer setup from save */
    virtio_net_tx_waiting_pre_save(opaque);

    if (tmp->parent->curr_queues > tmp->parent->max_queues) {
        error_report("virtio-net: curr_queues %x > max_queues %x",
                     tmp->parent->curr_queues, tmp->parent->max_queues);

        return -EINVAL;
    }

    return 0; /* all good */
}

static const VMStateDescription vmstate_virtio_net_tx_waiting = {
    .name = "virtio-net-tx_waiting",
    .pre_load = virtio_net_tx_waiting_pre_load,
    .pre_save = virtio_net_tx_waiting_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
                                             curr_queues_1,
                                             vmstate_virtio_net_queue_tx_waiting,
                                             struct VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};

/* The 'has_ufo' flag is only tested, never loaded into the device: if the
 * incoming stream has it set, the peer on this host must support UFO too.
 */
static int virtio_net_ufo_post_load(void *opaque, int version_id)
{
    struct VirtIONetMigTmp *tmp = opaque;

    if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
        error_report("virtio-net: saved image requires TUN_F_UFO support");
        return -EINVAL;
    }

    return 0;
}

static int virtio_net_ufo_pre_save(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    tmp->has_ufo = tmp->parent->has_ufo;

    return 0;
}

static const VMStateDescription vmstate_virtio_net_has_ufo = {
    .name = "virtio-net-ufo",
    .post_load = virtio_net_ufo_post_load,
    .pre_save = virtio_net_ufo_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};

/* The 'has_vnet_hdr' flag is only tested, never loaded into the device: if
 * the incoming stream has it set, the peer on this host must also support
 * the vnet header.
 */
static int virtio_net_vnet_post_load(void *opaque, int version_id)
{
    struct VirtIONetMigTmp *tmp = opaque;

    if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
        error_report("virtio-net: saved image requires vnet_hdr=on");
        return -EINVAL;
    }

    return 0;
}

static int virtio_net_vnet_pre_save(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;

    return 0;
}

static const VMStateDescription vmstate_virtio_net_has_vnet = {
    .name = "virtio-net-vnet",
    .post_load = virtio_net_vnet_post_load,
    .pre_save = virtio_net_vnet_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};

static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /* Guarded pair: if it fits we load it, else we throw it away
         * - can happen if the source has a larger MAC table; post-load
         * sets flags in this case.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                 0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /* Note: This is an array of uint32's that's always been saved as a
         * buffer; hold onto your endiannesses; it's actually used as a bitmap
         * but based on the uint.
         */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
    },
};

static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
    .query_rx_filter = virtio_net_query_rxfilter,
    .announce = virtio_net_announce,
};

static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
    assert(n->vhost_started);
    return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
}

static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
                                           bool mask)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
    assert(n->vhost_started);
    vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
                             vdev, idx, mask);
}

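/*
 * Size the config space up to the last field whose feature bit is set;
 * VIRTIO_NET_F_MAC is forced on here, so the mac field is always present.
 */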
static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
{
    virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);

    n->config_size = virtio_feature_get_config_size(feature_sizes,
                                                    host_features);
}

void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
                                   const char *type)
{
    /*
     * The name may be NULL; in that case the netclient name defaults
     * to type.x.
     */
    assert(type != NULL);

    g_free(n->netclient_name);
    g_free(n->netclient_type);
    n->netclient_name = g_strdup(name);
    n->netclient_type = g_strdup(type);
}

static void virtio_net_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    NetClientState *nc;
    int i;

    if (n->net_conf.mtu) {
        n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    if (n->net_conf.duplex_str) {
        if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
            n->net_conf.duplex = DUPLEX_HALF;
        } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
            n->net_conf.duplex = DUPLEX_FULL;
        } else {
            error_setg(errp, "'duplex' must be 'half' or 'full'");
            return;
        }
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    } else {
        n->net_conf.duplex = DUPLEX_UNKNOWN;
    }

    if (n->net_conf.speed < SPEED_UNKNOWN) {
        error_setg(errp, "'speed' must be between 0 and INT_MAX");
        return;
    } else if (n->net_conf.speed >= 0) {
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    }

    virtio_net_set_config_size(n, n->host_features);
    virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);

    /*
     * We set a lower limit on RX queue size to what it always was.
     * Guests that want a smaller ring can always resize it without
     * help from us (using virtio 1 and up).
     */
    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.rx_queue_size)) {
        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d.",
                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.tx_queue_size)) {
        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d",
                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    n->max_queues = MAX(n->nic_conf.peers.queues, 1);
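    /* Each queue pair needs an RX and a TX virtqueue, plus the control vq */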
    if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
        error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
                   "must be a positive integer less than %d.",
                   n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
        virtio_cleanup(vdev);
        return;
    }
    n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
    n->curr_queues = 1;
    n->tx_timeout = n->net_conf.txtimer;

    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
        && strcmp(n->net_conf.tx, "bh")) {
        warn_report("virtio-net: "
                    "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                    n->net_conf.tx);
        error_printf("Defaulting to \"bh\"\n");
    }

    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
                                    n->net_conf.tx_queue_size);

    for (i = 0; i < n->max_queues; i++) {
        virtio_net_add_queue(n, i);
    }

    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;
    qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                              QEMU_CLOCK_VIRTUAL,
                              virtio_net_announce_timer, n);
    n->announce_timer.round = 0;

    if (n->netclient_type) {
        /*
         * This happens when virtio_net_set_netclient_name() has been
         * called.
         */
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              n->netclient_type, n->netclient_name, n);
    } else {
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              object_get_typename(OBJECT(dev)), dev->id, n);
    }

    peer_test_vnet_hdr(n);
    if (peer_has_vnet_hdr(n)) {
        for (i = 0; i < n->max_queues; i++) {
            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
        }
        n->host_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        n->host_hdr_len = 0;
    }

    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);

    n->vqs[0].tx_waiting = 0;
    n->tx_burst = n->net_conf.txburst;
    virtio_net_set_mrg_rx_bufs(n, 0, 0);
    n->promisc = 1; /* for compatibility */

    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    n->vlans = g_malloc0(MAX_VLAN >> 3);

    nc = qemu_get_queue(n->nic);
    nc->rxfilter_notify_enabled = 1;

    QTAILQ_INIT(&n->rsc_chains);
    n->qdev = dev;
}

static void virtio_net_device_unrealize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    int i, max_queues;

    /* This will stop vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    g_free(n->netclient_name);
    n->netclient_name = NULL;
    g_free(n->netclient_type);
    n->netclient_type = NULL;

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    max_queues = n->multiqueue ? n->max_queues : 1;
    for (i = 0; i < max_queues; i++) {
        virtio_net_del_queue(n, i);
    }

    qemu_announce_timer_del(&n->announce_timer, false);
    g_free(n->vqs);
    qemu_del_nic(n->nic);
    virtio_net_rsc_cleanup(n);
    virtio_cleanup(vdev);
}

static void virtio_net_instance_init(Object *obj)
{
    VirtIONet *n = VIRTIO_NET(obj);

    /*
     * The default config_size is sizeof(struct virtio_net_config).
     * Can be overridden with virtio_net_set_config_size.
     */
    n->config_size = sizeof(struct virtio_net_config);
    device_add_bootindex_property(obj, &n->nic_conf.bootindex,
                                  "bootindex", "/ethernet-phy@0",
                                  DEVICE(n), NULL);
}

static int virtio_net_pre_save(void *opaque)
{
    VirtIONet *n = opaque;

    /* At this point, backend must be stopped, otherwise
     * it might keep writing to memory. */
    assert(!n->vhost_started);

    return 0;
}

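/*
 * Outer vmstate wrapper: the device-specific fields are saved through
 * VMSTATE_VIRTIO_DEVICE, which pulls in vmstate_virtio_net_device above
 * via vdc->vmsd.
 */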
static const VMStateDescription vmstate_virtio_net = {
    .name = "virtio-net",
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .version_id = VIRTIO_NET_VM_VERSION,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
    .pre_save = virtio_net_pre_save,
};

static Property virtio_net_properties[] = {
    DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
                      VIRTIO_NET_F_CSUM, true),
    DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_CSUM, true),
    DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
    DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_TSO4, true),
    DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_TSO6, true),
    DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_ECN, true),
    DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_UFO, true),
    DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_ANNOUNCE, true),
    DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_TSO4, true),
    DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_TSO6, true),
    DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_ECN, true),
    DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_UFO, true),
    DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
                      VIRTIO_NET_F_MRG_RXBUF, true),
    DEFINE_PROP_BIT64("status", VirtIONet, host_features,
                      VIRTIO_NET_F_STATUS, true),
    DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_VQ, true),
    DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_RX, true),
    DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_VLAN, true),
    DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_RX_EXTRA, true),
    DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_MAC_ADDR, true),
    DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
    DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
    DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
                      VIRTIO_NET_F_RSC_EXT, false),
    DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
                       VIRTIO_NET_RSC_DEFAULT_INTERVAL),
    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
                       TX_TIMER_INTERVAL),
    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
                     true),
    DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
    DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
    DEFINE_PROP_END_OF_LIST(),
};

static void virtio_net_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    dc->props = virtio_net_properties;
    dc->vmsd = &vmstate_virtio_net;
    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
    vdc->realize = virtio_net_device_realize;
    vdc->unrealize = virtio_net_device_unrealize;
    vdc->get_config = virtio_net_get_config;
    vdc->set_config = virtio_net_set_config;
    vdc->get_features = virtio_net_get_features;
    vdc->set_features = virtio_net_set_features;
    vdc->bad_features = virtio_net_bad_features;
    vdc->reset = virtio_net_reset;
    vdc->set_status = virtio_net_set_status;
    vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
    vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
    vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
    vdc->vmsd = &vmstate_virtio_net_device;
}

static const TypeInfo virtio_net_info = {
    .name = TYPE_VIRTIO_NET,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIONet),
    .instance_init = virtio_net_instance_init,
    .class_init = virtio_net_class_init,
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_net_info);
}

type_init(virtio_register_types)