1 | /* |
2 | * vhost-net support |
3 | * |
4 | * Copyright Red Hat, Inc. 2010 |
5 | * |
6 | * Authors: |
7 | * Michael S. Tsirkin <mst@redhat.com> |
8 | * |
9 | * This work is licensed under the terms of the GNU GPL, version 2. See |
10 | * the COPYING file in the top-level directory. |
11 | * |
12 | * Contributions after 2012-01-13 are licensed under the terms of the |
13 | * GNU GPL, version 2 or (at your option) any later version. |
14 | */ |
15 | |
16 | #include "qemu/osdep.h" |
17 | #include "net/net.h" |
18 | #include "net/tap.h" |
19 | #include "net/vhost-user.h" |
20 | |
21 | #include "standard-headers/linux/vhost_types.h" |
22 | #include "hw/virtio/virtio-net.h" |
23 | #include "net/vhost_net.h" |
24 | #include "qemu/error-report.h" |
25 | #include "qemu/main-loop.h" |
26 | |
27 | #include <sys/socket.h> |
28 | #include <net/if.h> |
29 | #include <netinet/in.h> |
30 | |
31 | |
32 | #include "standard-headers/linux/virtio_ring.h" |
33 | #include "hw/virtio/vhost.h" |
34 | #include "hw/virtio/virtio-bus.h" |
35 | |
/* Per-queue-pair vhost-net state tying a vhost device to its net backend. */
struct vhost_net {
    /* Generic vhost device state; handed to the vhost_dev_*() helpers. */
    struct vhost_dev dev;
    /* Storage for the device's virtqueues; dev.vqs points here, dev.nvqs is 2. */
    struct vhost_virtqueue vqs[2];
    /* Tap file descriptor for the kernel backend, -1 for other backends. */
    int backend;
    /* Net client this instance was created for (options->net_backend). */
    NetClientState *nc;
};
42 | |
/*
 * Features supported by host kernel.
 *
 * vhost_net_get_feature_bits() selects this table for tap backends.
 * The list is terminated by VHOST_INVALID_FEATURE_BIT.
 */
static const int kernel_feature_bits[] = {
    VIRTIO_F_NOTIFY_ON_EMPTY,
    VIRTIO_RING_F_INDIRECT_DESC,
    VIRTIO_RING_F_EVENT_IDX,
    VIRTIO_NET_F_MRG_RXBUF,
    VIRTIO_F_VERSION_1,
    VIRTIO_NET_F_MTU,
    VIRTIO_F_IOMMU_PLATFORM,
    VHOST_INVALID_FEATURE_BIT
};
54 | |
/*
 * Features supported by non-kernel backends.
 *
 * vhost_net_get_feature_bits() selects this table for vhost-user backends.
 * The list is terminated by VHOST_INVALID_FEATURE_BIT.
 */
static const int user_feature_bits[] = {
    VIRTIO_F_NOTIFY_ON_EMPTY,
    VIRTIO_RING_F_INDIRECT_DESC,
    VIRTIO_RING_F_EVENT_IDX,

    VIRTIO_F_ANY_LAYOUT,
    VIRTIO_F_VERSION_1,
    VIRTIO_NET_F_CSUM,
    VIRTIO_NET_F_GUEST_CSUM,
    VIRTIO_NET_F_GSO,
    VIRTIO_NET_F_GUEST_TSO4,
    VIRTIO_NET_F_GUEST_TSO6,
    VIRTIO_NET_F_GUEST_ECN,
    VIRTIO_NET_F_GUEST_UFO,
    VIRTIO_NET_F_HOST_TSO4,
    VIRTIO_NET_F_HOST_TSO6,
    VIRTIO_NET_F_HOST_ECN,
    VIRTIO_NET_F_HOST_UFO,
    VIRTIO_NET_F_MRG_RXBUF,
    VIRTIO_NET_F_MTU,
    VIRTIO_F_IOMMU_PLATFORM,

    /* This bit implies RARP isn't sent by QEMU out of band */
    VIRTIO_NET_F_GUEST_ANNOUNCE,

    VIRTIO_NET_F_MQ,

    VHOST_INVALID_FEATURE_BIT
};
85 | |
86 | static const int *vhost_net_get_feature_bits(struct vhost_net *net) |
87 | { |
88 | const int *feature_bits = 0; |
89 | |
90 | switch (net->nc->info->type) { |
91 | case NET_CLIENT_DRIVER_TAP: |
92 | feature_bits = kernel_feature_bits; |
93 | break; |
94 | case NET_CLIENT_DRIVER_VHOST_USER: |
95 | feature_bits = user_feature_bits; |
96 | break; |
97 | default: |
98 | error_report("Feature bits not defined for this type: %d" , |
99 | net->nc->info->type); |
100 | break; |
101 | } |
102 | |
103 | return feature_bits; |
104 | } |
105 | |
106 | uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features) |
107 | { |
108 | return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net), |
109 | features); |
110 | } |
111 | |
112 | void vhost_net_ack_features(struct vhost_net *net, uint64_t features) |
113 | { |
114 | net->dev.acked_features = net->dev.backend_features; |
115 | vhost_ack_features(&net->dev, vhost_net_get_feature_bits(net), features); |
116 | } |
117 | |
118 | uint64_t vhost_net_get_max_queues(VHostNetState *net) |
119 | { |
120 | return net->dev.max_queues; |
121 | } |
122 | |
123 | uint64_t vhost_net_get_acked_features(VHostNetState *net) |
124 | { |
125 | return net->dev.acked_features; |
126 | } |
127 | |
128 | static int vhost_net_get_fd(NetClientState *backend) |
129 | { |
130 | switch (backend->info->type) { |
131 | case NET_CLIENT_DRIVER_TAP: |
132 | return tap_get_fd(backend); |
133 | default: |
134 | fprintf(stderr, "vhost-net requires tap backend\n" ); |
135 | return -ENOSYS; |
136 | } |
137 | } |
138 | |
139 | struct vhost_net *vhost_net_init(VhostNetOptions *options) |
140 | { |
141 | int r; |
142 | bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL; |
143 | struct vhost_net *net = g_new0(struct vhost_net, 1); |
144 | uint64_t features = 0; |
145 | |
146 | if (!options->net_backend) { |
147 | fprintf(stderr, "vhost-net requires net backend to be setup\n" ); |
148 | goto fail; |
149 | } |
150 | net->nc = options->net_backend; |
151 | |
152 | net->dev.max_queues = 1; |
153 | net->dev.nvqs = 2; |
154 | net->dev.vqs = net->vqs; |
155 | |
156 | if (backend_kernel) { |
157 | r = vhost_net_get_fd(options->net_backend); |
158 | if (r < 0) { |
159 | goto fail; |
160 | } |
161 | net->dev.backend_features = qemu_has_vnet_hdr(options->net_backend) |
162 | ? 0 : (1ULL << VHOST_NET_F_VIRTIO_NET_HDR); |
163 | net->backend = r; |
164 | net->dev.protocol_features = 0; |
165 | } else { |
166 | net->dev.backend_features = 0; |
167 | net->dev.protocol_features = 0; |
168 | net->backend = -1; |
169 | |
170 | /* vhost-user needs vq_index to initiate a specific queue pair */ |
171 | net->dev.vq_index = net->nc->queue_index * net->dev.nvqs; |
172 | } |
173 | |
174 | r = vhost_dev_init(&net->dev, options->opaque, |
175 | options->backend_type, options->busyloop_timeout); |
176 | if (r < 0) { |
177 | goto fail; |
178 | } |
179 | if (backend_kernel) { |
180 | if (!qemu_has_vnet_hdr_len(options->net_backend, |
181 | sizeof(struct virtio_net_hdr_mrg_rxbuf))) { |
182 | net->dev.features &= ~(1ULL << VIRTIO_NET_F_MRG_RXBUF); |
183 | } |
184 | if (~net->dev.features & net->dev.backend_features) { |
185 | fprintf(stderr, "vhost lacks feature mask %" PRIu64 |
186 | " for backend\n" , |
187 | (uint64_t)(~net->dev.features & net->dev.backend_features)); |
188 | goto fail; |
189 | } |
190 | } |
191 | |
192 | /* Set sane init value. Override when guest acks. */ |
193 | #ifdef CONFIG_VHOST_NET_USER |
194 | if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) { |
195 | features = vhost_user_get_acked_features(net->nc); |
196 | if (~net->dev.features & features) { |
197 | fprintf(stderr, "vhost lacks feature mask %" PRIu64 |
198 | " for backend\n" , |
199 | (uint64_t)(~net->dev.features & features)); |
200 | goto fail; |
201 | } |
202 | } |
203 | #endif |
204 | |
205 | vhost_net_ack_features(net, features); |
206 | |
207 | return net; |
208 | |
209 | fail: |
210 | vhost_dev_cleanup(&net->dev); |
211 | g_free(net); |
212 | return NULL; |
213 | } |
214 | |
215 | static void vhost_net_set_vq_index(struct vhost_net *net, int vq_index) |
216 | { |
217 | net->dev.vq_index = vq_index; |
218 | } |
219 | |
220 | static int vhost_net_start_one(struct vhost_net *net, |
221 | VirtIODevice *dev) |
222 | { |
223 | struct vhost_vring_file file = { }; |
224 | int r; |
225 | |
226 | net->dev.nvqs = 2; |
227 | net->dev.vqs = net->vqs; |
228 | |
229 | r = vhost_dev_enable_notifiers(&net->dev, dev); |
230 | if (r < 0) { |
231 | goto fail_notifiers; |
232 | } |
233 | |
234 | r = vhost_dev_start(&net->dev, dev); |
235 | if (r < 0) { |
236 | goto fail_start; |
237 | } |
238 | |
239 | if (net->nc->info->poll) { |
240 | net->nc->info->poll(net->nc, false); |
241 | } |
242 | |
243 | if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) { |
244 | qemu_set_fd_handler(net->backend, NULL, NULL, NULL); |
245 | file.fd = net->backend; |
246 | for (file.index = 0; file.index < net->dev.nvqs; ++file.index) { |
247 | if (!virtio_queue_enabled(dev, net->dev.vq_index + |
248 | file.index)) { |
249 | /* Queue might not be ready for start */ |
250 | continue; |
251 | } |
252 | r = vhost_net_set_backend(&net->dev, &file); |
253 | if (r < 0) { |
254 | r = -errno; |
255 | goto fail; |
256 | } |
257 | } |
258 | } |
259 | return 0; |
260 | fail: |
261 | file.fd = -1; |
262 | if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) { |
263 | while (file.index-- > 0) { |
264 | if (!virtio_queue_enabled(dev, net->dev.vq_index + |
265 | file.index)) { |
266 | /* Queue might not be ready for start */ |
267 | continue; |
268 | } |
269 | int r = vhost_net_set_backend(&net->dev, &file); |
270 | assert(r >= 0); |
271 | } |
272 | } |
273 | if (net->nc->info->poll) { |
274 | net->nc->info->poll(net->nc, true); |
275 | } |
276 | vhost_dev_stop(&net->dev, dev); |
277 | fail_start: |
278 | vhost_dev_disable_notifiers(&net->dev, dev); |
279 | fail_notifiers: |
280 | return r; |
281 | } |
282 | |
283 | static void vhost_net_stop_one(struct vhost_net *net, |
284 | VirtIODevice *dev) |
285 | { |
286 | struct vhost_vring_file file = { .fd = -1 }; |
287 | |
288 | if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) { |
289 | for (file.index = 0; file.index < net->dev.nvqs; ++file.index) { |
290 | int r = vhost_net_set_backend(&net->dev, &file); |
291 | assert(r >= 0); |
292 | } |
293 | } |
294 | if (net->nc->info->poll) { |
295 | net->nc->info->poll(net->nc, true); |
296 | } |
297 | vhost_dev_stop(&net->dev, dev); |
298 | vhost_dev_disable_notifiers(&net->dev, dev); |
299 | } |
300 | |
301 | int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, |
302 | int total_queues) |
303 | { |
304 | BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev))); |
305 | VirtioBusState *vbus = VIRTIO_BUS(qbus); |
306 | VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus); |
307 | int r, e, i; |
308 | |
309 | if (!k->set_guest_notifiers) { |
310 | error_report("binding does not support guest notifiers" ); |
311 | return -ENOSYS; |
312 | } |
313 | |
314 | for (i = 0; i < total_queues; i++) { |
315 | struct vhost_net *net; |
316 | |
317 | net = get_vhost_net(ncs[i].peer); |
318 | vhost_net_set_vq_index(net, i * 2); |
319 | |
320 | /* Suppress the masking guest notifiers on vhost user |
321 | * because vhost user doesn't interrupt masking/unmasking |
322 | * properly. |
323 | */ |
324 | if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) { |
325 | dev->use_guest_notifier_mask = false; |
326 | } |
327 | } |
328 | |
329 | r = k->set_guest_notifiers(qbus->parent, total_queues * 2, true); |
330 | if (r < 0) { |
331 | error_report("Error binding guest notifier: %d" , -r); |
332 | goto err; |
333 | } |
334 | |
335 | for (i = 0; i < total_queues; i++) { |
336 | r = vhost_net_start_one(get_vhost_net(ncs[i].peer), dev); |
337 | |
338 | if (r < 0) { |
339 | goto err_start; |
340 | } |
341 | |
342 | if (ncs[i].peer->vring_enable) { |
343 | /* restore vring enable state */ |
344 | r = vhost_set_vring_enable(ncs[i].peer, ncs[i].peer->vring_enable); |
345 | |
346 | if (r < 0) { |
347 | goto err_start; |
348 | } |
349 | } |
350 | } |
351 | |
352 | return 0; |
353 | |
354 | err_start: |
355 | while (--i >= 0) { |
356 | vhost_net_stop_one(get_vhost_net(ncs[i].peer), dev); |
357 | } |
358 | e = k->set_guest_notifiers(qbus->parent, total_queues * 2, false); |
359 | if (e < 0) { |
360 | fprintf(stderr, "vhost guest notifier cleanup failed: %d\n" , e); |
361 | fflush(stderr); |
362 | } |
363 | err: |
364 | return r; |
365 | } |
366 | |
367 | void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs, |
368 | int total_queues) |
369 | { |
370 | BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev))); |
371 | VirtioBusState *vbus = VIRTIO_BUS(qbus); |
372 | VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus); |
373 | int i, r; |
374 | |
375 | for (i = 0; i < total_queues; i++) { |
376 | vhost_net_stop_one(get_vhost_net(ncs[i].peer), dev); |
377 | } |
378 | |
379 | r = k->set_guest_notifiers(qbus->parent, total_queues * 2, false); |
380 | if (r < 0) { |
381 | fprintf(stderr, "vhost guest notifier cleanup failed: %d\n" , r); |
382 | fflush(stderr); |
383 | } |
384 | assert(r >= 0); |
385 | } |
386 | |
387 | void vhost_net_cleanup(struct vhost_net *net) |
388 | { |
389 | vhost_dev_cleanup(&net->dev); |
390 | } |
391 | |
392 | int vhost_net_notify_migration_done(struct vhost_net *net, char* mac_addr) |
393 | { |
394 | const VhostOps *vhost_ops = net->dev.vhost_ops; |
395 | |
396 | assert(vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); |
397 | assert(vhost_ops->vhost_migration_done); |
398 | |
399 | return vhost_ops->vhost_migration_done(&net->dev, mac_addr); |
400 | } |
401 | |
402 | bool vhost_net_virtqueue_pending(VHostNetState *net, int idx) |
403 | { |
404 | return vhost_virtqueue_pending(&net->dev, idx); |
405 | } |
406 | |
407 | void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, |
408 | int idx, bool mask) |
409 | { |
410 | vhost_virtqueue_mask(&net->dev, dev, idx, mask); |
411 | } |
412 | |
413 | VHostNetState *get_vhost_net(NetClientState *nc) |
414 | { |
415 | VHostNetState *vhost_net = 0; |
416 | |
417 | if (!nc) { |
418 | return 0; |
419 | } |
420 | |
421 | switch (nc->info->type) { |
422 | case NET_CLIENT_DRIVER_TAP: |
423 | vhost_net = tap_get_vhost_net(nc); |
424 | break; |
425 | #ifdef CONFIG_VHOST_NET_USER |
426 | case NET_CLIENT_DRIVER_VHOST_USER: |
427 | vhost_net = vhost_user_get_vhost_net(nc); |
428 | assert(vhost_net); |
429 | break; |
430 | #endif |
431 | default: |
432 | break; |
433 | } |
434 | |
435 | return vhost_net; |
436 | } |
437 | |
438 | int vhost_set_vring_enable(NetClientState *nc, int enable) |
439 | { |
440 | VHostNetState *net = get_vhost_net(nc); |
441 | const VhostOps *vhost_ops = net->dev.vhost_ops; |
442 | |
443 | nc->vring_enable = enable; |
444 | |
445 | if (vhost_ops && vhost_ops->vhost_set_vring_enable) { |
446 | return vhost_ops->vhost_set_vring_enable(&net->dev, enable); |
447 | } |
448 | |
449 | return 0; |
450 | } |
451 | |
452 | int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu) |
453 | { |
454 | const VhostOps *vhost_ops = net->dev.vhost_ops; |
455 | |
456 | if (!vhost_ops->vhost_net_set_mtu) { |
457 | return 0; |
458 | } |
459 | |
460 | return vhost_ops->vhost_net_set_mtu(&net->dev, mtu); |
461 | } |
462 | |