1/*
2 * vhost-user
3 *
4 * Copyright (c) 2013 Virtual Open Systems Sarl.
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
8 *
9 */
10
11#include "qemu/osdep.h"
12#include "qapi/error.h"
13#include "hw/virtio/vhost.h"
14#include "hw/virtio/vhost-user.h"
15#include "hw/virtio/vhost-backend.h"
16#include "hw/virtio/virtio.h"
17#include "hw/virtio/virtio-net.h"
18#include "chardev/char-fe.h"
19#include "sysemu/kvm.h"
20#include "qemu/error-report.h"
21#include "qemu/main-loop.h"
22#include "qemu/sockets.h"
23#include "sysemu/cryptodev.h"
24#include "migration/migration.h"
25#include "migration/postcopy-ram.h"
26#include "trace.h"
27
28#include <sys/ioctl.h>
29#include <sys/socket.h>
30#include <sys/un.h>
31
32#include "standard-headers/linux/vhost_types.h"
33
34#ifdef CONFIG_LINUX
35#include <linux/userfaultfd.h>
36#endif
37
/*
 * Capacity of VhostUserMemory.regions[]; this file also uses it to size the
 * local fd arrays that are handed to vhost_user_write().
 */
#define VHOST_MEMORY_MAX_NREGIONS 8
/* Bit index tested in dev->features by vhost_user_set_vring_enable(). */
#define VHOST_USER_F_PROTOCOL_FEATURES 30
/* Capacity of the fd array slave_read() receives ancillary fds into. */
#define VHOST_USER_SLAVE_MAX_FDS 8

/*
 * Maximum size of virtio device config space
 * (dimension of VhostUserConfig.region[]).
 */
#define VHOST_USER_MAX_CONFIG_SIZE 256
46
/*
 * Bit positions of the vhost-user protocol features.  In this file they are
 * tested against dev->protocol_features with virtio_has_feature().
 * VHOST_USER_PROTOCOL_F_MAX is one past the last defined bit.
 */
enum VhostUserProtocolFeature {
    VHOST_USER_PROTOCOL_F_MQ = 0,
    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
    VHOST_USER_PROTOCOL_F_RARP = 2,
    VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
    VHOST_USER_PROTOCOL_F_NET_MTU = 4,
    VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5,
    VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
    VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
    VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
    VHOST_USER_PROTOCOL_F_CONFIG = 9,
    VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10,
    VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
    VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
    VHOST_USER_PROTOCOL_F_MAX
};

/* Mask with every bit below VHOST_USER_PROTOCOL_F_MAX set. */
#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
65
/*
 * Request codes stored in VhostUserHeader.request for messages that this
 * file sends with vhost_user_write() and matches against in replies.
 * The numeric values are spelled out explicitly; VHOST_USER_MAX is one past
 * the last defined request.
 */
typedef enum VhostUserRequest {
    VHOST_USER_NONE = 0,
    VHOST_USER_GET_FEATURES = 1,
    VHOST_USER_SET_FEATURES = 2,
    VHOST_USER_SET_OWNER = 3,
    VHOST_USER_RESET_OWNER = 4,
    VHOST_USER_SET_MEM_TABLE = 5,
    VHOST_USER_SET_LOG_BASE = 6,
    VHOST_USER_SET_LOG_FD = 7,
    VHOST_USER_SET_VRING_NUM = 8,
    VHOST_USER_SET_VRING_ADDR = 9,
    VHOST_USER_SET_VRING_BASE = 10,
    VHOST_USER_GET_VRING_BASE = 11,
    VHOST_USER_SET_VRING_KICK = 12,
    VHOST_USER_SET_VRING_CALL = 13,
    VHOST_USER_SET_VRING_ERR = 14,
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM = 17,
    VHOST_USER_SET_VRING_ENABLE = 18,
    VHOST_USER_SEND_RARP = 19,
    VHOST_USER_NET_SET_MTU = 20,
    VHOST_USER_SET_SLAVE_REQ_FD = 21,
    VHOST_USER_IOTLB_MSG = 22,
    VHOST_USER_SET_VRING_ENDIAN = 23,
    VHOST_USER_GET_CONFIG = 24,
    VHOST_USER_SET_CONFIG = 25,
    VHOST_USER_CREATE_CRYPTO_SESSION = 26,
    VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
    VHOST_USER_POSTCOPY_ADVISE = 28,
    VHOST_USER_POSTCOPY_LISTEN = 29,
    VHOST_USER_POSTCOPY_END = 30,
    VHOST_USER_GET_INFLIGHT_FD = 31,
    VHOST_USER_SET_INFLIGHT_FD = 32,
    VHOST_USER_GPU_SET_SOCKET = 33,
    VHOST_USER_MAX
} VhostUserRequest;
103
/*
 * Request codes for messages arriving on the slave channel; slave_read()
 * switches on these.  VHOST_USER_SLAVE_MAX is one past the last defined code.
 */
typedef enum VhostUserSlaveRequest {
    VHOST_USER_SLAVE_NONE = 0,
    VHOST_USER_SLAVE_IOTLB_MSG = 1,
    VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2,
    VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
    VHOST_USER_SLAVE_MAX
} VhostUserSlaveRequest;
111
/*
 * One entry of the memory table payload.  The set_mem_table functions fill
 * the first three fields from the matching struct vhost_memory_region and
 * mmap_offset from the offset reported by memory_region_from_host().
 */
typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;
    uint64_t memory_size;
    uint64_t userspace_addr;
    uint64_t mmap_offset;
} VhostUserMemoryRegion;
118
/*
 * Payload of VHOST_USER_SET_MEM_TABLE.  Senders in this file set nregions to
 * the number of populated regions[] entries and size the message as
 * nregions + padding + that many entries.
 */
typedef struct VhostUserMemory {
    uint32_t nregions;
    uint32_t padding;
    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
} VhostUserMemory;
124
/*
 * Payload of VHOST_USER_SET_LOG_BASE.  vhost_user_set_log_base() sets
 * mmap_size to the log size in bytes and mmap_offset to 0.
 */
typedef struct VhostUserLog {
    uint64_t mmap_size;
    uint64_t mmap_offset;
} VhostUserLog;
129
/*
 * Config-space payload: three 32-bit header fields followed by up to
 * VHOST_USER_MAX_CONFIG_SIZE bytes of data.
 * NOTE(review): presumably carried by VHOST_USER_GET_CONFIG/SET_CONFIG; the
 * users are outside the part of the file reviewed here - confirm.
 */
typedef struct VhostUserConfig {
    uint32_t offset;
    uint32_t size;
    uint32_t flags;
    uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
} VhostUserConfig;
136
/* Capacities of the key buffers embedded in VhostUserCryptoSession. */
#define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN 512
#define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN 64

/*
 * Crypto session payload.
 * NOTE(review): presumably used with the *_CRYPTO_SESSION requests; the
 * users are outside the part of the file reviewed here - confirm.
 */
typedef struct VhostUserCryptoSession {
    /* session id for success, -1 on errors */
    int64_t session_id;
    CryptoDevBackendSymSessionInfo session_setup_data;
    uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN];
    uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN];
} VhostUserCryptoSession;
147
/* Dummy object that exists only so sizeof() can name VhostUserConfig fields. */
static VhostUserConfig c __attribute__ ((unused));
/* Combined size of the offset, size and flags fields of VhostUserConfig. */
#define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \
                                   + sizeof(c.size) \
                                   + sizeof(c.flags))
152
/*
 * Payload of VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG as consumed by
 * vhost_user_slave_handle_vring_host_notifier():
 *   u64    - queue index in the VHOST_USER_VRING_IDX_MASK bits, plus the
 *            VHOST_USER_VRING_NOFD_MASK flag
 *   size   - must equal the host page size
 *   offset - offset passed to mmap() on the received fd
 */
typedef struct VhostUserVringArea {
    uint64_t u64;
    uint64_t size;
    uint64_t offset;
} VhostUserVringArea;
158
/*
 * Inflight-tracking payload.
 * NOTE(review): presumably carried by VHOST_USER_GET/SET_INFLIGHT_FD; the
 * users are outside the part of the file reviewed here - confirm.
 */
typedef struct VhostUserInflight {
    uint64_t mmap_size;
    uint64_t mmap_offset;
    uint16_t num_queues;
    uint16_t queue_size;
} VhostUserInflight;
165
/*
 * Fixed header that precedes every message.  Packed, so its size
 * (VHOST_USER_HDR_SIZE) is exactly what is read from and written to the
 * channel.
 */
typedef struct {
    VhostUserRequest request;

/* Bits of flags that hold the protocol version (see VHOST_USER_VERSION). */
#define VHOST_USER_VERSION_MASK (0x3)
/* Required on every header read by vhost_user_read_header(). */
#define VHOST_USER_REPLY_MASK (0x1<<2)
/* Set by a sender that wants the peer to acknowledge the request. */
#define VHOST_USER_NEED_REPLY_MASK (0x1 << 3)
    uint32_t flags;
    uint32_t size; /* the following payload size */
} QEMU_PACKED VhostUserHeader;
175
/*
 * Union of every payload layout a message can carry.  Its size
 * (VHOST_USER_PAYLOAD_SIZE) is the upper bound enforced on received
 * hdr.size values.
 */
typedef union {
/* Low bits of u64 carrying a vring index. */
#define VHOST_USER_VRING_IDX_MASK (0xff)
/* Flag in u64: the message is not accompanied by a file descriptor. */
#define VHOST_USER_VRING_NOFD_MASK (0x1<<8)
    uint64_t u64;
    struct vhost_vring_state state;
    struct vhost_vring_addr addr;
    VhostUserMemory memory;
    VhostUserLog log;
    struct vhost_iotlb_msg iotlb;
    VhostUserConfig config;
    VhostUserCryptoSession session;
    VhostUserVringArea area;
    VhostUserInflight inflight;
} VhostUserPayload;
190
/*
 * A complete message: header immediately followed by the payload.  Packed so
 * the payload starts exactly VHOST_USER_HDR_SIZE bytes into the object, which
 * vhost_user_read() and vhost_user_write() rely on when treating it as a
 * byte buffer.
 */
typedef struct VhostUserMsg {
    VhostUserHeader hdr;
    VhostUserPayload payload;
} QEMU_PACKED VhostUserMsg;
195
/* Unused dummy object; not referenced by the macros below. */
static VhostUserMsg m __attribute__ ((unused));
/* Number of bytes in the fixed message header. */
#define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader))

/* Largest payload that fits in a VhostUserMsg. */
#define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload))

/* The version of the protocol we support */
#define VHOST_USER_VERSION (0x1)
203
/* Per-vhost_dev backend state, reached through dev->opaque in this file. */
struct vhost_user {
    struct vhost_dev *dev;
    /* Shared between vhost devs of the same virtio device */
    VhostUserState *user;
    /*
     * QEMU's end of the socketpair created by vhost_setup_slave_channel();
     * reset to -1 when the channel is torn down.
     */
    int slave_fd;
    NotifierWithReturn postcopy_notifier;
    /* Its handler being set is one of the conditions for the postcopy path. */
    struct PostCopyFD postcopy_fd;
    /*
     * Per-region base address in the backend's address space, taken from the
     * userspace_addr fields of the postcopy SET_MEM_TABLE reply.
     */
    uint64_t postcopy_client_bases[VHOST_MEMORY_MAX_NREGIONS];
    /* Length of the region_rb and region_rb_offset arrays */
    size_t region_rb_len;
    /* RAMBlock associated with a given region */
    RAMBlock **region_rb;
    /* The offset from the start of the RAMBlock to the start of the
     * vhost region.
     */
    ram_addr_t *region_rb_offset;

    /* True once we've entered postcopy_listen */
    bool postcopy_listen;
};
224
/*
 * Tell whether eventfds may be handed to the backend: always when KVM is not
 * in use, otherwise only when kvm_eventfds_enabled() says so.
 */
static bool ioeventfd_enabled(void)
{
    if (!kvm_enabled()) {
        return true;
    }

    return kvm_eventfds_enabled();
}
229
230static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg)
231{
232 struct vhost_user *u = dev->opaque;
233 CharBackend *chr = u->user->chr;
234 uint8_t *p = (uint8_t *) msg;
235 int r, size = VHOST_USER_HDR_SIZE;
236
237 r = qemu_chr_fe_read_all(chr, p, size);
238 if (r != size) {
239 error_report("Failed to read msg header. Read %d instead of %d."
240 " Original request %d.", r, size, msg->hdr.request);
241 return -1;
242 }
243
244 /* validate received flags */
245 if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
246 error_report("Failed to read msg header."
247 " Flags 0x%x instead of 0x%x.", msg->hdr.flags,
248 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
249 return -1;
250 }
251
252 return 0;
253}
254
255static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
256{
257 struct vhost_user *u = dev->opaque;
258 CharBackend *chr = u->user->chr;
259 uint8_t *p = (uint8_t *) msg;
260 int r, size;
261
262 if (vhost_user_read_header(dev, msg) < 0) {
263 return -1;
264 }
265
266 /* validate message size is sane */
267 if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) {
268 error_report("Failed to read msg header."
269 " Size %d exceeds the maximum %zu.", msg->hdr.size,
270 VHOST_USER_PAYLOAD_SIZE);
271 return -1;
272 }
273
274 if (msg->hdr.size) {
275 p += VHOST_USER_HDR_SIZE;
276 size = msg->hdr.size;
277 r = qemu_chr_fe_read_all(chr, p, size);
278 if (r != size) {
279 error_report("Failed to read msg payload."
280 " Read %d instead of %d.", r, msg->hdr.size);
281 return -1;
282 }
283 }
284
285 return 0;
286}
287
288static int process_message_reply(struct vhost_dev *dev,
289 const VhostUserMsg *msg)
290{
291 VhostUserMsg msg_reply;
292
293 if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
294 return 0;
295 }
296
297 if (vhost_user_read(dev, &msg_reply) < 0) {
298 return -1;
299 }
300
301 if (msg_reply.hdr.request != msg->hdr.request) {
302 error_report("Received unexpected msg type."
303 "Expected %d received %d",
304 msg->hdr.request, msg_reply.hdr.request);
305 return -1;
306 }
307
308 return msg_reply.payload.u64 ? -1 : 0;
309}
310
311static bool vhost_user_one_time_request(VhostUserRequest request)
312{
313 switch (request) {
314 case VHOST_USER_SET_OWNER:
315 case VHOST_USER_RESET_OWNER:
316 case VHOST_USER_SET_MEM_TABLE:
317 case VHOST_USER_GET_QUEUE_NUM:
318 case VHOST_USER_NET_SET_MTU:
319 return true;
320 default:
321 return false;
322 }
323}
324
325/* most non-init callers ignore the error */
326static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
327 int *fds, int fd_num)
328{
329 struct vhost_user *u = dev->opaque;
330 CharBackend *chr = u->user->chr;
331 int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size;
332
333 /*
334 * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE,
335 * we just need send it once in the first time. For later such
336 * request, we just ignore it.
337 */
338 if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) {
339 msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
340 return 0;
341 }
342
343 if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) {
344 error_report("Failed to set msg fds.");
345 return -1;
346 }
347
348 ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size);
349 if (ret != size) {
350 error_report("Failed to write msg."
351 " Wrote %d instead of %d.", ret, size);
352 return -1;
353 }
354
355 return 0;
356}
357
358int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd)
359{
360 VhostUserMsg msg = {
361 .hdr.request = VHOST_USER_GPU_SET_SOCKET,
362 .hdr.flags = VHOST_USER_VERSION,
363 };
364
365 return vhost_user_write(dev, &msg, &fd, 1);
366}
367
368static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
369 struct vhost_log *log)
370{
371 int fds[VHOST_MEMORY_MAX_NREGIONS];
372 size_t fd_num = 0;
373 bool shmfd = virtio_has_feature(dev->protocol_features,
374 VHOST_USER_PROTOCOL_F_LOG_SHMFD);
375 VhostUserMsg msg = {
376 .hdr.request = VHOST_USER_SET_LOG_BASE,
377 .hdr.flags = VHOST_USER_VERSION,
378 .payload.log.mmap_size = log->size * sizeof(*(log->log)),
379 .payload.log.mmap_offset = 0,
380 .hdr.size = sizeof(msg.payload.log),
381 };
382
383 if (shmfd && log->fd != -1) {
384 fds[fd_num++] = log->fd;
385 }
386
387 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
388 return -1;
389 }
390
391 if (shmfd) {
392 msg.hdr.size = 0;
393 if (vhost_user_read(dev, &msg) < 0) {
394 return -1;
395 }
396
397 if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) {
398 error_report("Received unexpected msg type. "
399 "Expected %d received %d",
400 VHOST_USER_SET_LOG_BASE, msg.hdr.request);
401 return -1;
402 }
403 }
404
405 return 0;
406}
407
408static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
409 struct vhost_memory *mem)
410{
411 struct vhost_user *u = dev->opaque;
412 int fds[VHOST_MEMORY_MAX_NREGIONS];
413 int i, fd;
414 size_t fd_num = 0;
415 VhostUserMsg msg_reply;
416 int region_i, msg_i;
417
418 VhostUserMsg msg = {
419 .hdr.request = VHOST_USER_SET_MEM_TABLE,
420 .hdr.flags = VHOST_USER_VERSION,
421 };
422
423 if (u->region_rb_len < dev->mem->nregions) {
424 u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions);
425 u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset,
426 dev->mem->nregions);
427 memset(&(u->region_rb[u->region_rb_len]), '\0',
428 sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len));
429 memset(&(u->region_rb_offset[u->region_rb_len]), '\0',
430 sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len));
431 u->region_rb_len = dev->mem->nregions;
432 }
433
434 for (i = 0; i < dev->mem->nregions; ++i) {
435 struct vhost_memory_region *reg = dev->mem->regions + i;
436 ram_addr_t offset;
437 MemoryRegion *mr;
438
439 assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
440 mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr,
441 &offset);
442 fd = memory_region_get_fd(mr);
443 if (fd > 0) {
444 trace_vhost_user_set_mem_table_withfd(fd_num, mr->name,
445 reg->memory_size,
446 reg->guest_phys_addr,
447 reg->userspace_addr, offset);
448 u->region_rb_offset[i] = offset;
449 u->region_rb[i] = mr->ram_block;
450 msg.payload.memory.regions[fd_num].userspace_addr =
451 reg->userspace_addr;
452 msg.payload.memory.regions[fd_num].memory_size = reg->memory_size;
453 msg.payload.memory.regions[fd_num].guest_phys_addr =
454 reg->guest_phys_addr;
455 msg.payload.memory.regions[fd_num].mmap_offset = offset;
456 assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
457 fds[fd_num++] = fd;
458 } else {
459 u->region_rb_offset[i] = 0;
460 u->region_rb[i] = NULL;
461 }
462 }
463
464 msg.payload.memory.nregions = fd_num;
465
466 if (!fd_num) {
467 error_report("Failed initializing vhost-user memory map, "
468 "consider using -object memory-backend-file share=on");
469 return -1;
470 }
471
472 msg.hdr.size = sizeof(msg.payload.memory.nregions);
473 msg.hdr.size += sizeof(msg.payload.memory.padding);
474 msg.hdr.size += fd_num * sizeof(VhostUserMemoryRegion);
475
476 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
477 return -1;
478 }
479
480 if (vhost_user_read(dev, &msg_reply) < 0) {
481 return -1;
482 }
483
484 if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) {
485 error_report("%s: Received unexpected msg type."
486 "Expected %d received %d", __func__,
487 VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request);
488 return -1;
489 }
490 /* We're using the same structure, just reusing one of the
491 * fields, so it should be the same size.
492 */
493 if (msg_reply.hdr.size != msg.hdr.size) {
494 error_report("%s: Unexpected size for postcopy reply "
495 "%d vs %d", __func__, msg_reply.hdr.size, msg.hdr.size);
496 return -1;
497 }
498
499 memset(u->postcopy_client_bases, 0,
500 sizeof(uint64_t) * VHOST_MEMORY_MAX_NREGIONS);
501
502 /* They're in the same order as the regions that were sent
503 * but some of the regions were skipped (above) if they
504 * didn't have fd's
505 */
506 for (msg_i = 0, region_i = 0;
507 region_i < dev->mem->nregions;
508 region_i++) {
509 if (msg_i < fd_num &&
510 msg_reply.payload.memory.regions[msg_i].guest_phys_addr ==
511 dev->mem->regions[region_i].guest_phys_addr) {
512 u->postcopy_client_bases[region_i] =
513 msg_reply.payload.memory.regions[msg_i].userspace_addr;
514 trace_vhost_user_set_mem_table_postcopy(
515 msg_reply.payload.memory.regions[msg_i].userspace_addr,
516 msg.payload.memory.regions[msg_i].userspace_addr,
517 msg_i, region_i);
518 msg_i++;
519 }
520 }
521 if (msg_i != fd_num) {
522 error_report("%s: postcopy reply not fully consumed "
523 "%d vs %zd",
524 __func__, msg_i, fd_num);
525 return -1;
526 }
527 /* Now we've registered this with the postcopy code, we ack to the client,
528 * because now we're in the position to be able to deal with any faults
529 * it generates.
530 */
531 /* TODO: Use this for failure cases as well with a bad value */
532 msg.hdr.size = sizeof(msg.payload.u64);
533 msg.payload.u64 = 0; /* OK */
534 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
535 return -1;
536 }
537
538 return 0;
539}
540
541static int vhost_user_set_mem_table(struct vhost_dev *dev,
542 struct vhost_memory *mem)
543{
544 struct vhost_user *u = dev->opaque;
545 int fds[VHOST_MEMORY_MAX_NREGIONS];
546 int i, fd;
547 size_t fd_num = 0;
548 bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler;
549 bool reply_supported = virtio_has_feature(dev->protocol_features,
550 VHOST_USER_PROTOCOL_F_REPLY_ACK);
551
552 if (do_postcopy) {
553 /* Postcopy has enough differences that it's best done in it's own
554 * version
555 */
556 return vhost_user_set_mem_table_postcopy(dev, mem);
557 }
558
559 VhostUserMsg msg = {
560 .hdr.request = VHOST_USER_SET_MEM_TABLE,
561 .hdr.flags = VHOST_USER_VERSION,
562 };
563
564 if (reply_supported) {
565 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
566 }
567
568 for (i = 0; i < dev->mem->nregions; ++i) {
569 struct vhost_memory_region *reg = dev->mem->regions + i;
570 ram_addr_t offset;
571 MemoryRegion *mr;
572
573 assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
574 mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr,
575 &offset);
576 fd = memory_region_get_fd(mr);
577 if (fd > 0) {
578 if (fd_num == VHOST_MEMORY_MAX_NREGIONS) {
579 error_report("Failed preparing vhost-user memory table msg");
580 return -1;
581 }
582 msg.payload.memory.regions[fd_num].userspace_addr =
583 reg->userspace_addr;
584 msg.payload.memory.regions[fd_num].memory_size = reg->memory_size;
585 msg.payload.memory.regions[fd_num].guest_phys_addr =
586 reg->guest_phys_addr;
587 msg.payload.memory.regions[fd_num].mmap_offset = offset;
588 fds[fd_num++] = fd;
589 }
590 }
591
592 msg.payload.memory.nregions = fd_num;
593
594 if (!fd_num) {
595 error_report("Failed initializing vhost-user memory map, "
596 "consider using -object memory-backend-file share=on");
597 return -1;
598 }
599
600 msg.hdr.size = sizeof(msg.payload.memory.nregions);
601 msg.hdr.size += sizeof(msg.payload.memory.padding);
602 msg.hdr.size += fd_num * sizeof(VhostUserMemoryRegion);
603
604 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
605 return -1;
606 }
607
608 if (reply_supported) {
609 return process_message_reply(dev, &msg);
610 }
611
612 return 0;
613}
614
615static int vhost_user_set_vring_addr(struct vhost_dev *dev,
616 struct vhost_vring_addr *addr)
617{
618 VhostUserMsg msg = {
619 .hdr.request = VHOST_USER_SET_VRING_ADDR,
620 .hdr.flags = VHOST_USER_VERSION,
621 .payload.addr = *addr,
622 .hdr.size = sizeof(msg.payload.addr),
623 };
624
625 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
626 return -1;
627 }
628
629 return 0;
630}
631
632static int vhost_user_set_vring_endian(struct vhost_dev *dev,
633 struct vhost_vring_state *ring)
634{
635 bool cross_endian = virtio_has_feature(dev->protocol_features,
636 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN);
637 VhostUserMsg msg = {
638 .hdr.request = VHOST_USER_SET_VRING_ENDIAN,
639 .hdr.flags = VHOST_USER_VERSION,
640 .payload.state = *ring,
641 .hdr.size = sizeof(msg.payload.state),
642 };
643
644 if (!cross_endian) {
645 error_report("vhost-user trying to send unhandled ioctl");
646 return -1;
647 }
648
649 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
650 return -1;
651 }
652
653 return 0;
654}
655
656static int vhost_set_vring(struct vhost_dev *dev,
657 unsigned long int request,
658 struct vhost_vring_state *ring)
659{
660 VhostUserMsg msg = {
661 .hdr.request = request,
662 .hdr.flags = VHOST_USER_VERSION,
663 .payload.state = *ring,
664 .hdr.size = sizeof(msg.payload.state),
665 };
666
667 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
668 return -1;
669 }
670
671 return 0;
672}
673
674static int vhost_user_set_vring_num(struct vhost_dev *dev,
675 struct vhost_vring_state *ring)
676{
677 return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring);
678}
679
680static void vhost_user_host_notifier_restore(struct vhost_dev *dev,
681 int queue_idx)
682{
683 struct vhost_user *u = dev->opaque;
684 VhostUserHostNotifier *n = &u->user->notifier[queue_idx];
685 VirtIODevice *vdev = dev->vdev;
686
687 if (n->addr && !n->set) {
688 virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true);
689 n->set = true;
690 }
691}
692
693static void vhost_user_host_notifier_remove(struct vhost_dev *dev,
694 int queue_idx)
695{
696 struct vhost_user *u = dev->opaque;
697 VhostUserHostNotifier *n = &u->user->notifier[queue_idx];
698 VirtIODevice *vdev = dev->vdev;
699
700 if (n->addr && n->set) {
701 virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false);
702 n->set = false;
703 }
704}
705
706static int vhost_user_set_vring_base(struct vhost_dev *dev,
707 struct vhost_vring_state *ring)
708{
709 vhost_user_host_notifier_restore(dev, ring->index);
710
711 return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring);
712}
713
714static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
715{
716 int i;
717
718 if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
719 return -1;
720 }
721
722 for (i = 0; i < dev->nvqs; ++i) {
723 struct vhost_vring_state state = {
724 .index = dev->vq_index + i,
725 .num = enable,
726 };
727
728 vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
729 }
730
731 return 0;
732}
733
734static int vhost_user_get_vring_base(struct vhost_dev *dev,
735 struct vhost_vring_state *ring)
736{
737 VhostUserMsg msg = {
738 .hdr.request = VHOST_USER_GET_VRING_BASE,
739 .hdr.flags = VHOST_USER_VERSION,
740 .payload.state = *ring,
741 .hdr.size = sizeof(msg.payload.state),
742 };
743
744 vhost_user_host_notifier_remove(dev, ring->index);
745
746 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
747 return -1;
748 }
749
750 if (vhost_user_read(dev, &msg) < 0) {
751 return -1;
752 }
753
754 if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) {
755 error_report("Received unexpected msg type. Expected %d received %d",
756 VHOST_USER_GET_VRING_BASE, msg.hdr.request);
757 return -1;
758 }
759
760 if (msg.hdr.size != sizeof(msg.payload.state)) {
761 error_report("Received bad msg size.");
762 return -1;
763 }
764
765 *ring = msg.payload.state;
766
767 return 0;
768}
769
770static int vhost_set_vring_file(struct vhost_dev *dev,
771 VhostUserRequest request,
772 struct vhost_vring_file *file)
773{
774 int fds[VHOST_MEMORY_MAX_NREGIONS];
775 size_t fd_num = 0;
776 VhostUserMsg msg = {
777 .hdr.request = request,
778 .hdr.flags = VHOST_USER_VERSION,
779 .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
780 .hdr.size = sizeof(msg.payload.u64),
781 };
782
783 if (ioeventfd_enabled() && file->fd > 0) {
784 fds[fd_num++] = file->fd;
785 } else {
786 msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
787 }
788
789 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
790 return -1;
791 }
792
793 return 0;
794}
795
796static int vhost_user_set_vring_kick(struct vhost_dev *dev,
797 struct vhost_vring_file *file)
798{
799 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file);
800}
801
802static int vhost_user_set_vring_call(struct vhost_dev *dev,
803 struct vhost_vring_file *file)
804{
805 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);
806}
807
808static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64)
809{
810 VhostUserMsg msg = {
811 .hdr.request = request,
812 .hdr.flags = VHOST_USER_VERSION,
813 .payload.u64 = u64,
814 .hdr.size = sizeof(msg.payload.u64),
815 };
816
817 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
818 return -1;
819 }
820
821 return 0;
822}
823
824static int vhost_user_set_features(struct vhost_dev *dev,
825 uint64_t features)
826{
827 return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features);
828}
829
830static int vhost_user_set_protocol_features(struct vhost_dev *dev,
831 uint64_t features)
832{
833 return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features);
834}
835
836static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
837{
838 VhostUserMsg msg = {
839 .hdr.request = request,
840 .hdr.flags = VHOST_USER_VERSION,
841 };
842
843 if (vhost_user_one_time_request(request) && dev->vq_index != 0) {
844 return 0;
845 }
846
847 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
848 return -1;
849 }
850
851 if (vhost_user_read(dev, &msg) < 0) {
852 return -1;
853 }
854
855 if (msg.hdr.request != request) {
856 error_report("Received unexpected msg type. Expected %d received %d",
857 request, msg.hdr.request);
858 return -1;
859 }
860
861 if (msg.hdr.size != sizeof(msg.payload.u64)) {
862 error_report("Received bad msg size.");
863 return -1;
864 }
865
866 *u64 = msg.payload.u64;
867
868 return 0;
869}
870
871static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
872{
873 return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features);
874}
875
876static int vhost_user_set_owner(struct vhost_dev *dev)
877{
878 VhostUserMsg msg = {
879 .hdr.request = VHOST_USER_SET_OWNER,
880 .hdr.flags = VHOST_USER_VERSION,
881 };
882
883 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
884 return -1;
885 }
886
887 return 0;
888}
889
890static int vhost_user_reset_device(struct vhost_dev *dev)
891{
892 VhostUserMsg msg = {
893 .hdr.request = VHOST_USER_RESET_OWNER,
894 .hdr.flags = VHOST_USER_VERSION,
895 };
896
897 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
898 return -1;
899 }
900
901 return 0;
902}
903
904static int vhost_user_slave_handle_config_change(struct vhost_dev *dev)
905{
906 int ret = -1;
907
908 if (!dev->config_ops) {
909 return -1;
910 }
911
912 if (dev->config_ops->vhost_dev_config_notifier) {
913 ret = dev->config_ops->vhost_dev_config_notifier(dev);
914 }
915
916 return ret;
917}
918
919static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev,
920 VhostUserVringArea *area,
921 int fd)
922{
923 int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK;
924 size_t page_size = qemu_real_host_page_size;
925 struct vhost_user *u = dev->opaque;
926 VhostUserState *user = u->user;
927 VirtIODevice *vdev = dev->vdev;
928 VhostUserHostNotifier *n;
929 void *addr;
930 char *name;
931
932 if (!virtio_has_feature(dev->protocol_features,
933 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) ||
934 vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) {
935 return -1;
936 }
937
938 n = &user->notifier[queue_idx];
939
940 if (n->addr) {
941 virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false);
942 object_unparent(OBJECT(&n->mr));
943 munmap(n->addr, page_size);
944 n->addr = NULL;
945 }
946
947 if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
948 return 0;
949 }
950
951 /* Sanity check. */
952 if (area->size != page_size) {
953 return -1;
954 }
955
956 addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
957 fd, area->offset);
958 if (addr == MAP_FAILED) {
959 return -1;
960 }
961
962 name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]",
963 user, queue_idx);
964 memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
965 page_size, addr);
966 g_free(name);
967
968 if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) {
969 munmap(addr, page_size);
970 return -1;
971 }
972
973 n->addr = addr;
974 n->set = true;
975
976 return 0;
977}
978
979static void slave_read(void *opaque)
980{
981 struct vhost_dev *dev = opaque;
982 struct vhost_user *u = dev->opaque;
983 VhostUserHeader hdr = { 0, };
984 VhostUserPayload payload = { 0, };
985 int size, ret = 0;
986 struct iovec iov;
987 struct msghdr msgh;
988 int fd[VHOST_USER_SLAVE_MAX_FDS];
989 char control[CMSG_SPACE(sizeof(fd))];
990 struct cmsghdr *cmsg;
991 int i, fdsize = 0;
992
993 memset(&msgh, 0, sizeof(msgh));
994 msgh.msg_iov = &iov;
995 msgh.msg_iovlen = 1;
996 msgh.msg_control = control;
997 msgh.msg_controllen = sizeof(control);
998
999 memset(fd, -1, sizeof(fd));
1000
1001 /* Read header */
1002 iov.iov_base = &hdr;
1003 iov.iov_len = VHOST_USER_HDR_SIZE;
1004
1005 do {
1006 size = recvmsg(u->slave_fd, &msgh, 0);
1007 } while (size < 0 && (errno == EINTR || errno == EAGAIN));
1008
1009 if (size != VHOST_USER_HDR_SIZE) {
1010 error_report("Failed to read from slave.");
1011 goto err;
1012 }
1013
1014 if (msgh.msg_flags & MSG_CTRUNC) {
1015 error_report("Truncated message.");
1016 goto err;
1017 }
1018
1019 for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
1020 cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
1021 if (cmsg->cmsg_level == SOL_SOCKET &&
1022 cmsg->cmsg_type == SCM_RIGHTS) {
1023 fdsize = cmsg->cmsg_len - CMSG_LEN(0);
1024 memcpy(fd, CMSG_DATA(cmsg), fdsize);
1025 break;
1026 }
1027 }
1028
1029 if (hdr.size > VHOST_USER_PAYLOAD_SIZE) {
1030 error_report("Failed to read msg header."
1031 " Size %d exceeds the maximum %zu.", hdr.size,
1032 VHOST_USER_PAYLOAD_SIZE);
1033 goto err;
1034 }
1035
1036 /* Read payload */
1037 do {
1038 size = read(u->slave_fd, &payload, hdr.size);
1039 } while (size < 0 && (errno == EINTR || errno == EAGAIN));
1040
1041 if (size != hdr.size) {
1042 error_report("Failed to read payload from slave.");
1043 goto err;
1044 }
1045
1046 switch (hdr.request) {
1047 case VHOST_USER_SLAVE_IOTLB_MSG:
1048 ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb);
1049 break;
1050 case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG :
1051 ret = vhost_user_slave_handle_config_change(dev);
1052 break;
1053 case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG:
1054 ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area,
1055 fd[0]);
1056 break;
1057 default:
1058 error_report("Received unexpected msg type.");
1059 ret = -EINVAL;
1060 }
1061
1062 /* Close the remaining file descriptors. */
1063 for (i = 0; i < fdsize; i++) {
1064 if (fd[i] != -1) {
1065 close(fd[i]);
1066 }
1067 }
1068
1069 /*
1070 * REPLY_ACK feature handling. Other reply types has to be managed
1071 * directly in their request handlers.
1072 */
1073 if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
1074 struct iovec iovec[2];
1075
1076
1077 hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
1078 hdr.flags |= VHOST_USER_REPLY_MASK;
1079
1080 payload.u64 = !!ret;
1081 hdr.size = sizeof(payload.u64);
1082
1083 iovec[0].iov_base = &hdr;
1084 iovec[0].iov_len = VHOST_USER_HDR_SIZE;
1085 iovec[1].iov_base = &payload;
1086 iovec[1].iov_len = hdr.size;
1087
1088 do {
1089 size = writev(u->slave_fd, iovec, ARRAY_SIZE(iovec));
1090 } while (size < 0 && (errno == EINTR || errno == EAGAIN));
1091
1092 if (size != VHOST_USER_HDR_SIZE + hdr.size) {
1093 error_report("Failed to send msg reply to slave.");
1094 goto err;
1095 }
1096 }
1097
1098 return;
1099
1100err:
1101 qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
1102 close(u->slave_fd);
1103 u->slave_fd = -1;
1104 for (i = 0; i < fdsize; i++) {
1105 if (fd[i] != -1) {
1106 close(fd[i]);
1107 }
1108 }
1109 return;
1110}
1111
1112static int vhost_setup_slave_channel(struct vhost_dev *dev)
1113{
1114 VhostUserMsg msg = {
1115 .hdr.request = VHOST_USER_SET_SLAVE_REQ_FD,
1116 .hdr.flags = VHOST_USER_VERSION,
1117 };
1118 struct vhost_user *u = dev->opaque;
1119 int sv[2], ret = 0;
1120 bool reply_supported = virtio_has_feature(dev->protocol_features,
1121 VHOST_USER_PROTOCOL_F_REPLY_ACK);
1122
1123 if (!virtio_has_feature(dev->protocol_features,
1124 VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
1125 return 0;
1126 }
1127
1128 if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
1129 error_report("socketpair() failed");
1130 return -1;
1131 }
1132
1133 u->slave_fd = sv[0];
1134 qemu_set_fd_handler(u->slave_fd, slave_read, NULL, dev);
1135
1136 if (reply_supported) {
1137 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1138 }
1139
1140 ret = vhost_user_write(dev, &msg, &sv[1], 1);
1141 if (ret) {
1142 goto out;
1143 }
1144
1145 if (reply_supported) {
1146 ret = process_message_reply(dev, &msg);
1147 }
1148
1149out:
1150 close(sv[1]);
1151 if (ret) {
1152 qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
1153 close(u->slave_fd);
1154 u->slave_fd = -1;
1155 }
1156
1157 return ret;
1158}
1159
1160#ifdef CONFIG_LINUX
1161/*
1162 * Called back from the postcopy fault thread when a fault is received on our
1163 * ufd.
1164 * TODO: This is Linux specific
1165 */
1166static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd,
1167 void *ufd)
1168{
1169 struct vhost_dev *dev = pcfd->data;
1170 struct vhost_user *u = dev->opaque;
1171 struct uffd_msg *msg = ufd;
1172 uint64_t faultaddr = msg->arg.pagefault.address;
1173 RAMBlock *rb = NULL;
1174 uint64_t rb_offset;
1175 int i;
1176
1177 trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr,
1178 dev->mem->nregions);
1179 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
1180 trace_vhost_user_postcopy_fault_handler_loop(i,
1181 u->postcopy_client_bases[i], dev->mem->regions[i].memory_size);
1182 if (faultaddr >= u->postcopy_client_bases[i]) {
1183 /* Ofset of the fault address in the vhost region */
1184 uint64_t region_offset = faultaddr - u->postcopy_client_bases[i];
1185 if (region_offset < dev->mem->regions[i].memory_size) {
1186 rb_offset = region_offset + u->region_rb_offset[i];
1187 trace_vhost_user_postcopy_fault_handler_found(i,
1188 region_offset, rb_offset);
1189 rb = u->region_rb[i];
1190 return postcopy_request_shared_page(pcfd, rb, faultaddr,
1191 rb_offset);
1192 }
1193 }
1194 }
1195 error_report("%s: Failed to find region for fault %" PRIx64,
1196 __func__, faultaddr);
1197 return -1;
1198}
1199
1200static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb,
1201 uint64_t offset)
1202{
1203 struct vhost_dev *dev = pcfd->data;
1204 struct vhost_user *u = dev->opaque;
1205 int i;
1206
1207 trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset);
1208
1209 if (!u) {
1210 return 0;
1211 }
1212 /* Translate the offset into an address in the clients address space */
1213 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
1214 if (u->region_rb[i] == rb &&
1215 offset >= u->region_rb_offset[i] &&
1216 offset < (u->region_rb_offset[i] +
1217 dev->mem->regions[i].memory_size)) {
1218 uint64_t client_addr = (offset - u->region_rb_offset[i]) +
1219 u->postcopy_client_bases[i];
1220 trace_vhost_user_postcopy_waker_found(client_addr);
1221 return postcopy_wake_shared(pcfd, client_addr, rb);
1222 }
1223 }
1224
1225 trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset);
1226 return 0;
1227}
1228#endif
1229
1230/*
1231 * Called at the start of an inbound postcopy on reception of the
1232 * 'advise' command.
1233 */
1234static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp)
1235{
1236#ifdef CONFIG_LINUX
1237 struct vhost_user *u = dev->opaque;
1238 CharBackend *chr = u->user->chr;
1239 int ufd;
1240 VhostUserMsg msg = {
1241 .hdr.request = VHOST_USER_POSTCOPY_ADVISE,
1242 .hdr.flags = VHOST_USER_VERSION,
1243 };
1244
1245 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1246 error_setg(errp, "Failed to send postcopy_advise to vhost");
1247 return -1;
1248 }
1249
1250 if (vhost_user_read(dev, &msg) < 0) {
1251 error_setg(errp, "Failed to get postcopy_advise reply from vhost");
1252 return -1;
1253 }
1254
1255 if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) {
1256 error_setg(errp, "Unexpected msg type. Expected %d received %d",
1257 VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request);
1258 return -1;
1259 }
1260
1261 if (msg.hdr.size) {
1262 error_setg(errp, "Received bad msg size.");
1263 return -1;
1264 }
1265 ufd = qemu_chr_fe_get_msgfd(chr);
1266 if (ufd < 0) {
1267 error_setg(errp, "%s: Failed to get ufd", __func__);
1268 return -1;
1269 }
1270 qemu_set_nonblock(ufd);
1271
1272 /* register ufd with userfault thread */
1273 u->postcopy_fd.fd = ufd;
1274 u->postcopy_fd.data = dev;
1275 u->postcopy_fd.handler = vhost_user_postcopy_fault_handler;
1276 u->postcopy_fd.waker = vhost_user_postcopy_waker;
1277 u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */
1278 postcopy_register_shared_ufd(&u->postcopy_fd);
1279 return 0;
1280#else
1281 error_setg(errp, "Postcopy not supported on non-Linux systems");
1282 return -1;
1283#endif
1284}
1285
1286/*
1287 * Called at the switch to postcopy on reception of the 'listen' command.
1288 */
1289static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp)
1290{
1291 struct vhost_user *u = dev->opaque;
1292 int ret;
1293 VhostUserMsg msg = {
1294 .hdr.request = VHOST_USER_POSTCOPY_LISTEN,
1295 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1296 };
1297 u->postcopy_listen = true;
1298 trace_vhost_user_postcopy_listen();
1299 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1300 error_setg(errp, "Failed to send postcopy_listen to vhost");
1301 return -1;
1302 }
1303
1304 ret = process_message_reply(dev, &msg);
1305 if (ret) {
1306 error_setg(errp, "Failed to receive reply to postcopy_listen");
1307 return ret;
1308 }
1309
1310 return 0;
1311}
1312
1313/*
1314 * Called at the end of postcopy
1315 */
1316static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp)
1317{
1318 VhostUserMsg msg = {
1319 .hdr.request = VHOST_USER_POSTCOPY_END,
1320 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1321 };
1322 int ret;
1323 struct vhost_user *u = dev->opaque;
1324
1325 trace_vhost_user_postcopy_end_entry();
1326 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1327 error_setg(errp, "Failed to send postcopy_end to vhost");
1328 return -1;
1329 }
1330
1331 ret = process_message_reply(dev, &msg);
1332 if (ret) {
1333 error_setg(errp, "Failed to receive reply to postcopy_end");
1334 return ret;
1335 }
1336 postcopy_unregister_shared_ufd(&u->postcopy_fd);
1337 close(u->postcopy_fd.fd);
1338 u->postcopy_fd.handler = NULL;
1339
1340 trace_vhost_user_postcopy_end_exit();
1341
1342 return 0;
1343}
1344
1345static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier,
1346 void *opaque)
1347{
1348 struct PostcopyNotifyData *pnd = opaque;
1349 struct vhost_user *u = container_of(notifier, struct vhost_user,
1350 postcopy_notifier);
1351 struct vhost_dev *dev = u->dev;
1352
1353 switch (pnd->reason) {
1354 case POSTCOPY_NOTIFY_PROBE:
1355 if (!virtio_has_feature(dev->protocol_features,
1356 VHOST_USER_PROTOCOL_F_PAGEFAULT)) {
1357 /* TODO: Get the device name into this error somehow */
1358 error_setg(pnd->errp,
1359 "vhost-user backend not capable of postcopy");
1360 return -ENOENT;
1361 }
1362 break;
1363
1364 case POSTCOPY_NOTIFY_INBOUND_ADVISE:
1365 return vhost_user_postcopy_advise(dev, pnd->errp);
1366
1367 case POSTCOPY_NOTIFY_INBOUND_LISTEN:
1368 return vhost_user_postcopy_listen(dev, pnd->errp);
1369
1370 case POSTCOPY_NOTIFY_INBOUND_END:
1371 return vhost_user_postcopy_end(dev, pnd->errp);
1372
1373 default:
1374 /* We ignore notifications we don't know */
1375 break;
1376 }
1377
1378 return 0;
1379}
1380
1381static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque)
1382{
1383 uint64_t features, protocol_features;
1384 struct vhost_user *u;
1385 int err;
1386
1387 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1388
1389 u = g_new0(struct vhost_user, 1);
1390 u->user = opaque;
1391 u->slave_fd = -1;
1392 u->dev = dev;
1393 dev->opaque = u;
1394
1395 err = vhost_user_get_features(dev, &features);
1396 if (err < 0) {
1397 return err;
1398 }
1399
1400 if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
1401 dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
1402
1403 err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
1404 &protocol_features);
1405 if (err < 0) {
1406 return err;
1407 }
1408
1409 dev->protocol_features =
1410 protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK;
1411
1412 if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) {
1413 /* Don't acknowledge CONFIG feature if device doesn't support it */
1414 dev->protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
1415 } else if (!(protocol_features &
1416 (1ULL << VHOST_USER_PROTOCOL_F_CONFIG))) {
1417 error_report("Device expects VHOST_USER_PROTOCOL_F_CONFIG "
1418 "but backend does not support it.");
1419 return -1;
1420 }
1421
1422 err = vhost_user_set_protocol_features(dev, dev->protocol_features);
1423 if (err < 0) {
1424 return err;
1425 }
1426
1427 /* query the max queues we support if backend supports Multiple Queue */
1428 if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) {
1429 err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM,
1430 &dev->max_queues);
1431 if (err < 0) {
1432 return err;
1433 }
1434 }
1435
1436 if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) &&
1437 !(virtio_has_feature(dev->protocol_features,
1438 VHOST_USER_PROTOCOL_F_SLAVE_REQ) &&
1439 virtio_has_feature(dev->protocol_features,
1440 VHOST_USER_PROTOCOL_F_REPLY_ACK))) {
1441 error_report("IOMMU support requires reply-ack and "
1442 "slave-req protocol features.");
1443 return -1;
1444 }
1445 }
1446
1447 if (dev->migration_blocker == NULL &&
1448 !virtio_has_feature(dev->protocol_features,
1449 VHOST_USER_PROTOCOL_F_LOG_SHMFD)) {
1450 error_setg(&dev->migration_blocker,
1451 "Migration disabled: vhost-user backend lacks "
1452 "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.");
1453 }
1454
1455 err = vhost_setup_slave_channel(dev);
1456 if (err < 0) {
1457 return err;
1458 }
1459
1460 u->postcopy_notifier.notify = vhost_user_postcopy_notifier;
1461 postcopy_add_notifier(&u->postcopy_notifier);
1462
1463 return 0;
1464}
1465
1466static int vhost_user_backend_cleanup(struct vhost_dev *dev)
1467{
1468 struct vhost_user *u;
1469
1470 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1471
1472 u = dev->opaque;
1473 if (u->postcopy_notifier.notify) {
1474 postcopy_remove_notifier(&u->postcopy_notifier);
1475 u->postcopy_notifier.notify = NULL;
1476 }
1477 u->postcopy_listen = false;
1478 if (u->postcopy_fd.handler) {
1479 postcopy_unregister_shared_ufd(&u->postcopy_fd);
1480 close(u->postcopy_fd.fd);
1481 u->postcopy_fd.handler = NULL;
1482 }
1483 if (u->slave_fd >= 0) {
1484 qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
1485 close(u->slave_fd);
1486 u->slave_fd = -1;
1487 }
1488 g_free(u->region_rb);
1489 u->region_rb = NULL;
1490 g_free(u->region_rb_offset);
1491 u->region_rb_offset = NULL;
1492 u->region_rb_len = 0;
1493 g_free(u);
1494 dev->opaque = 0;
1495
1496 return 0;
1497}
1498
1499static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx)
1500{
1501 assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
1502
1503 return idx;
1504}
1505
1506static int vhost_user_memslots_limit(struct vhost_dev *dev)
1507{
1508 return VHOST_MEMORY_MAX_NREGIONS;
1509}
1510
1511static bool vhost_user_requires_shm_log(struct vhost_dev *dev)
1512{
1513 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1514
1515 return virtio_has_feature(dev->protocol_features,
1516 VHOST_USER_PROTOCOL_F_LOG_SHMFD);
1517}
1518
1519static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr)
1520{
1521 VhostUserMsg msg = { };
1522
1523 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1524
1525 /* If guest supports GUEST_ANNOUNCE do nothing */
1526 if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) {
1527 return 0;
1528 }
1529
1530 /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */
1531 if (virtio_has_feature(dev->protocol_features,
1532 VHOST_USER_PROTOCOL_F_RARP)) {
1533 msg.hdr.request = VHOST_USER_SEND_RARP;
1534 msg.hdr.flags = VHOST_USER_VERSION;
1535 memcpy((char *)&msg.payload.u64, mac_addr, 6);
1536 msg.hdr.size = sizeof(msg.payload.u64);
1537
1538 return vhost_user_write(dev, &msg, NULL, 0);
1539 }
1540 return -1;
1541}
1542
1543static bool vhost_user_can_merge(struct vhost_dev *dev,
1544 uint64_t start1, uint64_t size1,
1545 uint64_t start2, uint64_t size2)
1546{
1547 ram_addr_t offset;
1548 int mfd, rfd;
1549 MemoryRegion *mr;
1550
1551 mr = memory_region_from_host((void *)(uintptr_t)start1, &offset);
1552 mfd = memory_region_get_fd(mr);
1553
1554 mr = memory_region_from_host((void *)(uintptr_t)start2, &offset);
1555 rfd = memory_region_get_fd(mr);
1556
1557 return mfd == rfd;
1558}
1559
1560static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
1561{
1562 VhostUserMsg msg;
1563 bool reply_supported = virtio_has_feature(dev->protocol_features,
1564 VHOST_USER_PROTOCOL_F_REPLY_ACK);
1565
1566 if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) {
1567 return 0;
1568 }
1569
1570 msg.hdr.request = VHOST_USER_NET_SET_MTU;
1571 msg.payload.u64 = mtu;
1572 msg.hdr.size = sizeof(msg.payload.u64);
1573 msg.hdr.flags = VHOST_USER_VERSION;
1574 if (reply_supported) {
1575 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1576 }
1577
1578 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1579 return -1;
1580 }
1581
1582 /* If reply_ack supported, slave has to ack specified MTU is valid */
1583 if (reply_supported) {
1584 return process_message_reply(dev, &msg);
1585 }
1586
1587 return 0;
1588}
1589
1590static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev,
1591 struct vhost_iotlb_msg *imsg)
1592{
1593 VhostUserMsg msg = {
1594 .hdr.request = VHOST_USER_IOTLB_MSG,
1595 .hdr.size = sizeof(msg.payload.iotlb),
1596 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1597 .payload.iotlb = *imsg,
1598 };
1599
1600 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1601 return -EFAULT;
1602 }
1603
1604 return process_message_reply(dev, &msg);
1605}
1606
1607
/*
 * IOTLB callback enable/disable hook.
 *
 * Intentionally empty for vhost-user: @dev and @enabled are ignored.
 */
static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled)
{
    /* No-op as the receive channel is not dedicated to IOTLB messages. */
    return;
}
1612
1613static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config,
1614 uint32_t config_len)
1615{
1616 VhostUserMsg msg = {
1617 .hdr.request = VHOST_USER_GET_CONFIG,
1618 .hdr.flags = VHOST_USER_VERSION,
1619 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len,
1620 };
1621
1622 if (!virtio_has_feature(dev->protocol_features,
1623 VHOST_USER_PROTOCOL_F_CONFIG)) {
1624 return -1;
1625 }
1626
1627 if (config_len > VHOST_USER_MAX_CONFIG_SIZE) {
1628 return -1;
1629 }
1630
1631 msg.payload.config.offset = 0;
1632 msg.payload.config.size = config_len;
1633 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1634 return -1;
1635 }
1636
1637 if (vhost_user_read(dev, &msg) < 0) {
1638 return -1;
1639 }
1640
1641 if (msg.hdr.request != VHOST_USER_GET_CONFIG) {
1642 error_report("Received unexpected msg type. Expected %d received %d",
1643 VHOST_USER_GET_CONFIG, msg.hdr.request);
1644 return -1;
1645 }
1646
1647 if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) {
1648 error_report("Received bad msg size.");
1649 return -1;
1650 }
1651
1652 memcpy(config, msg.payload.config.region, config_len);
1653
1654 return 0;
1655}
1656
1657static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data,
1658 uint32_t offset, uint32_t size, uint32_t flags)
1659{
1660 uint8_t *p;
1661 bool reply_supported = virtio_has_feature(dev->protocol_features,
1662 VHOST_USER_PROTOCOL_F_REPLY_ACK);
1663
1664 VhostUserMsg msg = {
1665 .hdr.request = VHOST_USER_SET_CONFIG,
1666 .hdr.flags = VHOST_USER_VERSION,
1667 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size,
1668 };
1669
1670 if (!virtio_has_feature(dev->protocol_features,
1671 VHOST_USER_PROTOCOL_F_CONFIG)) {
1672 return -1;
1673 }
1674
1675 if (reply_supported) {
1676 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1677 }
1678
1679 if (size > VHOST_USER_MAX_CONFIG_SIZE) {
1680 return -1;
1681 }
1682
1683 msg.payload.config.offset = offset,
1684 msg.payload.config.size = size,
1685 msg.payload.config.flags = flags,
1686 p = msg.payload.config.region;
1687 memcpy(p, data, size);
1688
1689 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1690 return -1;
1691 }
1692
1693 if (reply_supported) {
1694 return process_message_reply(dev, &msg);
1695 }
1696
1697 return 0;
1698}
1699
1700static int vhost_user_crypto_create_session(struct vhost_dev *dev,
1701 void *session_info,
1702 uint64_t *session_id)
1703{
1704 bool crypto_session = virtio_has_feature(dev->protocol_features,
1705 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
1706 CryptoDevBackendSymSessionInfo *sess_info = session_info;
1707 VhostUserMsg msg = {
1708 .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION,
1709 .hdr.flags = VHOST_USER_VERSION,
1710 .hdr.size = sizeof(msg.payload.session),
1711 };
1712
1713 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1714
1715 if (!crypto_session) {
1716 error_report("vhost-user trying to send unhandled ioctl");
1717 return -1;
1718 }
1719
1720 memcpy(&msg.payload.session.session_setup_data, sess_info,
1721 sizeof(CryptoDevBackendSymSessionInfo));
1722 if (sess_info->key_len) {
1723 memcpy(&msg.payload.session.key, sess_info->cipher_key,
1724 sess_info->key_len);
1725 }
1726 if (sess_info->auth_key_len > 0) {
1727 memcpy(&msg.payload.session.auth_key, sess_info->auth_key,
1728 sess_info->auth_key_len);
1729 }
1730 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1731 error_report("vhost_user_write() return -1, create session failed");
1732 return -1;
1733 }
1734
1735 if (vhost_user_read(dev, &msg) < 0) {
1736 error_report("vhost_user_read() return -1, create session failed");
1737 return -1;
1738 }
1739
1740 if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) {
1741 error_report("Received unexpected msg type. Expected %d received %d",
1742 VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request);
1743 return -1;
1744 }
1745
1746 if (msg.hdr.size != sizeof(msg.payload.session)) {
1747 error_report("Received bad msg size.");
1748 return -1;
1749 }
1750
1751 if (msg.payload.session.session_id < 0) {
1752 error_report("Bad session id: %" PRId64 "",
1753 msg.payload.session.session_id);
1754 return -1;
1755 }
1756 *session_id = msg.payload.session.session_id;
1757
1758 return 0;
1759}
1760
1761static int
1762vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id)
1763{
1764 bool crypto_session = virtio_has_feature(dev->protocol_features,
1765 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
1766 VhostUserMsg msg = {
1767 .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION,
1768 .hdr.flags = VHOST_USER_VERSION,
1769 .hdr.size = sizeof(msg.payload.u64),
1770 };
1771 msg.payload.u64 = session_id;
1772
1773 if (!crypto_session) {
1774 error_report("vhost-user trying to send unhandled ioctl");
1775 return -1;
1776 }
1777
1778 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1779 error_report("vhost_user_write() return -1, close session failed");
1780 return -1;
1781 }
1782
1783 return 0;
1784}
1785
1786static bool vhost_user_mem_section_filter(struct vhost_dev *dev,
1787 MemoryRegionSection *section)
1788{
1789 bool result;
1790
1791 result = memory_region_get_fd(section->mr) >= 0;
1792
1793 return result;
1794}
1795
1796static int vhost_user_get_inflight_fd(struct vhost_dev *dev,
1797 uint16_t queue_size,
1798 struct vhost_inflight *inflight)
1799{
1800 void *addr;
1801 int fd;
1802 struct vhost_user *u = dev->opaque;
1803 CharBackend *chr = u->user->chr;
1804 VhostUserMsg msg = {
1805 .hdr.request = VHOST_USER_GET_INFLIGHT_FD,
1806 .hdr.flags = VHOST_USER_VERSION,
1807 .payload.inflight.num_queues = dev->nvqs,
1808 .payload.inflight.queue_size = queue_size,
1809 .hdr.size = sizeof(msg.payload.inflight),
1810 };
1811
1812 if (!virtio_has_feature(dev->protocol_features,
1813 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
1814 return 0;
1815 }
1816
1817 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1818 return -1;
1819 }
1820
1821 if (vhost_user_read(dev, &msg) < 0) {
1822 return -1;
1823 }
1824
1825 if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) {
1826 error_report("Received unexpected msg type. "
1827 "Expected %d received %d",
1828 VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request);
1829 return -1;
1830 }
1831
1832 if (msg.hdr.size != sizeof(msg.payload.inflight)) {
1833 error_report("Received bad msg size.");
1834 return -1;
1835 }
1836
1837 if (!msg.payload.inflight.mmap_size) {
1838 return 0;
1839 }
1840
1841 fd = qemu_chr_fe_get_msgfd(chr);
1842 if (fd < 0) {
1843 error_report("Failed to get mem fd");
1844 return -1;
1845 }
1846
1847 addr = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE,
1848 MAP_SHARED, fd, msg.payload.inflight.mmap_offset);
1849
1850 if (addr == MAP_FAILED) {
1851 error_report("Failed to mmap mem fd");
1852 close(fd);
1853 return -1;
1854 }
1855
1856 inflight->addr = addr;
1857 inflight->fd = fd;
1858 inflight->size = msg.payload.inflight.mmap_size;
1859 inflight->offset = msg.payload.inflight.mmap_offset;
1860 inflight->queue_size = queue_size;
1861
1862 return 0;
1863}
1864
1865static int vhost_user_set_inflight_fd(struct vhost_dev *dev,
1866 struct vhost_inflight *inflight)
1867{
1868 VhostUserMsg msg = {
1869 .hdr.request = VHOST_USER_SET_INFLIGHT_FD,
1870 .hdr.flags = VHOST_USER_VERSION,
1871 .payload.inflight.mmap_size = inflight->size,
1872 .payload.inflight.mmap_offset = inflight->offset,
1873 .payload.inflight.num_queues = dev->nvqs,
1874 .payload.inflight.queue_size = inflight->queue_size,
1875 .hdr.size = sizeof(msg.payload.inflight),
1876 };
1877
1878 if (!virtio_has_feature(dev->protocol_features,
1879 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
1880 return 0;
1881 }
1882
1883 if (vhost_user_write(dev, &msg, &inflight->fd, 1) < 0) {
1884 return -1;
1885 }
1886
1887 return 0;
1888}
1889
1890bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
1891{
1892 if (user->chr) {
1893 error_setg(errp, "Cannot initialize vhost-user state");
1894 return false;
1895 }
1896 user->chr = chr;
1897 return true;
1898}
1899
1900void vhost_user_cleanup(VhostUserState *user)
1901{
1902 int i;
1903
1904 if (!user->chr) {
1905 return;
1906 }
1907
1908 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1909 if (user->notifier[i].addr) {
1910 object_unparent(OBJECT(&user->notifier[i].mr));
1911 munmap(user->notifier[i].addr, qemu_real_host_page_size);
1912 user->notifier[i].addr = NULL;
1913 }
1914 }
1915 user->chr = NULL;
1916}
1917
1918const VhostOps user_ops = {
1919 .backend_type = VHOST_BACKEND_TYPE_USER,
1920 .vhost_backend_init = vhost_user_backend_init,
1921 .vhost_backend_cleanup = vhost_user_backend_cleanup,
1922 .vhost_backend_memslots_limit = vhost_user_memslots_limit,
1923 .vhost_set_log_base = vhost_user_set_log_base,
1924 .vhost_set_mem_table = vhost_user_set_mem_table,
1925 .vhost_set_vring_addr = vhost_user_set_vring_addr,
1926 .vhost_set_vring_endian = vhost_user_set_vring_endian,
1927 .vhost_set_vring_num = vhost_user_set_vring_num,
1928 .vhost_set_vring_base = vhost_user_set_vring_base,
1929 .vhost_get_vring_base = vhost_user_get_vring_base,
1930 .vhost_set_vring_kick = vhost_user_set_vring_kick,
1931 .vhost_set_vring_call = vhost_user_set_vring_call,
1932 .vhost_set_features = vhost_user_set_features,
1933 .vhost_get_features = vhost_user_get_features,
1934 .vhost_set_owner = vhost_user_set_owner,
1935 .vhost_reset_device = vhost_user_reset_device,
1936 .vhost_get_vq_index = vhost_user_get_vq_index,
1937 .vhost_set_vring_enable = vhost_user_set_vring_enable,
1938 .vhost_requires_shm_log = vhost_user_requires_shm_log,
1939 .vhost_migration_done = vhost_user_migration_done,
1940 .vhost_backend_can_merge = vhost_user_can_merge,
1941 .vhost_net_set_mtu = vhost_user_net_set_mtu,
1942 .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback,
1943 .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg,
1944 .vhost_get_config = vhost_user_get_config,
1945 .vhost_set_config = vhost_user_set_config,
1946 .vhost_crypto_create_session = vhost_user_crypto_create_session,
1947 .vhost_crypto_close_session = vhost_user_crypto_close_session,
1948 .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
1949 .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
1950 .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
1951};
1952