1/*
2 * Hyper-V guest/hypervisor interaction
3 *
4 * Copyright (c) 2015-2018 Virtuozzo International GmbH.
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
8 */
9
10#include "qemu/osdep.h"
11#include "qemu/main-loop.h"
12#include "qemu/module.h"
13#include "qapi/error.h"
14#include "exec/address-spaces.h"
15#include "sysemu/kvm.h"
16#include "qemu/bitops.h"
17#include "qemu/error-report.h"
18#include "qemu/queue.h"
19#include "qemu/rcu.h"
20#include "qemu/rcu_queue.h"
21#include "hw/hyperv/hyperv.h"
22
23typedef struct SynICState {
24 DeviceState parent_obj;
25
26 CPUState *cs;
27
28 bool enabled;
29 hwaddr msg_page_addr;
30 hwaddr event_page_addr;
31 MemoryRegion msg_page_mr;
32 MemoryRegion event_page_mr;
33 struct hyperv_message_page *msg_page;
34 struct hyperv_event_flags_page *event_page;
35} SynICState;
36
37#define TYPE_SYNIC "hyperv-synic"
38#define SYNIC(obj) OBJECT_CHECK(SynICState, (obj), TYPE_SYNIC)
39
40static SynICState *get_synic(CPUState *cs)
41{
42 return SYNIC(object_resolve_path_component(OBJECT(cs), "synic"));
43}
44
45static void synic_update(SynICState *synic, bool enable,
46 hwaddr msg_page_addr, hwaddr event_page_addr)
47{
48
49 synic->enabled = enable;
50 if (synic->msg_page_addr != msg_page_addr) {
51 if (synic->msg_page_addr) {
52 memory_region_del_subregion(get_system_memory(),
53 &synic->msg_page_mr);
54 }
55 if (msg_page_addr) {
56 memory_region_add_subregion(get_system_memory(), msg_page_addr,
57 &synic->msg_page_mr);
58 }
59 synic->msg_page_addr = msg_page_addr;
60 }
61 if (synic->event_page_addr != event_page_addr) {
62 if (synic->event_page_addr) {
63 memory_region_del_subregion(get_system_memory(),
64 &synic->event_page_mr);
65 }
66 if (event_page_addr) {
67 memory_region_add_subregion(get_system_memory(), event_page_addr,
68 &synic->event_page_mr);
69 }
70 synic->event_page_addr = event_page_addr;
71 }
72}
73
74void hyperv_synic_update(CPUState *cs, bool enable,
75 hwaddr msg_page_addr, hwaddr event_page_addr)
76{
77 SynICState *synic = get_synic(cs);
78
79 if (!synic) {
80 return;
81 }
82
83 synic_update(synic, enable, msg_page_addr, event_page_addr);
84}
85
86static void synic_realize(DeviceState *dev, Error **errp)
87{
88 Object *obj = OBJECT(dev);
89 SynICState *synic = SYNIC(dev);
90 char *msgp_name, *eventp_name;
91 uint32_t vp_index;
92
93 /* memory region names have to be globally unique */
94 vp_index = hyperv_vp_index(synic->cs);
95 msgp_name = g_strdup_printf("synic-%u-msg-page", vp_index);
96 eventp_name = g_strdup_printf("synic-%u-event-page", vp_index);
97
98 memory_region_init_ram(&synic->msg_page_mr, obj, msgp_name,
99 sizeof(*synic->msg_page), &error_abort);
100 memory_region_init_ram(&synic->event_page_mr, obj, eventp_name,
101 sizeof(*synic->event_page), &error_abort);
102 synic->msg_page = memory_region_get_ram_ptr(&synic->msg_page_mr);
103 synic->event_page = memory_region_get_ram_ptr(&synic->event_page_mr);
104
105 g_free(msgp_name);
106 g_free(eventp_name);
107}
108static void synic_reset(DeviceState *dev)
109{
110 SynICState *synic = SYNIC(dev);
111 memset(synic->msg_page, 0, sizeof(*synic->msg_page));
112 memset(synic->event_page, 0, sizeof(*synic->event_page));
113 synic_update(synic, false, 0, 0);
114}
115
116static void synic_class_init(ObjectClass *klass, void *data)
117{
118 DeviceClass *dc = DEVICE_CLASS(klass);
119
120 dc->realize = synic_realize;
121 dc->reset = synic_reset;
122 dc->user_creatable = false;
123}
124
125void hyperv_synic_add(CPUState *cs)
126{
127 Object *obj;
128 SynICState *synic;
129
130 obj = object_new(TYPE_SYNIC);
131 synic = SYNIC(obj);
132 synic->cs = cs;
133 object_property_add_child(OBJECT(cs), "synic", obj, &error_abort);
134 object_unref(obj);
135 object_property_set_bool(obj, true, "realized", &error_abort);
136}
137
138void hyperv_synic_reset(CPUState *cs)
139{
140 SynICState *synic = get_synic(cs);
141
142 if (synic) {
143 device_reset(DEVICE(synic));
144 }
145}
146
147static const TypeInfo synic_type_info = {
148 .name = TYPE_SYNIC,
149 .parent = TYPE_DEVICE,
150 .instance_size = sizeof(SynICState),
151 .class_init = synic_class_init,
152};
153
154static void synic_register_types(void)
155{
156 type_register_static(&synic_type_info);
157}
158
159type_init(synic_register_types)
160
161/*
162 * KVM has its own message producers (SynIC timers). To guarantee
163 * serialization with both KVM vcpu and the guest cpu, the messages are first
164 * staged in an intermediate area and then posted to the SynIC message page in
165 * the vcpu thread.
166 */
167typedef struct HvSintStagedMessage {
168 /* message content staged by hyperv_post_msg */
169 struct hyperv_message msg;
170 /* callback + data (r/o) to complete the processing in a BH */
171 HvSintMsgCb cb;
172 void *cb_data;
173 /* message posting status filled by cpu_post_msg */
174 int status;
175 /* passing the buck: */
176 enum {
177 /* initial state */
178 HV_STAGED_MSG_FREE,
179 /*
180 * hyperv_post_msg (e.g. in main loop) grabs the staged area (FREE ->
181 * BUSY), copies msg, and schedules cpu_post_msg on the assigned cpu
182 */
183 HV_STAGED_MSG_BUSY,
184 /*
185 * cpu_post_msg (vcpu thread) tries to copy staged msg to msg slot,
186 * notify the guest, records the status, marks the posting done (BUSY
187 * -> POSTED), and schedules sint_msg_bh BH
188 */
189 HV_STAGED_MSG_POSTED,
190 /*
191 * sint_msg_bh (BH) verifies that the posting is done, runs the
192 * callback, and starts over (POSTED -> FREE)
193 */
194 } state;
195} HvSintStagedMessage;
196
197struct HvSintRoute {
198 uint32_t sint;
199 SynICState *synic;
200 int gsi;
201 EventNotifier sint_set_notifier;
202 EventNotifier sint_ack_notifier;
203
204 HvSintStagedMessage *staged_msg;
205
206 unsigned refcount;
207};
208
209static CPUState *hyperv_find_vcpu(uint32_t vp_index)
210{
211 CPUState *cs = qemu_get_cpu(vp_index);
212 assert(hyperv_vp_index(cs) == vp_index);
213 return cs;
214}
215
216/*
217 * BH to complete the processing of a staged message.
218 */
219static void sint_msg_bh(void *opaque)
220{
221 HvSintRoute *sint_route = opaque;
222 HvSintStagedMessage *staged_msg = sint_route->staged_msg;
223
224 if (atomic_read(&staged_msg->state) != HV_STAGED_MSG_POSTED) {
225 /* status nor ready yet (spurious ack from guest?), ignore */
226 return;
227 }
228
229 staged_msg->cb(staged_msg->cb_data, staged_msg->status);
230 staged_msg->status = 0;
231
232 /* staged message processing finished, ready to start over */
233 atomic_set(&staged_msg->state, HV_STAGED_MSG_FREE);
234 /* drop the reference taken in hyperv_post_msg */
235 hyperv_sint_route_unref(sint_route);
236}
237
238/*
239 * Worker to transfer the message from the staging area into the SynIC message
240 * page in vcpu context.
241 */
242static void cpu_post_msg(CPUState *cs, run_on_cpu_data data)
243{
244 HvSintRoute *sint_route = data.host_ptr;
245 HvSintStagedMessage *staged_msg = sint_route->staged_msg;
246 SynICState *synic = sint_route->synic;
247 struct hyperv_message *dst_msg;
248 bool wait_for_sint_ack = false;
249
250 assert(staged_msg->state == HV_STAGED_MSG_BUSY);
251
252 if (!synic->enabled || !synic->msg_page_addr) {
253 staged_msg->status = -ENXIO;
254 goto posted;
255 }
256
257 dst_msg = &synic->msg_page->slot[sint_route->sint];
258
259 if (dst_msg->header.message_type != HV_MESSAGE_NONE) {
260 dst_msg->header.message_flags |= HV_MESSAGE_FLAG_PENDING;
261 staged_msg->status = -EAGAIN;
262 wait_for_sint_ack = true;
263 } else {
264 memcpy(dst_msg, &staged_msg->msg, sizeof(*dst_msg));
265 staged_msg->status = hyperv_sint_route_set_sint(sint_route);
266 }
267
268 memory_region_set_dirty(&synic->msg_page_mr, 0, sizeof(*synic->msg_page));
269
270posted:
271 atomic_set(&staged_msg->state, HV_STAGED_MSG_POSTED);
272 /*
273 * Notify the msg originator of the progress made; if the slot was busy we
274 * set msg_pending flag in it so it will be the guest who will do EOM and
275 * trigger the notification from KVM via sint_ack_notifier
276 */
277 if (!wait_for_sint_ack) {
278 aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh,
279 sint_route);
280 }
281}
282
283/*
284 * Post a Hyper-V message to the staging area, for delivery to guest in the
285 * vcpu thread.
286 */
287int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *src_msg)
288{
289 HvSintStagedMessage *staged_msg = sint_route->staged_msg;
290
291 assert(staged_msg);
292
293 /* grab the staging area */
294 if (atomic_cmpxchg(&staged_msg->state, HV_STAGED_MSG_FREE,
295 HV_STAGED_MSG_BUSY) != HV_STAGED_MSG_FREE) {
296 return -EAGAIN;
297 }
298
299 memcpy(&staged_msg->msg, src_msg, sizeof(*src_msg));
300
301 /* hold a reference on sint_route until the callback is finished */
302 hyperv_sint_route_ref(sint_route);
303
304 /* schedule message posting attempt in vcpu thread */
305 async_run_on_cpu(sint_route->synic->cs, cpu_post_msg,
306 RUN_ON_CPU_HOST_PTR(sint_route));
307 return 0;
308}
309
310static void sint_ack_handler(EventNotifier *notifier)
311{
312 HvSintRoute *sint_route = container_of(notifier, HvSintRoute,
313 sint_ack_notifier);
314 event_notifier_test_and_clear(notifier);
315
316 /*
317 * the guest consumed the previous message so complete the current one with
318 * -EAGAIN and let the msg originator retry
319 */
320 aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, sint_route);
321}
322
323/*
324 * Set given event flag for a given sint on a given vcpu, and signal the sint.
325 */
326int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno)
327{
328 int ret;
329 SynICState *synic = sint_route->synic;
330 unsigned long *flags, set_mask;
331 unsigned set_idx;
332
333 if (eventno > HV_EVENT_FLAGS_COUNT) {
334 return -EINVAL;
335 }
336 if (!synic->enabled || !synic->event_page_addr) {
337 return -ENXIO;
338 }
339
340 set_idx = BIT_WORD(eventno);
341 set_mask = BIT_MASK(eventno);
342 flags = synic->event_page->slot[sint_route->sint].flags;
343
344 if ((atomic_fetch_or(&flags[set_idx], set_mask) & set_mask) != set_mask) {
345 memory_region_set_dirty(&synic->event_page_mr, 0,
346 sizeof(*synic->event_page));
347 ret = hyperv_sint_route_set_sint(sint_route);
348 } else {
349 ret = 0;
350 }
351 return ret;
352}
353
354HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint,
355 HvSintMsgCb cb, void *cb_data)
356{
357 HvSintRoute *sint_route;
358 EventNotifier *ack_notifier;
359 int r, gsi;
360 CPUState *cs;
361 SynICState *synic;
362
363 cs = hyperv_find_vcpu(vp_index);
364 if (!cs) {
365 return NULL;
366 }
367
368 synic = get_synic(cs);
369 if (!synic) {
370 return NULL;
371 }
372
373 sint_route = g_new0(HvSintRoute, 1);
374 r = event_notifier_init(&sint_route->sint_set_notifier, false);
375 if (r) {
376 goto err;
377 }
378
379
380 ack_notifier = cb ? &sint_route->sint_ack_notifier : NULL;
381 if (ack_notifier) {
382 sint_route->staged_msg = g_new0(HvSintStagedMessage, 1);
383 sint_route->staged_msg->cb = cb;
384 sint_route->staged_msg->cb_data = cb_data;
385
386 r = event_notifier_init(ack_notifier, false);
387 if (r) {
388 goto err_sint_set_notifier;
389 }
390
391 event_notifier_set_handler(ack_notifier, sint_ack_handler);
392 }
393
394 gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint);
395 if (gsi < 0) {
396 goto err_gsi;
397 }
398
399 r = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state,
400 &sint_route->sint_set_notifier,
401 ack_notifier, gsi);
402 if (r) {
403 goto err_irqfd;
404 }
405 sint_route->gsi = gsi;
406 sint_route->synic = synic;
407 sint_route->sint = sint;
408 sint_route->refcount = 1;
409
410 return sint_route;
411
412err_irqfd:
413 kvm_irqchip_release_virq(kvm_state, gsi);
414err_gsi:
415 if (ack_notifier) {
416 event_notifier_set_handler(ack_notifier, NULL);
417 event_notifier_cleanup(ack_notifier);
418 g_free(sint_route->staged_msg);
419 }
420err_sint_set_notifier:
421 event_notifier_cleanup(&sint_route->sint_set_notifier);
422err:
423 g_free(sint_route);
424
425 return NULL;
426}
427
428void hyperv_sint_route_ref(HvSintRoute *sint_route)
429{
430 sint_route->refcount++;
431}
432
433void hyperv_sint_route_unref(HvSintRoute *sint_route)
434{
435 if (!sint_route) {
436 return;
437 }
438
439 assert(sint_route->refcount > 0);
440
441 if (--sint_route->refcount) {
442 return;
443 }
444
445 kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state,
446 &sint_route->sint_set_notifier,
447 sint_route->gsi);
448 kvm_irqchip_release_virq(kvm_state, sint_route->gsi);
449 if (sint_route->staged_msg) {
450 event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL);
451 event_notifier_cleanup(&sint_route->sint_ack_notifier);
452 g_free(sint_route->staged_msg);
453 }
454 event_notifier_cleanup(&sint_route->sint_set_notifier);
455 g_free(sint_route);
456}
457
458int hyperv_sint_route_set_sint(HvSintRoute *sint_route)
459{
460 return event_notifier_set(&sint_route->sint_set_notifier);
461}
462
463typedef struct MsgHandler {
464 struct rcu_head rcu;
465 QLIST_ENTRY(MsgHandler) link;
466 uint32_t conn_id;
467 HvMsgHandler handler;
468 void *data;
469} MsgHandler;
470
471typedef struct EventFlagHandler {
472 struct rcu_head rcu;
473 QLIST_ENTRY(EventFlagHandler) link;
474 uint32_t conn_id;
475 EventNotifier *notifier;
476} EventFlagHandler;
477
478static QLIST_HEAD(, MsgHandler) msg_handlers;
479static QLIST_HEAD(, EventFlagHandler) event_flag_handlers;
480static QemuMutex handlers_mutex;
481
482static void __attribute__((constructor)) hv_init(void)
483{
484 QLIST_INIT(&msg_handlers);
485 QLIST_INIT(&event_flag_handlers);
486 qemu_mutex_init(&handlers_mutex);
487}
488
489int hyperv_set_msg_handler(uint32_t conn_id, HvMsgHandler handler, void *data)
490{
491 int ret;
492 MsgHandler *mh;
493
494 qemu_mutex_lock(&handlers_mutex);
495 QLIST_FOREACH(mh, &msg_handlers, link) {
496 if (mh->conn_id == conn_id) {
497 if (handler) {
498 ret = -EEXIST;
499 } else {
500 QLIST_REMOVE_RCU(mh, link);
501 g_free_rcu(mh, rcu);
502 ret = 0;
503 }
504 goto unlock;
505 }
506 }
507
508 if (handler) {
509 mh = g_new(MsgHandler, 1);
510 mh->conn_id = conn_id;
511 mh->handler = handler;
512 mh->data = data;
513 QLIST_INSERT_HEAD_RCU(&msg_handlers, mh, link);
514 ret = 0;
515 } else {
516 ret = -ENOENT;
517 }
518unlock:
519 qemu_mutex_unlock(&handlers_mutex);
520 return ret;
521}
522
523uint16_t hyperv_hcall_post_message(uint64_t param, bool fast)
524{
525 uint16_t ret;
526 hwaddr len;
527 struct hyperv_post_message_input *msg;
528 MsgHandler *mh;
529
530 if (fast) {
531 return HV_STATUS_INVALID_HYPERCALL_CODE;
532 }
533 if (param & (__alignof__(*msg) - 1)) {
534 return HV_STATUS_INVALID_ALIGNMENT;
535 }
536
537 len = sizeof(*msg);
538 msg = cpu_physical_memory_map(param, &len, 0);
539 if (len < sizeof(*msg)) {
540 ret = HV_STATUS_INSUFFICIENT_MEMORY;
541 goto unmap;
542 }
543 if (msg->payload_size > sizeof(msg->payload)) {
544 ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
545 goto unmap;
546 }
547
548 ret = HV_STATUS_INVALID_CONNECTION_ID;
549 rcu_read_lock();
550 QLIST_FOREACH_RCU(mh, &msg_handlers, link) {
551 if (mh->conn_id == (msg->connection_id & HV_CONNECTION_ID_MASK)) {
552 ret = mh->handler(msg, mh->data);
553 break;
554 }
555 }
556 rcu_read_unlock();
557
558unmap:
559 cpu_physical_memory_unmap(msg, len, 0, 0);
560 return ret;
561}
562
563static int set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier)
564{
565 int ret;
566 EventFlagHandler *handler;
567
568 qemu_mutex_lock(&handlers_mutex);
569 QLIST_FOREACH(handler, &event_flag_handlers, link) {
570 if (handler->conn_id == conn_id) {
571 if (notifier) {
572 ret = -EEXIST;
573 } else {
574 QLIST_REMOVE_RCU(handler, link);
575 g_free_rcu(handler, rcu);
576 ret = 0;
577 }
578 goto unlock;
579 }
580 }
581
582 if (notifier) {
583 handler = g_new(EventFlagHandler, 1);
584 handler->conn_id = conn_id;
585 handler->notifier = notifier;
586 QLIST_INSERT_HEAD_RCU(&event_flag_handlers, handler, link);
587 ret = 0;
588 } else {
589 ret = -ENOENT;
590 }
591unlock:
592 qemu_mutex_unlock(&handlers_mutex);
593 return ret;
594}
595
596static bool process_event_flags_userspace;
597
598int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier)
599{
600 if (!process_event_flags_userspace &&
601 !kvm_check_extension(kvm_state, KVM_CAP_HYPERV_EVENTFD)) {
602 process_event_flags_userspace = true;
603
604 warn_report("Hyper-V event signaling is not supported by this kernel; "
605 "using slower userspace hypercall processing");
606 }
607
608 if (!process_event_flags_userspace) {
609 struct kvm_hyperv_eventfd hvevfd = {
610 .conn_id = conn_id,
611 .fd = notifier ? event_notifier_get_fd(notifier) : -1,
612 .flags = notifier ? 0 : KVM_HYPERV_EVENTFD_DEASSIGN,
613 };
614
615 return kvm_vm_ioctl(kvm_state, KVM_HYPERV_EVENTFD, &hvevfd);
616 }
617 return set_event_flag_handler(conn_id, notifier);
618}
619
620uint16_t hyperv_hcall_signal_event(uint64_t param, bool fast)
621{
622 uint16_t ret;
623 EventFlagHandler *handler;
624
625 if (unlikely(!fast)) {
626 hwaddr addr = param;
627
628 if (addr & (__alignof__(addr) - 1)) {
629 return HV_STATUS_INVALID_ALIGNMENT;
630 }
631
632 param = ldq_phys(&address_space_memory, addr);
633 }
634
635 /*
636 * Per spec, bits 32-47 contain the extra "flag number". However, we
637 * have no use for it, and in all known usecases it is zero, so just
638 * report lookup failure if it isn't.
639 */
640 if (param & 0xffff00000000ULL) {
641 return HV_STATUS_INVALID_PORT_ID;
642 }
643 /* remaining bits are reserved-zero */
644 if (param & ~HV_CONNECTION_ID_MASK) {
645 return HV_STATUS_INVALID_HYPERCALL_INPUT;
646 }
647
648 ret = HV_STATUS_INVALID_CONNECTION_ID;
649 rcu_read_lock();
650 QLIST_FOREACH_RCU(handler, &event_flag_handlers, link) {
651 if (handler->conn_id == param) {
652 event_notifier_set(handler->notifier);
653 ret = 0;
654 break;
655 }
656 }
657 rcu_read_unlock();
658 return ret;
659}
660