1 | /* |
2 | * TPR optimization for 32-bit Windows guests (XP and Server 2003) |
3 | * |
4 | * Copyright (C) 2007-2008 Qumranet Technologies |
5 | * Copyright (C) 2012 Jan Kiszka, Siemens AG |
6 | * |
7 | * This work is licensed under the terms of the GNU GPL version 2, or |
8 | * (at your option) any later version. See the COPYING file in the |
9 | * top-level directory. |
10 | */ |
11 | |
12 | #include "qemu/osdep.h" |
13 | #include "qemu/module.h" |
14 | #include "cpu.h" |
15 | #include "sysemu/sysemu.h" |
16 | #include "sysemu/cpus.h" |
17 | #include "sysemu/hw_accel.h" |
18 | #include "sysemu/kvm.h" |
19 | #include "sysemu/runstate.h" |
20 | #include "hw/i386/apic_internal.h" |
21 | #include "hw/sysbus.h" |
22 | #include "hw/boards.h" |
23 | #include "migration/vmstate.h" |
24 | #include "tcg/tcg.h" |
25 | |
/* First I/O port of the hypercall region registered in vapic_realize() */
#define VAPIC_IO_PORT 0x7e

/*
 * Shift applied to the CPU number to locate that CPU's slot in the guest's
 * VAPIC area (see vapic_enable()); also reported to the guest ROM as
 * vcpu_shift.
 */
#define VAPIC_CPU_SHIFT 7

/*
 * The option ROM size byte counts blocks of ROM_BLOCK_SIZE bytes;
 * ROM_BLOCK_MASK rounds an address down to the start of its block.
 */
#define ROM_BLOCK_SIZE 512
#define ROM_BLOCK_MASK (~(ROM_BLOCK_SIZE - 1))
32 | |
/*
 * Activation state of the device, stored in VAPICROMState.state.
 *
 * The numeric values are written out explicitly because the state field is
 * transferred as a plain 32-bit integer by vmstate_vapic.
 */
enum VAPICMode {
    VAPIC_INACTIVE = 0, /* after reset, or when preparing the ROM failed */
    VAPIC_ACTIVE   = 1, /* set once vapic_enable() succeeded */
    VAPIC_STANDBY  = 2, /* ROM announced itself via the 16-bit port write */
};
typedef enum VAPICMode VAPICMode;
38 | |
/*
 * Addresses of the TPR access routines provided by the guest option ROM.
 * do_patch_instruction() uses them as call targets when it rewrites a TPR
 * access.  The structure is embedded in the packed GuestROMState image, so
 * the member order must not change.
 */
struct VAPICHandlers {
    uint32_t set_tpr;       /* for "mov r32, r/m32" and "mov imm32, r/m32" */
    uint32_t set_tpr_eax;   /* for "mov eax to abs" */
    uint32_t get_tpr[8];    /* for TPR loads, indexed by destination register */
    uint32_t get_tpr_stack; /* for "push r/m32" */
} QEMU_PACKED;
typedef struct VAPICHandlers VAPICHandlers;
45 | |
46 | typedef struct GuestROMState { |
47 | char signature[8]; |
48 | uint32_t vaddr; |
49 | uint32_t fixup_start; |
50 | uint32_t fixup_end; |
51 | uint32_t vapic_vaddr; |
52 | uint32_t vapic_size; |
53 | uint32_t vcpu_shift; |
54 | uint32_t real_tpr_addr; |
55 | VAPICHandlers up; |
56 | VAPICHandlers mp; |
57 | } QEMU_PACKED GuestROMState; |
58 | |
59 | typedef struct VAPICROMState { |
60 | SysBusDevice busdev; |
61 | MemoryRegion io; |
62 | MemoryRegion rom; |
63 | uint32_t state; |
64 | uint32_t rom_state_paddr; |
65 | uint32_t rom_state_vaddr; |
66 | uint32_t vapic_paddr; |
67 | uint32_t real_tpr_addr; |
68 | GuestROMState rom_state; |
69 | size_t rom_size; |
70 | bool rom_mapped_writable; |
71 | VMChangeStateEntry *vmsentry; |
72 | } VAPICROMState; |
73 | |
/* QOM type name of the device and the matching checked cast */
#define TYPE_VAPIC "kvmvapic"
#define VAPIC(obj) OBJECT_CHECK(VAPICROMState, (obj), TYPE_VAPIC)
76 | |
/* TPRInstruction.flags bits, evaluated by opcode_matches() */
/* the byte following the opcode must pass is_abs_modrm() */
#define TPR_INSTR_ABS_MODRM 0x1
/* the reg field of that byte must equal TPRInstruction.modrm_reg */
#define TPR_INSTR_MATCH_MODRM_REG 0x2
79 | |
80 | typedef struct TPRInstruction { |
81 | uint8_t opcode; |
82 | uint8_t modrm_reg; |
83 | unsigned int flags; |
84 | TPRAccess access; |
85 | size_t length; |
86 | off_t addr_offset; |
87 | } TPRInstruction; |
88 | |
89 | /* must be sorted by length, shortest first */ |
90 | static const TPRInstruction tpr_instr[] = { |
91 | { /* mov abs to eax */ |
92 | .opcode = 0xa1, |
93 | .access = TPR_ACCESS_READ, |
94 | .length = 5, |
95 | .addr_offset = 1, |
96 | }, |
97 | { /* mov eax to abs */ |
98 | .opcode = 0xa3, |
99 | .access = TPR_ACCESS_WRITE, |
100 | .length = 5, |
101 | .addr_offset = 1, |
102 | }, |
103 | { /* mov r32 to r/m32 */ |
104 | .opcode = 0x89, |
105 | .flags = TPR_INSTR_ABS_MODRM, |
106 | .access = TPR_ACCESS_WRITE, |
107 | .length = 6, |
108 | .addr_offset = 2, |
109 | }, |
110 | { /* mov r/m32 to r32 */ |
111 | .opcode = 0x8b, |
112 | .flags = TPR_INSTR_ABS_MODRM, |
113 | .access = TPR_ACCESS_READ, |
114 | .length = 6, |
115 | .addr_offset = 2, |
116 | }, |
117 | { /* push r/m32 */ |
118 | .opcode = 0xff, |
119 | .modrm_reg = 6, |
120 | .flags = TPR_INSTR_ABS_MODRM | TPR_INSTR_MATCH_MODRM_REG, |
121 | .access = TPR_ACCESS_READ, |
122 | .length = 6, |
123 | .addr_offset = 2, |
124 | }, |
125 | { /* mov imm32, r/m32 (c7/0) */ |
126 | .opcode = 0xc7, |
127 | .modrm_reg = 0, |
128 | .flags = TPR_INSTR_ABS_MODRM | TPR_INSTR_MATCH_MODRM_REG, |
129 | .access = TPR_ACCESS_WRITE, |
130 | .length = 10, |
131 | .addr_offset = 2, |
132 | }, |
133 | }; |
134 | |
135 | static void read_guest_rom_state(VAPICROMState *s) |
136 | { |
137 | cpu_physical_memory_read(s->rom_state_paddr, &s->rom_state, |
138 | sizeof(GuestROMState)); |
139 | } |
140 | |
141 | static void write_guest_rom_state(VAPICROMState *s) |
142 | { |
143 | cpu_physical_memory_write(s->rom_state_paddr, &s->rom_state, |
144 | sizeof(GuestROMState)); |
145 | } |
146 | |
147 | static void update_guest_rom_state(VAPICROMState *s) |
148 | { |
149 | read_guest_rom_state(s); |
150 | |
151 | s->rom_state.real_tpr_addr = cpu_to_le32(s->real_tpr_addr); |
152 | s->rom_state.vcpu_shift = cpu_to_le32(VAPIC_CPU_SHIFT); |
153 | |
154 | write_guest_rom_state(s); |
155 | } |
156 | |
157 | static int find_real_tpr_addr(VAPICROMState *s, CPUX86State *env) |
158 | { |
159 | CPUState *cs = env_cpu(env); |
160 | hwaddr paddr; |
161 | target_ulong addr; |
162 | |
163 | if (s->state == VAPIC_ACTIVE) { |
164 | return 0; |
165 | } |
166 | /* |
167 | * If there is no prior TPR access instruction we could analyze (which is |
168 | * the case after resume from hibernation), we need to scan the possible |
169 | * virtual address space for the APIC mapping. |
170 | */ |
171 | for (addr = 0xfffff000; addr >= 0x80000000; addr -= TARGET_PAGE_SIZE) { |
172 | paddr = cpu_get_phys_page_debug(cs, addr); |
173 | if (paddr != APIC_DEFAULT_ADDRESS) { |
174 | continue; |
175 | } |
176 | s->real_tpr_addr = addr + 0x80; |
177 | update_guest_rom_state(s); |
178 | return 0; |
179 | } |
180 | return -1; |
181 | } |
182 | |
/* Extract the reg field (bits 5..3) of a ModRM byte. */
static uint8_t modrm_reg(uint8_t modrm)
{
    const uint8_t reg_field_mask = 0x38;

    return (modrm & reg_field_mask) >> 3;
}
187 | |
/*
 * Tell whether a ModRM byte has mod == 0 and r/m == 5, the encoding the
 * instruction table treats as an absolute address operand.  The reg field
 * is ignored.
 */
static bool is_abs_modrm(uint8_t modrm)
{
    uint8_t mod = modrm >> 6;
    uint8_t rm = modrm & 7;

    return mod == 0 && rm == 5;
}
192 | |
193 | static bool opcode_matches(uint8_t *opcode, const TPRInstruction *instr) |
194 | { |
195 | return opcode[0] == instr->opcode && |
196 | (!(instr->flags & TPR_INSTR_ABS_MODRM) || is_abs_modrm(opcode[1])) && |
197 | (!(instr->flags & TPR_INSTR_MATCH_MODRM_REG) || |
198 | modrm_reg(opcode[1]) == instr->modrm_reg); |
199 | } |
200 | |
201 | static int evaluate_tpr_instruction(VAPICROMState *s, X86CPU *cpu, |
202 | target_ulong *pip, TPRAccess access) |
203 | { |
204 | CPUState *cs = CPU(cpu); |
205 | const TPRInstruction *instr; |
206 | target_ulong ip = *pip; |
207 | uint8_t opcode[2]; |
208 | uint32_t real_tpr_addr; |
209 | int i; |
210 | |
211 | if ((ip & 0xf0000000ULL) != 0x80000000ULL && |
212 | (ip & 0xf0000000ULL) != 0xe0000000ULL) { |
213 | return -1; |
214 | } |
215 | |
216 | /* |
217 | * Early Windows 2003 SMP initialization contains a |
218 | * |
219 | * mov imm32, r/m32 |
220 | * |
221 | * instruction that is patched by TPR optimization. The problem is that |
222 | * RSP, used by the patched instruction, is zero, so the guest gets a |
223 | * double fault and dies. |
224 | */ |
225 | if (cpu->env.regs[R_ESP] == 0) { |
226 | return -1; |
227 | } |
228 | |
229 | if (kvm_enabled() && !kvm_irqchip_in_kernel()) { |
230 | /* |
231 | * KVM without kernel-based TPR access reporting will pass an IP that |
232 | * points after the accessing instruction. So we need to look backward |
233 | * to find the reason. |
234 | */ |
235 | for (i = 0; i < ARRAY_SIZE(tpr_instr); i++) { |
236 | instr = &tpr_instr[i]; |
237 | if (instr->access != access) { |
238 | continue; |
239 | } |
240 | if (cpu_memory_rw_debug(cs, ip - instr->length, opcode, |
241 | sizeof(opcode), 0) < 0) { |
242 | return -1; |
243 | } |
244 | if (opcode_matches(opcode, instr)) { |
245 | ip -= instr->length; |
246 | goto instruction_ok; |
247 | } |
248 | } |
249 | return -1; |
250 | } else { |
251 | if (cpu_memory_rw_debug(cs, ip, opcode, sizeof(opcode), 0) < 0) { |
252 | return -1; |
253 | } |
254 | for (i = 0; i < ARRAY_SIZE(tpr_instr); i++) { |
255 | instr = &tpr_instr[i]; |
256 | if (opcode_matches(opcode, instr)) { |
257 | goto instruction_ok; |
258 | } |
259 | } |
260 | return -1; |
261 | } |
262 | |
263 | instruction_ok: |
264 | /* |
265 | * Grab the virtual TPR address from the instruction |
266 | * and update the cached values. |
267 | */ |
268 | if (cpu_memory_rw_debug(cs, ip + instr->addr_offset, |
269 | (void *)&real_tpr_addr, |
270 | sizeof(real_tpr_addr), 0) < 0) { |
271 | return -1; |
272 | } |
273 | real_tpr_addr = le32_to_cpu(real_tpr_addr); |
274 | if ((real_tpr_addr & 0xfff) != 0x80) { |
275 | return -1; |
276 | } |
277 | s->real_tpr_addr = real_tpr_addr; |
278 | update_guest_rom_state(s); |
279 | |
280 | *pip = ip; |
281 | return 0; |
282 | } |
283 | |
284 | static int update_rom_mapping(VAPICROMState *s, CPUX86State *env, target_ulong ip) |
285 | { |
286 | CPUState *cs = env_cpu(env); |
287 | hwaddr paddr; |
288 | uint32_t rom_state_vaddr; |
289 | uint32_t pos, patch, offset; |
290 | |
291 | /* nothing to do if already activated */ |
292 | if (s->state == VAPIC_ACTIVE) { |
293 | return 0; |
294 | } |
295 | |
296 | /* bail out if ROM init code was not executed (missing ROM?) */ |
297 | if (s->state == VAPIC_INACTIVE) { |
298 | return -1; |
299 | } |
300 | |
301 | /* find out virtual address of the ROM */ |
302 | rom_state_vaddr = s->rom_state_paddr + (ip & 0xf0000000); |
303 | paddr = cpu_get_phys_page_debug(cs, rom_state_vaddr); |
304 | if (paddr == -1) { |
305 | return -1; |
306 | } |
307 | paddr += rom_state_vaddr & ~TARGET_PAGE_MASK; |
308 | if (paddr != s->rom_state_paddr) { |
309 | return -1; |
310 | } |
311 | read_guest_rom_state(s); |
312 | if (memcmp(s->rom_state.signature, "kvm aPiC" , 8) != 0) { |
313 | return -1; |
314 | } |
315 | s->rom_state_vaddr = rom_state_vaddr; |
316 | |
317 | /* fixup addresses in ROM if needed */ |
318 | if (rom_state_vaddr == le32_to_cpu(s->rom_state.vaddr)) { |
319 | return 0; |
320 | } |
321 | for (pos = le32_to_cpu(s->rom_state.fixup_start); |
322 | pos < le32_to_cpu(s->rom_state.fixup_end); |
323 | pos += 4) { |
324 | cpu_physical_memory_read(paddr + pos - s->rom_state.vaddr, |
325 | &offset, sizeof(offset)); |
326 | offset = le32_to_cpu(offset); |
327 | cpu_physical_memory_read(paddr + offset, &patch, sizeof(patch)); |
328 | patch = le32_to_cpu(patch); |
329 | patch += rom_state_vaddr - le32_to_cpu(s->rom_state.vaddr); |
330 | patch = cpu_to_le32(patch); |
331 | cpu_physical_memory_write(paddr + offset, &patch, sizeof(patch)); |
332 | } |
333 | read_guest_rom_state(s); |
334 | s->vapic_paddr = paddr + le32_to_cpu(s->rom_state.vapic_vaddr) - |
335 | le32_to_cpu(s->rom_state.vaddr); |
336 | |
337 | return 0; |
338 | } |
339 | |
340 | /* |
341 | * Tries to read the unique processor number from the Kernel Processor Control |
342 | * Region (KPCR) of 32-bit Windows XP and Server 2003. Returns -1 if the KPCR |
343 | * cannot be accessed or is considered invalid. This also ensures that we are |
344 | * not patching the wrong guest. |
345 | */ |
346 | static int get_kpcr_number(X86CPU *cpu) |
347 | { |
348 | CPUX86State *env = &cpu->env; |
349 | struct kpcr { |
350 | uint8_t fill1[0x1c]; |
351 | uint32_t self; |
352 | uint8_t fill2[0x31]; |
353 | uint8_t number; |
354 | } QEMU_PACKED kpcr; |
355 | |
356 | if (cpu_memory_rw_debug(CPU(cpu), env->segs[R_FS].base, |
357 | (void *)&kpcr, sizeof(kpcr), 0) < 0 || |
358 | kpcr.self != env->segs[R_FS].base) { |
359 | return -1; |
360 | } |
361 | return kpcr.number; |
362 | } |
363 | |
364 | static int vapic_enable(VAPICROMState *s, X86CPU *cpu) |
365 | { |
366 | int cpu_number = get_kpcr_number(cpu); |
367 | hwaddr vapic_paddr; |
368 | static const uint8_t enabled = 1; |
369 | |
370 | if (cpu_number < 0) { |
371 | return -1; |
372 | } |
373 | vapic_paddr = s->vapic_paddr + |
374 | (((hwaddr)cpu_number) << VAPIC_CPU_SHIFT); |
375 | cpu_physical_memory_write(vapic_paddr + offsetof(VAPICState, enabled), |
376 | &enabled, sizeof(enabled)); |
377 | apic_enable_vapic(cpu->apic_state, vapic_paddr); |
378 | |
379 | s->state = VAPIC_ACTIVE; |
380 | |
381 | return 0; |
382 | } |
383 | |
384 | static void patch_byte(X86CPU *cpu, target_ulong addr, uint8_t byte) |
385 | { |
386 | cpu_memory_rw_debug(CPU(cpu), addr, &byte, 1, 1); |
387 | } |
388 | |
389 | static void patch_call(X86CPU *cpu, target_ulong ip, uint32_t target) |
390 | { |
391 | uint32_t offset; |
392 | |
393 | offset = cpu_to_le32(target - ip - 5); |
394 | patch_byte(cpu, ip, 0xe8); /* call near */ |
395 | cpu_memory_rw_debug(CPU(cpu), ip + 1, (void *)&offset, sizeof(offset), 1); |
396 | } |
397 | |
398 | typedef struct PatchInfo { |
399 | VAPICHandlers *handler; |
400 | target_ulong ip; |
401 | } PatchInfo; |
402 | |
403 | static void do_patch_instruction(CPUState *cs, run_on_cpu_data data) |
404 | { |
405 | X86CPU *x86_cpu = X86_CPU(cs); |
406 | PatchInfo *info = (PatchInfo *) data.host_ptr; |
407 | VAPICHandlers *handlers = info->handler; |
408 | target_ulong ip = info->ip; |
409 | uint8_t opcode[2]; |
410 | uint32_t imm32 = 0; |
411 | |
412 | cpu_memory_rw_debug(cs, ip, opcode, sizeof(opcode), 0); |
413 | |
414 | switch (opcode[0]) { |
415 | case 0x89: /* mov r32 to r/m32 */ |
416 | patch_byte(x86_cpu, ip, 0x50 + modrm_reg(opcode[1])); /* push reg */ |
417 | patch_call(x86_cpu, ip + 1, handlers->set_tpr); |
418 | break; |
419 | case 0x8b: /* mov r/m32 to r32 */ |
420 | patch_byte(x86_cpu, ip, 0x90); |
421 | patch_call(x86_cpu, ip + 1, handlers->get_tpr[modrm_reg(opcode[1])]); |
422 | break; |
423 | case 0xa1: /* mov abs to eax */ |
424 | patch_call(x86_cpu, ip, handlers->get_tpr[0]); |
425 | break; |
426 | case 0xa3: /* mov eax to abs */ |
427 | patch_call(x86_cpu, ip, handlers->set_tpr_eax); |
428 | break; |
429 | case 0xc7: /* mov imm32, r/m32 (c7/0) */ |
430 | patch_byte(x86_cpu, ip, 0x68); /* push imm32 */ |
431 | cpu_memory_rw_debug(cs, ip + 6, (void *)&imm32, sizeof(imm32), 0); |
432 | cpu_memory_rw_debug(cs, ip + 1, (void *)&imm32, sizeof(imm32), 1); |
433 | patch_call(x86_cpu, ip + 5, handlers->set_tpr); |
434 | break; |
435 | case 0xff: /* push r/m32 */ |
436 | patch_byte(x86_cpu, ip, 0x50); /* push eax */ |
437 | patch_call(x86_cpu, ip + 1, handlers->get_tpr_stack); |
438 | break; |
439 | default: |
440 | abort(); |
441 | } |
442 | |
443 | g_free(info); |
444 | } |
445 | |
446 | static void patch_instruction(VAPICROMState *s, X86CPU *cpu, target_ulong ip) |
447 | { |
448 | MachineState *ms = MACHINE(qdev_get_machine()); |
449 | CPUState *cs = CPU(cpu); |
450 | VAPICHandlers *handlers; |
451 | PatchInfo *info; |
452 | |
453 | if (ms->smp.cpus == 1) { |
454 | handlers = &s->rom_state.up; |
455 | } else { |
456 | handlers = &s->rom_state.mp; |
457 | } |
458 | |
459 | info = g_new(PatchInfo, 1); |
460 | info->handler = handlers; |
461 | info->ip = ip; |
462 | |
463 | async_safe_run_on_cpu(cs, do_patch_instruction, RUN_ON_CPU_HOST_PTR(info)); |
464 | } |
465 | |
466 | void vapic_report_tpr_access(DeviceState *dev, CPUState *cs, target_ulong ip, |
467 | TPRAccess access) |
468 | { |
469 | VAPICROMState *s = VAPIC(dev); |
470 | X86CPU *cpu = X86_CPU(cs); |
471 | CPUX86State *env = &cpu->env; |
472 | |
473 | cpu_synchronize_state(cs); |
474 | |
475 | if (evaluate_tpr_instruction(s, cpu, &ip, access) < 0) { |
476 | if (s->state == VAPIC_ACTIVE) { |
477 | vapic_enable(s, cpu); |
478 | } |
479 | return; |
480 | } |
481 | if (update_rom_mapping(s, env, ip) < 0) { |
482 | return; |
483 | } |
484 | if (vapic_enable(s, cpu) < 0) { |
485 | return; |
486 | } |
487 | patch_instruction(s, cpu, ip); |
488 | } |
489 | |
490 | typedef struct VAPICEnableTPRReporting { |
491 | DeviceState *apic; |
492 | bool enable; |
493 | } VAPICEnableTPRReporting; |
494 | |
495 | static void vapic_do_enable_tpr_reporting(CPUState *cpu, run_on_cpu_data data) |
496 | { |
497 | VAPICEnableTPRReporting *info = data.host_ptr; |
498 | apic_enable_tpr_access_reporting(info->apic, info->enable); |
499 | } |
500 | |
501 | static void vapic_enable_tpr_reporting(bool enable) |
502 | { |
503 | VAPICEnableTPRReporting info = { |
504 | .enable = enable, |
505 | }; |
506 | CPUState *cs; |
507 | X86CPU *cpu; |
508 | |
509 | CPU_FOREACH(cs) { |
510 | cpu = X86_CPU(cs); |
511 | info.apic = cpu->apic_state; |
512 | run_on_cpu(cs, vapic_do_enable_tpr_reporting, RUN_ON_CPU_HOST_PTR(&info)); |
513 | } |
514 | } |
515 | |
516 | static void vapic_reset(DeviceState *dev) |
517 | { |
518 | VAPICROMState *s = VAPIC(dev); |
519 | |
520 | s->state = VAPIC_INACTIVE; |
521 | s->rom_state_paddr = 0; |
522 | vapic_enable_tpr_reporting(false); |
523 | } |
524 | |
525 | /* |
526 | * Set the IRQ polling hypercalls to the supported variant: |
527 | * - vmcall if using KVM in-kernel irqchip |
528 | * - 32-bit VAPIC port write otherwise |
529 | */ |
530 | static int patch_hypercalls(VAPICROMState *s) |
531 | { |
532 | hwaddr rom_paddr = s->rom_state_paddr & ROM_BLOCK_MASK; |
533 | static const uint8_t vmcall_pattern[] = { /* vmcall */ |
534 | 0xb8, 0x1, 0, 0, 0, 0xf, 0x1, 0xc1 |
535 | }; |
536 | static const uint8_t outl_pattern[] = { /* nop; outl %eax,0x7e */ |
537 | 0xb8, 0x1, 0, 0, 0, 0x90, 0xe7, 0x7e |
538 | }; |
539 | uint8_t alternates[2]; |
540 | const uint8_t *pattern; |
541 | const uint8_t *patch; |
542 | off_t pos; |
543 | uint8_t *rom; |
544 | |
545 | rom = g_malloc(s->rom_size); |
546 | cpu_physical_memory_read(rom_paddr, rom, s->rom_size); |
547 | |
548 | for (pos = 0; pos < s->rom_size - sizeof(vmcall_pattern); pos++) { |
549 | if (kvm_irqchip_in_kernel()) { |
550 | pattern = outl_pattern; |
551 | alternates[0] = outl_pattern[7]; |
552 | alternates[1] = outl_pattern[7]; |
553 | patch = &vmcall_pattern[5]; |
554 | } else { |
555 | pattern = vmcall_pattern; |
556 | alternates[0] = vmcall_pattern[7]; |
557 | alternates[1] = 0xd9; /* AMD's VMMCALL */ |
558 | patch = &outl_pattern[5]; |
559 | } |
560 | if (memcmp(rom + pos, pattern, 7) == 0 && |
561 | (rom[pos + 7] == alternates[0] || rom[pos + 7] == alternates[1])) { |
562 | cpu_physical_memory_write(rom_paddr + pos + 5, patch, 3); |
563 | /* |
564 | * Don't flush the tb here. Under ordinary conditions, the patched |
565 | * calls are miles away from the current IP. Under malicious |
566 | * conditions, the guest could trick us to crash. |
567 | */ |
568 | } |
569 | } |
570 | |
571 | g_free(rom); |
572 | return 0; |
573 | } |
574 | |
575 | /* |
576 | * For TCG mode or the time KVM honors read-only memory regions, we need to |
577 | * enable write access to the option ROM so that variables can be updated by |
578 | * the guest. |
579 | */ |
580 | static int vapic_map_rom_writable(VAPICROMState *s) |
581 | { |
582 | hwaddr rom_paddr = s->rom_state_paddr & ROM_BLOCK_MASK; |
583 | MemoryRegionSection section; |
584 | MemoryRegion *as; |
585 | size_t rom_size; |
586 | uint8_t *ram; |
587 | |
588 | as = sysbus_address_space(&s->busdev); |
589 | |
590 | if (s->rom_mapped_writable) { |
591 | memory_region_del_subregion(as, &s->rom); |
592 | object_unparent(OBJECT(&s->rom)); |
593 | } |
594 | |
595 | /* grab RAM memory region (region @rom_paddr may still be pc.rom) */ |
596 | section = memory_region_find(as, 0, 1); |
597 | |
598 | /* read ROM size from RAM region */ |
599 | if (rom_paddr + 2 >= memory_region_size(section.mr)) { |
600 | return -1; |
601 | } |
602 | ram = memory_region_get_ram_ptr(section.mr); |
603 | rom_size = ram[rom_paddr + 2] * ROM_BLOCK_SIZE; |
604 | if (rom_size == 0) { |
605 | return -1; |
606 | } |
607 | s->rom_size = rom_size; |
608 | |
609 | /* We need to round to avoid creating subpages |
610 | * from which we cannot run code. */ |
611 | rom_size += rom_paddr & ~TARGET_PAGE_MASK; |
612 | rom_paddr &= TARGET_PAGE_MASK; |
613 | rom_size = TARGET_PAGE_ALIGN(rom_size); |
614 | |
615 | memory_region_init_alias(&s->rom, OBJECT(s), "kvmvapic-rom" , section.mr, |
616 | rom_paddr, rom_size); |
617 | memory_region_add_subregion_overlap(as, rom_paddr, &s->rom, 1000); |
618 | s->rom_mapped_writable = true; |
619 | memory_region_unref(section.mr); |
620 | |
621 | return 0; |
622 | } |
623 | |
624 | static int vapic_prepare(VAPICROMState *s) |
625 | { |
626 | if (vapic_map_rom_writable(s) < 0) { |
627 | return -1; |
628 | } |
629 | |
630 | if (patch_hypercalls(s) < 0) { |
631 | return -1; |
632 | } |
633 | |
634 | vapic_enable_tpr_reporting(true); |
635 | |
636 | return 0; |
637 | } |
638 | |
639 | static void vapic_write(void *opaque, hwaddr addr, uint64_t data, |
640 | unsigned int size) |
641 | { |
642 | VAPICROMState *s = opaque; |
643 | X86CPU *cpu; |
644 | CPUX86State *env; |
645 | hwaddr rom_paddr; |
646 | |
647 | if (!current_cpu) { |
648 | return; |
649 | } |
650 | |
651 | cpu_synchronize_state(current_cpu); |
652 | cpu = X86_CPU(current_cpu); |
653 | env = &cpu->env; |
654 | |
655 | /* |
656 | * The VAPIC supports two PIO-based hypercalls, both via port 0x7E. |
657 | * o 16-bit write access: |
658 | * Reports the option ROM initialization to the hypervisor. Written |
659 | * value is the offset of the state structure in the ROM. |
660 | * o 8-bit write access: |
661 | * Reactivates the VAPIC after a guest hibernation, i.e. after the |
662 | * option ROM content has been re-initialized by a guest power cycle. |
663 | * o 32-bit write access: |
664 | * Poll for pending IRQs, considering the current VAPIC state. |
665 | */ |
666 | switch (size) { |
667 | case 2: |
668 | if (s->state == VAPIC_INACTIVE) { |
669 | rom_paddr = (env->segs[R_CS].base + env->eip) & ROM_BLOCK_MASK; |
670 | s->rom_state_paddr = rom_paddr + data; |
671 | |
672 | s->state = VAPIC_STANDBY; |
673 | } |
674 | if (vapic_prepare(s) < 0) { |
675 | s->state = VAPIC_INACTIVE; |
676 | s->rom_state_paddr = 0; |
677 | break; |
678 | } |
679 | break; |
680 | case 1: |
681 | if (kvm_enabled()) { |
682 | /* |
683 | * Disable triggering instruction in ROM by writing a NOP. |
684 | * |
685 | * We cannot do this in TCG mode as the reported IP is not |
686 | * accurate. |
687 | */ |
688 | pause_all_vcpus(); |
689 | patch_byte(cpu, env->eip - 2, 0x66); |
690 | patch_byte(cpu, env->eip - 1, 0x90); |
691 | resume_all_vcpus(); |
692 | } |
693 | |
694 | if (s->state == VAPIC_ACTIVE) { |
695 | break; |
696 | } |
697 | if (update_rom_mapping(s, env, env->eip) < 0) { |
698 | break; |
699 | } |
700 | if (find_real_tpr_addr(s, env) < 0) { |
701 | break; |
702 | } |
703 | vapic_enable(s, cpu); |
704 | break; |
705 | default: |
706 | case 4: |
707 | if (!kvm_irqchip_in_kernel()) { |
708 | apic_poll_irq(cpu->apic_state); |
709 | } |
710 | break; |
711 | } |
712 | } |
713 | |
714 | static uint64_t vapic_read(void *opaque, hwaddr addr, unsigned size) |
715 | { |
716 | return 0xffffffff; |
717 | } |
718 | |
719 | static const MemoryRegionOps vapic_ops = { |
720 | .write = vapic_write, |
721 | .read = vapic_read, |
722 | .endianness = DEVICE_NATIVE_ENDIAN, |
723 | }; |
724 | |
725 | static void vapic_realize(DeviceState *dev, Error **errp) |
726 | { |
727 | SysBusDevice *sbd = SYS_BUS_DEVICE(dev); |
728 | VAPICROMState *s = VAPIC(dev); |
729 | |
730 | memory_region_init_io(&s->io, OBJECT(s), &vapic_ops, s, "kvmvapic" , 2); |
731 | sysbus_add_io(sbd, VAPIC_IO_PORT, &s->io); |
732 | sysbus_init_ioports(sbd, VAPIC_IO_PORT, 2); |
733 | |
734 | option_rom[nb_option_roms].name = "kvmvapic.bin" ; |
735 | option_rom[nb_option_roms].bootindex = -1; |
736 | nb_option_roms++; |
737 | } |
738 | |
739 | static void do_vapic_enable(CPUState *cs, run_on_cpu_data data) |
740 | { |
741 | VAPICROMState *s = data.host_ptr; |
742 | X86CPU *cpu = X86_CPU(cs); |
743 | |
744 | static const uint8_t enabled = 1; |
745 | cpu_physical_memory_write(s->vapic_paddr + offsetof(VAPICState, enabled), |
746 | &enabled, sizeof(enabled)); |
747 | apic_enable_vapic(cpu->apic_state, s->vapic_paddr); |
748 | s->state = VAPIC_ACTIVE; |
749 | } |
750 | |
751 | static void kvmvapic_vm_state_change(void *opaque, int running, |
752 | RunState state) |
753 | { |
754 | MachineState *ms = MACHINE(qdev_get_machine()); |
755 | VAPICROMState *s = opaque; |
756 | uint8_t *zero; |
757 | |
758 | if (!running) { |
759 | return; |
760 | } |
761 | |
762 | if (s->state == VAPIC_ACTIVE) { |
763 | if (ms->smp.cpus == 1) { |
764 | run_on_cpu(first_cpu, do_vapic_enable, RUN_ON_CPU_HOST_PTR(s)); |
765 | } else { |
766 | zero = g_malloc0(s->rom_state.vapic_size); |
767 | cpu_physical_memory_write(s->vapic_paddr, zero, |
768 | s->rom_state.vapic_size); |
769 | g_free(zero); |
770 | } |
771 | } |
772 | |
773 | qemu_del_vm_change_state_handler(s->vmsentry); |
774 | s->vmsentry = NULL; |
775 | } |
776 | |
777 | static int vapic_post_load(void *opaque, int version_id) |
778 | { |
779 | VAPICROMState *s = opaque; |
780 | |
781 | /* |
782 | * The old implementation of qemu-kvm did not provide the state |
783 | * VAPIC_STANDBY. Reconstruct it. |
784 | */ |
785 | if (s->state == VAPIC_INACTIVE && s->rom_state_paddr != 0) { |
786 | s->state = VAPIC_STANDBY; |
787 | } |
788 | |
789 | if (s->state != VAPIC_INACTIVE) { |
790 | if (vapic_prepare(s) < 0) { |
791 | return -1; |
792 | } |
793 | } |
794 | |
795 | if (!s->vmsentry) { |
796 | s->vmsentry = |
797 | qemu_add_vm_change_state_handler(kvmvapic_vm_state_change, s); |
798 | } |
799 | return 0; |
800 | } |
801 | |
/*
 * Migration description of one VAPICHandlers set.  The fields are listed in
 * the order of the structure members.
 */
static const VMStateDescription vmstate_handlers = {
    .name = "kvmvapic-handlers" ,
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(set_tpr, VAPICHandlers),
        VMSTATE_UINT32(set_tpr_eax, VAPICHandlers),
        VMSTATE_UINT32_ARRAY(get_tpr, VAPICHandlers, 8),
        VMSTATE_UINT32(get_tpr_stack, VAPICHandlers),
        VMSTATE_END_OF_LIST()
    }
};
814 | |
/*
 * Migration description of the host copy of the guest ROM state.  The
 * 8-byte signature is declared as unused space; both handler sets are
 * described by vmstate_handlers.
 */
static const VMStateDescription vmstate_guest_rom = {
    .name = "kvmvapic-guest-rom" ,
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UNUSED(8), /* signature */
        VMSTATE_UINT32(vaddr, GuestROMState),
        VMSTATE_UINT32(fixup_start, GuestROMState),
        VMSTATE_UINT32(fixup_end, GuestROMState),
        VMSTATE_UINT32(vapic_vaddr, GuestROMState),
        VMSTATE_UINT32(vapic_size, GuestROMState),
        VMSTATE_UINT32(vcpu_shift, GuestROMState),
        VMSTATE_UINT32(real_tpr_addr, GuestROMState),
        VMSTATE_STRUCT(up, GuestROMState, 0, vmstate_handlers, VAPICHandlers),
        VMSTATE_STRUCT(mp, GuestROMState, 0, vmstate_handlers, VAPICHandlers),
        VMSTATE_END_OF_LIST()
    }
};
833 | |
/*
 * Top-level migration description of the device.  vapic_post_load() runs
 * after the fields below have been loaded.
 */
static const VMStateDescription vmstate_vapic = {
    .name = "kvm-tpr-opt" , /* compatible with qemu-kvm VAPIC */
    .version_id = 1,
    .minimum_version_id = 1,
    .post_load = vapic_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT(rom_state, VAPICROMState, 0, vmstate_guest_rom,
                       GuestROMState),
        VMSTATE_UINT32(state, VAPICROMState),
        VMSTATE_UINT32(real_tpr_addr, VAPICROMState),
        VMSTATE_UINT32(rom_state_vaddr, VAPICROMState),
        VMSTATE_UINT32(vapic_paddr, VAPICROMState),
        VMSTATE_UINT32(rom_state_paddr, VAPICROMState),
        VMSTATE_END_OF_LIST()
    }
};
850 | |
851 | static void vapic_class_init(ObjectClass *klass, void *data) |
852 | { |
853 | DeviceClass *dc = DEVICE_CLASS(klass); |
854 | |
855 | dc->reset = vapic_reset; |
856 | dc->vmsd = &vmstate_vapic; |
857 | dc->realize = vapic_realize; |
858 | } |
859 | |
860 | static const TypeInfo vapic_type = { |
861 | .name = TYPE_VAPIC, |
862 | .parent = TYPE_SYS_BUS_DEVICE, |
863 | .instance_size = sizeof(VAPICROMState), |
864 | .class_init = vapic_class_init, |
865 | }; |
866 | |
/* Register the device type described by vapic_type. */
static void vapic_register(void)
{
    type_register_static(&vapic_type);
}

/* Hand vapic_register() to the type_init() registration macro. */
type_init(vapic_register);
873 | |