1 | /* |
2 | * ioapic.c IOAPIC emulation logic |
3 | * |
4 | * Copyright (c) 2004-2005 Fabrice Bellard |
5 | * |
6 | * Split the ioapic logic from apic.c |
7 | * Xiantao Zhang <xiantao.zhang@intel.com> |
8 | * |
9 | * This library is free software; you can redistribute it and/or |
10 | * modify it under the terms of the GNU Lesser General Public |
11 | * License as published by the Free Software Foundation; either |
12 | * version 2 of the License, or (at your option) any later version. |
13 | * |
14 | * This library is distributed in the hope that it will be useful, |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
17 | * Lesser General Public License for more details. |
18 | * |
19 | * You should have received a copy of the GNU Lesser General Public |
20 | * License along with this library; if not, see <http://www.gnu.org/licenses/>. |
21 | */ |
22 | |
23 | #include "qemu/osdep.h" |
24 | #include "qapi/error.h" |
25 | #include "monitor/monitor.h" |
26 | #include "hw/i386/pc.h" |
27 | #include "hw/i386/apic.h" |
28 | #include "hw/i386/ioapic.h" |
29 | #include "hw/i386/ioapic_internal.h" |
30 | #include "hw/pci/msi.h" |
31 | #include "hw/qdev-properties.h" |
32 | #include "sysemu/kvm.h" |
33 | #include "sysemu/sysemu.h" |
34 | #include "hw/i386/apic-msidef.h" |
35 | #include "hw/i386/x86-iommu.h" |
36 | #include "trace.h" |
37 | |
/*
 * Shift amounts, presumably bit positions of the delivery-mode, polarity
 * and trigger-mode fields of an APIC message.
 * NOTE(review): none of these three macros is referenced in the visible
 * part of this file -- confirm whether they are still needed.
 */
#define APIC_DELIVERY_MODE_SHIFT 8
#define APIC_POLARITY_SHIFT 14
#define APIC_TRIG_MODE_SHIFT 15

/*
 * Table of IOAPIC instances. ioapic_realize() stores each device at index
 * ioapic_no, and ioapic_eoi_broadcast() walks every slot, skipping NULL
 * entries.
 */
static IOAPICCommonState *ioapics[MAX_IOAPICS];

/* global variable from ioapic_common.c */
extern int ioapic_no;
46 | |
/*
 * Decoded view of one 64-bit redirection table entry, together with the
 * MSI address/data pair that ioapic_entry_parse() derives from it.
 */
struct ioapic_entry_info {
    /* --- decoded straight from the redirection table entry --- */
    uint8_t masked;         /* mask bit: non-zero when the pin is masked */
    uint8_t trig_mode;      /* trigger mode bit */
    uint16_t dest_idx;      /* 16-bit destination / index field */
    uint8_t dest_mode;      /* destination mode bit */
    uint8_t delivery_mode;  /* delivery mode field */
    uint8_t vector;         /* interrupt vector */

    /* --- MSI message assembled from the fields above --- */
    uint32_t addr;          /* MSI address word */
    uint32_t data;          /* MSI data word */
};
60 | |
61 | static void ioapic_entry_parse(uint64_t entry, struct ioapic_entry_info *info) |
62 | { |
63 | memset(info, 0, sizeof(*info)); |
64 | info->masked = (entry >> IOAPIC_LVT_MASKED_SHIFT) & 1; |
65 | info->trig_mode = (entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1; |
66 | /* |
67 | * By default, this would be dest_id[8] + reserved[8]. When IR |
68 | * is enabled, this would be interrupt_index[15] + |
69 | * interrupt_format[1]. This field never means anything, but |
70 | * only used to generate corresponding MSI. |
71 | */ |
72 | info->dest_idx = (entry >> IOAPIC_LVT_DEST_IDX_SHIFT) & 0xffff; |
73 | info->dest_mode = (entry >> IOAPIC_LVT_DEST_MODE_SHIFT) & 1; |
74 | info->delivery_mode = (entry >> IOAPIC_LVT_DELIV_MODE_SHIFT) \ |
75 | & IOAPIC_DM_MASK; |
76 | if (info->delivery_mode == IOAPIC_DM_EXTINT) { |
77 | info->vector = pic_read_irq(isa_pic); |
78 | } else { |
79 | info->vector = entry & IOAPIC_VECTOR_MASK; |
80 | } |
81 | |
82 | info->addr = APIC_DEFAULT_ADDRESS | \ |
83 | (info->dest_idx << MSI_ADDR_DEST_IDX_SHIFT) | \ |
84 | (info->dest_mode << MSI_ADDR_DEST_MODE_SHIFT); |
85 | info->data = (info->vector << MSI_DATA_VECTOR_SHIFT) | \ |
86 | (info->trig_mode << MSI_DATA_TRIGGER_SHIFT) | \ |
87 | (info->delivery_mode << MSI_DATA_DELIVERY_MODE_SHIFT); |
88 | } |
89 | |
90 | static void ioapic_service(IOAPICCommonState *s) |
91 | { |
92 | AddressSpace *ioapic_as = PC_MACHINE(qdev_get_machine())->ioapic_as; |
93 | struct ioapic_entry_info info; |
94 | uint8_t i; |
95 | uint32_t mask; |
96 | uint64_t entry; |
97 | |
98 | for (i = 0; i < IOAPIC_NUM_PINS; i++) { |
99 | mask = 1 << i; |
100 | if (s->irr & mask) { |
101 | int coalesce = 0; |
102 | |
103 | entry = s->ioredtbl[i]; |
104 | ioapic_entry_parse(entry, &info); |
105 | if (!info.masked) { |
106 | if (info.trig_mode == IOAPIC_TRIGGER_EDGE) { |
107 | s->irr &= ~mask; |
108 | } else { |
109 | coalesce = s->ioredtbl[i] & IOAPIC_LVT_REMOTE_IRR; |
110 | trace_ioapic_set_remote_irr(i); |
111 | s->ioredtbl[i] |= IOAPIC_LVT_REMOTE_IRR; |
112 | } |
113 | |
114 | if (coalesce) { |
115 | /* We are level triggered interrupts, and the |
116 | * guest should be still working on previous one, |
117 | * so skip it. */ |
118 | continue; |
119 | } |
120 | |
121 | #ifdef CONFIG_KVM |
122 | if (kvm_irqchip_is_split()) { |
123 | if (info.trig_mode == IOAPIC_TRIGGER_EDGE) { |
124 | kvm_set_irq(kvm_state, i, 1); |
125 | kvm_set_irq(kvm_state, i, 0); |
126 | } else { |
127 | kvm_set_irq(kvm_state, i, 1); |
128 | } |
129 | continue; |
130 | } |
131 | #endif |
132 | |
133 | /* No matter whether IR is enabled, we translate |
134 | * the IOAPIC message into a MSI one, and its |
135 | * address space will decide whether we need a |
136 | * translation. */ |
137 | stl_le_phys(ioapic_as, info.addr, info.data); |
138 | } |
139 | } |
140 | } |
141 | } |
142 | |
/*
 * Threshold used by ioapic_eoi_broadcast(): once a pin's irq_eoi counter
 * reaches this value, the counter is reset and the next ioapic_service()
 * call is deferred to a timer instead of being made immediately.
 */
#define SUCCESSIVE_IRQ_MAX_COUNT 10000
144 | |
145 | static void delayed_ioapic_service_cb(void *opaque) |
146 | { |
147 | IOAPICCommonState *s = opaque; |
148 | |
149 | ioapic_service(s); |
150 | } |
151 | |
152 | static void ioapic_set_irq(void *opaque, int vector, int level) |
153 | { |
154 | IOAPICCommonState *s = opaque; |
155 | |
156 | /* ISA IRQs map to GSI 1-1 except for IRQ0 which maps |
157 | * to GSI 2. GSI maps to ioapic 1-1. This is not |
158 | * the cleanest way of doing it but it should work. */ |
159 | |
160 | trace_ioapic_set_irq(vector, level); |
161 | ioapic_stat_update_irq(s, vector, level); |
162 | if (vector == 0) { |
163 | vector = 2; |
164 | } |
165 | if (vector < IOAPIC_NUM_PINS) { |
166 | uint32_t mask = 1 << vector; |
167 | uint64_t entry = s->ioredtbl[vector]; |
168 | |
169 | if (((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1) == |
170 | IOAPIC_TRIGGER_LEVEL) { |
171 | /* level triggered */ |
172 | if (level) { |
173 | s->irr |= mask; |
174 | if (!(entry & IOAPIC_LVT_REMOTE_IRR)) { |
175 | ioapic_service(s); |
176 | } |
177 | } else { |
178 | s->irr &= ~mask; |
179 | } |
180 | } else { |
181 | /* According to the 82093AA manual, we must ignore edge requests |
182 | * if the input pin is masked. */ |
183 | if (level && !(entry & IOAPIC_LVT_MASKED)) { |
184 | s->irr |= mask; |
185 | ioapic_service(s); |
186 | } |
187 | } |
188 | } |
189 | } |
190 | |
191 | static void ioapic_update_kvm_routes(IOAPICCommonState *s) |
192 | { |
193 | #ifdef CONFIG_KVM |
194 | int i; |
195 | |
196 | if (kvm_irqchip_is_split()) { |
197 | for (i = 0; i < IOAPIC_NUM_PINS; i++) { |
198 | MSIMessage msg; |
199 | struct ioapic_entry_info info; |
200 | ioapic_entry_parse(s->ioredtbl[i], &info); |
201 | if (!info.masked) { |
202 | msg.address = info.addr; |
203 | msg.data = info.data; |
204 | kvm_irqchip_update_msi_route(kvm_state, i, msg, NULL); |
205 | } |
206 | } |
207 | kvm_irqchip_commit_routes(kvm_state); |
208 | } |
209 | #endif |
210 | } |
211 | |
#ifdef CONFIG_KVM
/*
 * IOMMU IEC notifier callback.
 *
 * @private is the IOAPICCommonState that was registered with the
 * notifier; @global, @index and @mask are ignored because, for
 * simplicity, every route is refreshed on any notification.
 */
static void ioapic_iec_notifier(void *private, bool global,
                                uint32_t index, uint32_t mask)
{
    ioapic_update_kvm_routes((IOAPICCommonState *)private);
}
#endif
221 | |
222 | void ioapic_eoi_broadcast(int vector) |
223 | { |
224 | IOAPICCommonState *s; |
225 | uint64_t entry; |
226 | int i, n; |
227 | |
228 | trace_ioapic_eoi_broadcast(vector); |
229 | |
230 | for (i = 0; i < MAX_IOAPICS; i++) { |
231 | s = ioapics[i]; |
232 | if (!s) { |
233 | continue; |
234 | } |
235 | for (n = 0; n < IOAPIC_NUM_PINS; n++) { |
236 | entry = s->ioredtbl[n]; |
237 | |
238 | if ((entry & IOAPIC_VECTOR_MASK) != vector || |
239 | ((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1) != IOAPIC_TRIGGER_LEVEL) { |
240 | continue; |
241 | } |
242 | |
243 | if (!(entry & IOAPIC_LVT_REMOTE_IRR)) { |
244 | continue; |
245 | } |
246 | |
247 | trace_ioapic_clear_remote_irr(n, vector); |
248 | s->ioredtbl[n] = entry & ~IOAPIC_LVT_REMOTE_IRR; |
249 | |
250 | if (!(entry & IOAPIC_LVT_MASKED) && (s->irr & (1 << n))) { |
251 | ++s->irq_eoi[n]; |
252 | if (s->irq_eoi[n] >= SUCCESSIVE_IRQ_MAX_COUNT) { |
253 | /* |
254 | * Real hardware does not deliver the interrupt immediately |
255 | * during eoi broadcast, and this lets a buggy guest make |
256 | * slow progress even if it does not correctly handle a |
257 | * level-triggered interrupt. Emulate this behavior if we |
258 | * detect an interrupt storm. |
259 | */ |
260 | s->irq_eoi[n] = 0; |
261 | timer_mod_anticipate(s->delayed_ioapic_service_timer, |
262 | qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + |
263 | NANOSECONDS_PER_SECOND / 100); |
264 | trace_ioapic_eoi_delayed_reassert(n); |
265 | } else { |
266 | ioapic_service(s); |
267 | } |
268 | } else { |
269 | s->irq_eoi[n] = 0; |
270 | } |
271 | } |
272 | } |
273 | } |
274 | |
275 | static uint64_t |
276 | ioapic_mem_read(void *opaque, hwaddr addr, unsigned int size) |
277 | { |
278 | IOAPICCommonState *s = opaque; |
279 | int index; |
280 | uint32_t val = 0; |
281 | |
282 | addr &= 0xff; |
283 | |
284 | switch (addr) { |
285 | case IOAPIC_IOREGSEL: |
286 | val = s->ioregsel; |
287 | break; |
288 | case IOAPIC_IOWIN: |
289 | if (size != 4) { |
290 | break; |
291 | } |
292 | switch (s->ioregsel) { |
293 | case IOAPIC_REG_ID: |
294 | case IOAPIC_REG_ARB: |
295 | val = s->id << IOAPIC_ID_SHIFT; |
296 | break; |
297 | case IOAPIC_REG_VER: |
298 | val = s->version | |
299 | ((IOAPIC_NUM_PINS - 1) << IOAPIC_VER_ENTRIES_SHIFT); |
300 | break; |
301 | default: |
302 | index = (s->ioregsel - IOAPIC_REG_REDTBL_BASE) >> 1; |
303 | if (index >= 0 && index < IOAPIC_NUM_PINS) { |
304 | if (s->ioregsel & 1) { |
305 | val = s->ioredtbl[index] >> 32; |
306 | } else { |
307 | val = s->ioredtbl[index] & 0xffffffff; |
308 | } |
309 | } |
310 | } |
311 | break; |
312 | } |
313 | |
314 | trace_ioapic_mem_read(addr, s->ioregsel, size, val); |
315 | |
316 | return val; |
317 | } |
318 | |
319 | /* |
320 | * This is to satisfy the hack in Linux kernel. One hack of it is to |
321 | * simulate clearing the Remote IRR bit of IOAPIC entry using the |
322 | * following: |
323 | * |
324 | * "For IO-APIC's with EOI register, we use that to do an explicit EOI. |
325 | * Otherwise, we simulate the EOI message manually by changing the trigger |
326 | * mode to edge and then back to level, with RTE being masked during |
327 | * this." |
328 | * |
329 | * (See linux kernel __eoi_ioapic_pin() comment in commit c0205701) |
330 | * |
331 | * This is based on the assumption that, Remote IRR bit will be |
332 | * cleared by IOAPIC hardware when configured as edge-triggered |
333 | * interrupts. |
334 | * |
335 | * Without this, level-triggered interrupts in IR mode might fail to |
336 | * work correctly. |
337 | */ |
338 | static inline void |
339 | ioapic_fix_edge_remote_irr(uint64_t *entry) |
340 | { |
341 | if (!(*entry & IOAPIC_LVT_TRIGGER_MODE)) { |
342 | /* Edge-triggered interrupts, make sure remote IRR is zero */ |
343 | *entry &= ~((uint64_t)IOAPIC_LVT_REMOTE_IRR); |
344 | } |
345 | } |
346 | |
347 | static void |
348 | ioapic_mem_write(void *opaque, hwaddr addr, uint64_t val, |
349 | unsigned int size) |
350 | { |
351 | IOAPICCommonState *s = opaque; |
352 | int index; |
353 | |
354 | addr &= 0xff; |
355 | trace_ioapic_mem_write(addr, s->ioregsel, size, val); |
356 | |
357 | switch (addr) { |
358 | case IOAPIC_IOREGSEL: |
359 | s->ioregsel = val; |
360 | break; |
361 | case IOAPIC_IOWIN: |
362 | if (size != 4) { |
363 | break; |
364 | } |
365 | switch (s->ioregsel) { |
366 | case IOAPIC_REG_ID: |
367 | s->id = (val >> IOAPIC_ID_SHIFT) & IOAPIC_ID_MASK; |
368 | break; |
369 | case IOAPIC_REG_VER: |
370 | case IOAPIC_REG_ARB: |
371 | break; |
372 | default: |
373 | index = (s->ioregsel - IOAPIC_REG_REDTBL_BASE) >> 1; |
374 | if (index >= 0 && index < IOAPIC_NUM_PINS) { |
375 | uint64_t ro_bits = s->ioredtbl[index] & IOAPIC_RO_BITS; |
376 | if (s->ioregsel & 1) { |
377 | s->ioredtbl[index] &= 0xffffffff; |
378 | s->ioredtbl[index] |= (uint64_t)val << 32; |
379 | } else { |
380 | s->ioredtbl[index] &= ~0xffffffffULL; |
381 | s->ioredtbl[index] |= val; |
382 | } |
383 | /* restore RO bits */ |
384 | s->ioredtbl[index] &= IOAPIC_RW_BITS; |
385 | s->ioredtbl[index] |= ro_bits; |
386 | s->irq_eoi[index] = 0; |
387 | ioapic_fix_edge_remote_irr(&s->ioredtbl[index]); |
388 | ioapic_service(s); |
389 | } |
390 | } |
391 | break; |
392 | case IOAPIC_EOI: |
393 | /* Explicit EOI is only supported for IOAPIC version 0x20 */ |
394 | if (size != 4 || s->version != 0x20) { |
395 | break; |
396 | } |
397 | ioapic_eoi_broadcast(val); |
398 | break; |
399 | } |
400 | |
401 | ioapic_update_kvm_routes(s); |
402 | } |
403 | |
/*
 * MMIO callbacks for the IOAPIC register window; installed on the
 * device's io_memory region by ioapic_realize().
 */
static const MemoryRegionOps ioapic_io_ops = {
    .read = ioapic_mem_read,
    .write = ioapic_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
409 | |
410 | static void ioapic_machine_done_notify(Notifier *notifier, void *data) |
411 | { |
412 | #ifdef CONFIG_KVM |
413 | IOAPICCommonState *s = container_of(notifier, IOAPICCommonState, |
414 | machine_done); |
415 | |
416 | if (kvm_irqchip_is_split()) { |
417 | X86IOMMUState *iommu = x86_iommu_get_default(); |
418 | if (iommu) { |
419 | /* Register this IOAPIC with IOMMU IEC notifier, so that |
420 | * when there are IR invalidates, we can be notified to |
421 | * update kernel IR cache. */ |
422 | x86_iommu_iec_register_notifier(iommu, ioapic_iec_notifier, s); |
423 | } |
424 | } |
425 | #endif |
426 | } |
427 | |
/*
 * Default value of the "version" property; ioapic_realize() accepts only
 * 0x11 and 0x20.
 */
#define IOAPIC_VER_DEF 0x20
429 | |
430 | static void ioapic_realize(DeviceState *dev, Error **errp) |
431 | { |
432 | IOAPICCommonState *s = IOAPIC_COMMON(dev); |
433 | |
434 | if (s->version != 0x11 && s->version != 0x20) { |
435 | error_setg(errp, "IOAPIC only supports version 0x11 or 0x20 " |
436 | "(default: 0x%x)." , IOAPIC_VER_DEF); |
437 | return; |
438 | } |
439 | |
440 | memory_region_init_io(&s->io_memory, OBJECT(s), &ioapic_io_ops, s, |
441 | "ioapic" , 0x1000); |
442 | |
443 | s->delayed_ioapic_service_timer = |
444 | timer_new_ns(QEMU_CLOCK_VIRTUAL, delayed_ioapic_service_cb, s); |
445 | |
446 | qdev_init_gpio_in(dev, ioapic_set_irq, IOAPIC_NUM_PINS); |
447 | |
448 | ioapics[ioapic_no] = s; |
449 | s->machine_done.notify = ioapic_machine_done_notify; |
450 | qemu_add_machine_init_done_notifier(&s->machine_done); |
451 | } |
452 | |
453 | static void ioapic_unrealize(DeviceState *dev, Error **errp) |
454 | { |
455 | IOAPICCommonState *s = IOAPIC_COMMON(dev); |
456 | |
457 | timer_del(s->delayed_ioapic_service_timer); |
458 | timer_free(s->delayed_ioapic_service_timer); |
459 | } |
460 | |
/*
 * Device properties: "version" is stored in IOAPICCommonState.version and
 * defaults to IOAPIC_VER_DEF.
 */
static Property ioapic_properties[] = {
    DEFINE_PROP_UINT8("version", IOAPICCommonState, version, IOAPIC_VER_DEF),
    DEFINE_PROP_END_OF_LIST(),
};
465 | |
466 | static void ioapic_class_init(ObjectClass *klass, void *data) |
467 | { |
468 | IOAPICCommonClass *k = IOAPIC_COMMON_CLASS(klass); |
469 | DeviceClass *dc = DEVICE_CLASS(klass); |
470 | |
471 | k->realize = ioapic_realize; |
472 | k->unrealize = ioapic_unrealize; |
473 | /* |
474 | * If APIC is in kernel, we need to update the kernel cache after |
475 | * migration, otherwise first 24 gsi routes will be invalid. |
476 | */ |
477 | k->post_load = ioapic_update_kvm_routes; |
478 | dc->reset = ioapic_reset_common; |
479 | dc->props = ioapic_properties; |
480 | } |
481 | |
/*
 * Type description for TYPE_IOAPIC, derived from TYPE_IOAPIC_COMMON and
 * initialized by ioapic_class_init().
 */
static const TypeInfo ioapic_info = {
    .name = TYPE_IOAPIC,
    .parent = TYPE_IOAPIC_COMMON,
    .instance_size = sizeof(IOAPICCommonState),
    .class_init = ioapic_class_init,
};
488 | |
/* Register the type described by ioapic_info. */
static void ioapic_register_types(void)
{
    type_register_static(&ioapic_info);
}

/* Hook ioapic_register_types() into type initialization. */
type_init(ioapic_register_types)
495 | |