1 | /* |
2 | * QEMU System Emulator |
3 | * |
4 | * Copyright (c) 2003-2008 Fabrice Bellard |
5 | * |
6 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
7 | * of this software and associated documentation files (the "Software"), to deal |
8 | * in the Software without restriction, including without limitation the rights |
9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
10 | * copies of the Software, and to permit persons to whom the Software is |
11 | * furnished to do so, subject to the following conditions: |
12 | * |
13 | * The above copyright notice and this permission notice shall be included in |
14 | * all copies or substantial portions of the Software. |
15 | * |
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
22 | * THE SOFTWARE. |
23 | */ |
24 | |
25 | #include "qemu/osdep.h" |
26 | #include "qemu-common.h" |
27 | #include "qemu/config-file.h" |
28 | #include "migration/vmstate.h" |
29 | #include "monitor/monitor.h" |
30 | #include "qapi/error.h" |
31 | #include "qapi/qapi-commands-misc.h" |
32 | #include "qapi/qapi-events-run-state.h" |
33 | #include "qapi/qmp/qerror.h" |
34 | #include "qemu/error-report.h" |
35 | #include "qemu/qemu-print.h" |
36 | #include "sysemu/tcg.h" |
37 | #include "sysemu/block-backend.h" |
38 | #include "exec/gdbstub.h" |
39 | #include "sysemu/dma.h" |
40 | #include "sysemu/hw_accel.h" |
41 | #include "sysemu/kvm.h" |
42 | #include "sysemu/hax.h" |
43 | #include "sysemu/hvf.h" |
44 | #include "sysemu/whpx.h" |
45 | #include "exec/exec-all.h" |
46 | |
47 | #include "qemu/thread.h" |
48 | #include "sysemu/cpus.h" |
49 | #include "sysemu/qtest.h" |
50 | #include "qemu/main-loop.h" |
51 | #include "qemu/option.h" |
52 | #include "qemu/bitmap.h" |
53 | #include "qemu/seqlock.h" |
54 | #include "qemu/guest-random.h" |
55 | #include "tcg.h" |
56 | #include "hw/nmi.h" |
57 | #include "sysemu/replay.h" |
58 | #include "sysemu/runstate.h" |
59 | #include "hw/boards.h" |
60 | #include "hw/hw.h" |
61 | |
62 | #ifdef CONFIG_LINUX |
63 | |
64 | #include <sys/prctl.h> |
65 | |
66 | #ifndef PR_MCE_KILL |
67 | #define PR_MCE_KILL 33 |
68 | #endif |
69 | |
70 | #ifndef PR_MCE_KILL_SET |
71 | #define PR_MCE_KILL_SET 1 |
72 | #endif |
73 | |
74 | #ifndef PR_MCE_KILL_EARLY |
75 | #define PR_MCE_KILL_EARLY 1 |
76 | #endif |
77 | |
78 | #endif /* CONFIG_LINUX */ |
79 | |
80 | int64_t max_delay; |
81 | int64_t max_advance; |
82 | |
83 | /* vcpu throttling controls */ |
84 | static QEMUTimer *throttle_timer; |
85 | static unsigned int throttle_percentage; |
86 | |
87 | #define CPU_THROTTLE_PCT_MIN 1 |
88 | #define CPU_THROTTLE_PCT_MAX 99 |
89 | #define CPU_THROTTLE_TIMESLICE_NS 10000000 |
90 | |
91 | bool cpu_is_stopped(CPUState *cpu) |
92 | { |
93 | return cpu->stopped || !runstate_is_running(); |
94 | } |
95 | |
96 | static bool cpu_thread_is_idle(CPUState *cpu) |
97 | { |
98 | if (cpu->stop || cpu->queued_work_first) { |
99 | return false; |
100 | } |
101 | if (cpu_is_stopped(cpu)) { |
102 | return true; |
103 | } |
104 | if (!cpu->halted || cpu_has_work(cpu) || |
105 | kvm_halt_in_kernel()) { |
106 | return false; |
107 | } |
108 | return true; |
109 | } |
110 | |
111 | static bool all_cpu_threads_idle(void) |
112 | { |
113 | CPUState *cpu; |
114 | |
115 | CPU_FOREACH(cpu) { |
116 | if (!cpu_thread_is_idle(cpu)) { |
117 | return false; |
118 | } |
119 | } |
120 | return true; |
121 | } |
122 | |
123 | /***********************************************************/ |
124 | /* guest cycle counter */ |
125 | |
126 | /* Protected by TimersState seqlock */ |
127 | |
128 | static bool icount_sleep = true; |
129 | /* Arbitrarily pick 1MIPS as the minimum allowable speed. */ |
130 | #define MAX_ICOUNT_SHIFT 10 |
131 | |
typedef struct TimersState {
    /* Protected by BQL. */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* Protect fields that can be respectively read outside the
     * BQL, and written from multiple threads.
     */
    QemuSeqLock vm_clock_seqlock;
    QemuSpin vm_clock_lock;

    int16_t cpu_ticks_enabled;

    /* Conversion factor from emulated instructions to virtual clock ticks. */
    int16_t icount_time_shift;

    /* Compensate for varying guest execution speed. */
    int64_t qemu_icount_bias;

    int64_t vm_clock_warp_start;
    int64_t cpu_clock_offset;

    /* Only written by TCG thread */
    int64_t qemu_icount;

    /* for adjusting icount */
    QEMUTimer *icount_rt_timer;
    QEMUTimer *icount_vm_timer;
    QEMUTimer *icount_warp_timer;
} TimersState;
162 | |
163 | static TimersState timers_state; |
164 | bool mttcg_enabled; |
165 | |
166 | /* |
167 | * We default to false if we know other options have been enabled |
168 | * which are currently incompatible with MTTCG. Otherwise when each |
169 | * guest (target) has been updated to support: |
170 | * - atomic instructions |
171 | * - memory ordering primitives (barriers) |
172 | * they can set the appropriate CONFIG flags in ${target}-softmmu.mak |
173 | * |
174 | * Once a guest architecture has been converted to the new primitives |
175 | * there are two remaining limitations to check. |
176 | * |
177 | * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host) |
178 | * - The host must have a stronger memory order than the guest |
179 | * |
180 | * It may be possible in future to support strong guests on weak hosts |
181 | * but that will require tagging all load/stores in a guest with their |
182 | * implicit memory order requirements which would likely slow things |
183 | * down a lot. |
184 | */ |
185 | |
186 | static bool check_tcg_memory_orders_compatible(void) |
187 | { |
188 | #if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO) |
189 | return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0; |
190 | #else |
191 | return false; |
192 | #endif |
193 | } |
194 | |
195 | static bool default_mttcg_enabled(void) |
196 | { |
197 | if (use_icount || TCG_OVERSIZED_GUEST) { |
198 | return false; |
199 | } else { |
200 | #ifdef TARGET_SUPPORTS_MTTCG |
201 | return check_tcg_memory_orders_compatible(); |
202 | #else |
203 | return false; |
204 | #endif |
205 | } |
206 | } |
207 | |
208 | void qemu_tcg_configure(QemuOpts *opts, Error **errp) |
209 | { |
    const char *t = qemu_opt_get(opts, "thread");
    if (t) {
        if (strcmp(t, "multi") == 0) {
            if (TCG_OVERSIZED_GUEST) {
                error_setg(errp, "No MTTCG when guest word size > host's");
            } else if (use_icount) {
                error_setg(errp, "No MTTCG when icount is enabled");
            } else {
#ifndef TARGET_SUPPORTS_MTTCG
                warn_report("Guest not yet converted to MTTCG - "
                            "you may get unexpected results");
#endif
                if (!check_tcg_memory_orders_compatible()) {
                    warn_report("Guest expects a stronger memory ordering "
                                "than the host provides");
                    error_printf("This may cause strange/hard to debug errors\n");
                }
                mttcg_enabled = true;
            }
        } else if (strcmp(t, "single") == 0) {
            mttcg_enabled = false;
        } else {
            error_setg(errp, "Invalid 'thread' setting %s", t);
        }
233 | } |
234 | } else { |
235 | mttcg_enabled = default_mttcg_enabled(); |
236 | } |
237 | } |
238 | |
239 | /* The current number of executed instructions is based on what we |
240 | * originally budgeted minus the current state of the decrementing |
241 | * icount counters in extra/u16.low. |
242 | */ |
243 | static int64_t cpu_get_icount_executed(CPUState *cpu) |
244 | { |
245 | return (cpu->icount_budget - |
246 | (cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra)); |
247 | } |
248 | |
249 | /* |
250 | * Update the global shared timer_state.qemu_icount to take into |
251 | * account executed instructions. This is done by the TCG vCPU |
252 | * thread so the main-loop can see time has moved forward. |
253 | */ |
254 | static void cpu_update_icount_locked(CPUState *cpu) |
255 | { |
256 | int64_t executed = cpu_get_icount_executed(cpu); |
257 | cpu->icount_budget -= executed; |
258 | |
259 | atomic_set_i64(&timers_state.qemu_icount, |
260 | timers_state.qemu_icount + executed); |
261 | } |
262 | |
263 | /* |
264 | * Update the global shared timer_state.qemu_icount to take into |
265 | * account executed instructions. This is done by the TCG vCPU |
266 | * thread so the main-loop can see time has moved forward. |
267 | */ |
268 | void cpu_update_icount(CPUState *cpu) |
269 | { |
270 | seqlock_write_lock(&timers_state.vm_clock_seqlock, |
271 | &timers_state.vm_clock_lock); |
272 | cpu_update_icount_locked(cpu); |
273 | seqlock_write_unlock(&timers_state.vm_clock_seqlock, |
274 | &timers_state.vm_clock_lock); |
275 | } |
276 | |
277 | static int64_t cpu_get_icount_raw_locked(void) |
278 | { |
279 | CPUState *cpu = current_cpu; |
280 | |
281 | if (cpu && cpu->running) { |
282 | if (!cpu->can_do_io) { |
283 | error_report("Bad icount read" ); |
284 | exit(1); |
285 | } |
286 | /* Take into account what has run */ |
287 | cpu_update_icount_locked(cpu); |
288 | } |
289 | /* The read is protected by the seqlock, but needs atomic64 to avoid UB */ |
290 | return atomic_read_i64(&timers_state.qemu_icount); |
291 | } |
292 | |
293 | static int64_t cpu_get_icount_locked(void) |
294 | { |
295 | int64_t icount = cpu_get_icount_raw_locked(); |
296 | return atomic_read_i64(&timers_state.qemu_icount_bias) + |
297 | cpu_icount_to_ns(icount); |
298 | } |
299 | |
300 | int64_t cpu_get_icount_raw(void) |
301 | { |
302 | int64_t icount; |
303 | unsigned start; |
304 | |
305 | do { |
306 | start = seqlock_read_begin(&timers_state.vm_clock_seqlock); |
307 | icount = cpu_get_icount_raw_locked(); |
308 | } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start)); |
309 | |
310 | return icount; |
311 | } |
312 | |
313 | /* Return the virtual CPU time, based on the instruction counter. */ |
314 | int64_t cpu_get_icount(void) |
315 | { |
316 | int64_t icount; |
317 | unsigned start; |
318 | |
319 | do { |
320 | start = seqlock_read_begin(&timers_state.vm_clock_seqlock); |
321 | icount = cpu_get_icount_locked(); |
322 | } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start)); |
323 | |
324 | return icount; |
325 | } |
326 | |
327 | int64_t cpu_icount_to_ns(int64_t icount) |
328 | { |
329 | return icount << atomic_read(&timers_state.icount_time_shift); |
330 | } |
331 | |
332 | static int64_t cpu_get_ticks_locked(void) |
333 | { |
334 | int64_t ticks = timers_state.cpu_ticks_offset; |
335 | if (timers_state.cpu_ticks_enabled) { |
336 | ticks += cpu_get_host_ticks(); |
337 | } |
338 | |
339 | if (timers_state.cpu_ticks_prev > ticks) { |
        /* Non-increasing ticks may happen if the host uses software suspend. */
341 | timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks; |
342 | ticks = timers_state.cpu_ticks_prev; |
343 | } |
344 | |
345 | timers_state.cpu_ticks_prev = ticks; |
346 | return ticks; |
347 | } |
348 | |
349 | /* return the time elapsed in VM between vm_start and vm_stop. Unless |
350 | * icount is active, cpu_get_ticks() uses units of the host CPU cycle |
351 | * counter. |
352 | */ |
353 | int64_t cpu_get_ticks(void) |
354 | { |
355 | int64_t ticks; |
356 | |
357 | if (use_icount) { |
358 | return cpu_get_icount(); |
359 | } |
360 | |
361 | qemu_spin_lock(&timers_state.vm_clock_lock); |
362 | ticks = cpu_get_ticks_locked(); |
363 | qemu_spin_unlock(&timers_state.vm_clock_lock); |
364 | return ticks; |
365 | } |
366 | |
367 | static int64_t cpu_get_clock_locked(void) |
368 | { |
369 | int64_t time; |
370 | |
371 | time = timers_state.cpu_clock_offset; |
372 | if (timers_state.cpu_ticks_enabled) { |
373 | time += get_clock(); |
374 | } |
375 | |
376 | return time; |
377 | } |
378 | |
379 | /* Return the monotonic time elapsed in VM, i.e., |
380 | * the time between vm_start and vm_stop |
381 | */ |
382 | int64_t cpu_get_clock(void) |
383 | { |
384 | int64_t ti; |
385 | unsigned start; |
386 | |
387 | do { |
388 | start = seqlock_read_begin(&timers_state.vm_clock_seqlock); |
389 | ti = cpu_get_clock_locked(); |
390 | } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start)); |
391 | |
392 | return ti; |
393 | } |
394 | |
395 | /* enable cpu_get_ticks() |
396 | * Caller must hold BQL which serves as mutex for vm_clock_seqlock. |
397 | */ |
398 | void cpu_enable_ticks(void) |
399 | { |
400 | seqlock_write_lock(&timers_state.vm_clock_seqlock, |
401 | &timers_state.vm_clock_lock); |
402 | if (!timers_state.cpu_ticks_enabled) { |
403 | timers_state.cpu_ticks_offset -= cpu_get_host_ticks(); |
404 | timers_state.cpu_clock_offset -= get_clock(); |
405 | timers_state.cpu_ticks_enabled = 1; |
406 | } |
407 | seqlock_write_unlock(&timers_state.vm_clock_seqlock, |
408 | &timers_state.vm_clock_lock); |
409 | } |
410 | |
411 | /* disable cpu_get_ticks() : the clock is stopped. You must not call |
412 | * cpu_get_ticks() after that. |
413 | * Caller must hold BQL which serves as mutex for vm_clock_seqlock. |
414 | */ |
415 | void cpu_disable_ticks(void) |
416 | { |
417 | seqlock_write_lock(&timers_state.vm_clock_seqlock, |
418 | &timers_state.vm_clock_lock); |
419 | if (timers_state.cpu_ticks_enabled) { |
420 | timers_state.cpu_ticks_offset += cpu_get_host_ticks(); |
421 | timers_state.cpu_clock_offset = cpu_get_clock_locked(); |
422 | timers_state.cpu_ticks_enabled = 0; |
423 | } |
424 | seqlock_write_unlock(&timers_state.vm_clock_seqlock, |
425 | &timers_state.vm_clock_lock); |
426 | } |
427 | |
428 | /* Correlation between real and virtual time is always going to be |
429 | fairly approximate, so ignore small variation. |
430 | When the guest is idle real and virtual time will be aligned in |
431 | the IO wait loop. */ |
432 | #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10) |
433 | |
434 | static void icount_adjust(void) |
435 | { |
436 | int64_t cur_time; |
437 | int64_t cur_icount; |
438 | int64_t delta; |
439 | |
440 | /* Protected by TimersState mutex. */ |
441 | static int64_t last_delta; |
442 | |
443 | /* If the VM is not running, then do nothing. */ |
444 | if (!runstate_is_running()) { |
445 | return; |
446 | } |
447 | |
448 | seqlock_write_lock(&timers_state.vm_clock_seqlock, |
449 | &timers_state.vm_clock_lock); |
450 | cur_time = cpu_get_clock_locked(); |
451 | cur_icount = cpu_get_icount_locked(); |
452 | |
453 | delta = cur_icount - cur_time; |
454 | /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */ |
455 | if (delta > 0 |
456 | && last_delta + ICOUNT_WOBBLE < delta * 2 |
457 | && timers_state.icount_time_shift > 0) { |
458 | /* The guest is getting too far ahead. Slow time down. */ |
459 | atomic_set(&timers_state.icount_time_shift, |
460 | timers_state.icount_time_shift - 1); |
461 | } |
462 | if (delta < 0 |
463 | && last_delta - ICOUNT_WOBBLE > delta * 2 |
464 | && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) { |
465 | /* The guest is getting too far behind. Speed time up. */ |
466 | atomic_set(&timers_state.icount_time_shift, |
467 | timers_state.icount_time_shift + 1); |
468 | } |
469 | last_delta = delta; |
470 | atomic_set_i64(&timers_state.qemu_icount_bias, |
471 | cur_icount - (timers_state.qemu_icount |
472 | << timers_state.icount_time_shift)); |
473 | seqlock_write_unlock(&timers_state.vm_clock_seqlock, |
474 | &timers_state.vm_clock_lock); |
475 | } |
476 | |
477 | static void icount_adjust_rt(void *opaque) |
478 | { |
479 | timer_mod(timers_state.icount_rt_timer, |
480 | qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000); |
481 | icount_adjust(); |
482 | } |
483 | |
484 | static void icount_adjust_vm(void *opaque) |
485 | { |
486 | timer_mod(timers_state.icount_vm_timer, |
487 | qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + |
488 | NANOSECONDS_PER_SECOND / 10); |
489 | icount_adjust(); |
490 | } |
491 | |
492 | static int64_t qemu_icount_round(int64_t count) |
493 | { |
494 | int shift = atomic_read(&timers_state.icount_time_shift); |
495 | return (count + (1 << shift) - 1) >> shift; |
496 | } |
497 | |
498 | static void icount_warp_rt(void) |
499 | { |
500 | unsigned seq; |
501 | int64_t warp_start; |
502 | |
503 | /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start |
504 | * changes from -1 to another value, so the race here is okay. |
505 | */ |
506 | do { |
507 | seq = seqlock_read_begin(&timers_state.vm_clock_seqlock); |
508 | warp_start = timers_state.vm_clock_warp_start; |
509 | } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq)); |
510 | |
511 | if (warp_start == -1) { |
512 | return; |
513 | } |
514 | |
515 | seqlock_write_lock(&timers_state.vm_clock_seqlock, |
516 | &timers_state.vm_clock_lock); |
517 | if (runstate_is_running()) { |
518 | int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT, |
519 | cpu_get_clock_locked()); |
520 | int64_t warp_delta; |
521 | |
522 | warp_delta = clock - timers_state.vm_clock_warp_start; |
523 | if (use_icount == 2) { |
524 | /* |
525 | * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too |
526 | * far ahead of real time. |
527 | */ |
528 | int64_t cur_icount = cpu_get_icount_locked(); |
529 | int64_t delta = clock - cur_icount; |
530 | warp_delta = MIN(warp_delta, delta); |
531 | } |
532 | atomic_set_i64(&timers_state.qemu_icount_bias, |
533 | timers_state.qemu_icount_bias + warp_delta); |
534 | } |
535 | timers_state.vm_clock_warp_start = -1; |
536 | seqlock_write_unlock(&timers_state.vm_clock_seqlock, |
537 | &timers_state.vm_clock_lock); |
538 | |
539 | if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) { |
540 | qemu_clock_notify(QEMU_CLOCK_VIRTUAL); |
541 | } |
542 | } |
543 | |
544 | static void icount_timer_cb(void *opaque) |
545 | { |
546 | /* No need for a checkpoint because the timer already synchronizes |
547 | * with CHECKPOINT_CLOCK_VIRTUAL_RT. |
548 | */ |
549 | icount_warp_rt(); |
550 | } |
551 | |
552 | void qtest_clock_warp(int64_t dest) |
553 | { |
554 | int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); |
555 | AioContext *aio_context; |
556 | assert(qtest_enabled()); |
557 | aio_context = qemu_get_aio_context(); |
558 | while (clock < dest) { |
559 | int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL, |
560 | QEMU_TIMER_ATTR_ALL); |
561 | int64_t warp = qemu_soonest_timeout(dest - clock, deadline); |
562 | |
563 | seqlock_write_lock(&timers_state.vm_clock_seqlock, |
564 | &timers_state.vm_clock_lock); |
565 | atomic_set_i64(&timers_state.qemu_icount_bias, |
566 | timers_state.qemu_icount_bias + warp); |
567 | seqlock_write_unlock(&timers_state.vm_clock_seqlock, |
568 | &timers_state.vm_clock_lock); |
569 | |
570 | qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL); |
571 | timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]); |
572 | clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); |
573 | } |
574 | qemu_clock_notify(QEMU_CLOCK_VIRTUAL); |
575 | } |
576 | |
577 | void qemu_start_warp_timer(void) |
578 | { |
579 | int64_t clock; |
580 | int64_t deadline; |
581 | |
582 | if (!use_icount) { |
583 | return; |
584 | } |
585 | |
586 | /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers |
587 | * do not fire, so computing the deadline does not make sense. |
588 | */ |
589 | if (!runstate_is_running()) { |
590 | return; |
591 | } |
592 | |
593 | if (replay_mode != REPLAY_MODE_PLAY) { |
594 | if (!all_cpu_threads_idle()) { |
595 | return; |
596 | } |
597 | |
598 | if (qtest_enabled()) { |
599 | /* When testing, qtest commands advance icount. */ |
600 | return; |
601 | } |
602 | |
603 | replay_checkpoint(CHECKPOINT_CLOCK_WARP_START); |
604 | } else { |
605 | /* warp clock deterministically in record/replay mode */ |
606 | if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) { |
            /* vCPU is sleeping and warp can't be started.
               It is probably a race condition: notification sent
               to vCPU was processed in advance and vCPU went to sleep.
               Therefore we have to wake it up to do something. */
611 | if (replay_has_checkpoint()) { |
612 | qemu_clock_notify(QEMU_CLOCK_VIRTUAL); |
613 | } |
614 | return; |
615 | } |
616 | } |
617 | |
618 | /* We want to use the earliest deadline from ALL vm_clocks */ |
619 | clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT); |
620 | deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL, |
621 | ~QEMU_TIMER_ATTR_EXTERNAL); |
622 | if (deadline < 0) { |
623 | static bool notified; |
624 | if (!icount_sleep && !notified) { |
625 | warn_report("icount sleep disabled and no active timers" ); |
626 | notified = true; |
627 | } |
628 | return; |
629 | } |
630 | |
631 | if (deadline > 0) { |
632 | /* |
633 | * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to |
634 | * sleep. Otherwise, the CPU might be waiting for a future timer |
635 | * interrupt to wake it up, but the interrupt never comes because |
636 | * the vCPU isn't running any insns and thus doesn't advance the |
637 | * QEMU_CLOCK_VIRTUAL. |
638 | */ |
639 | if (!icount_sleep) { |
640 | /* |
             * We never let VCPUs sleep in no-sleep icount mode.
642 | * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance |
643 | * to the next QEMU_CLOCK_VIRTUAL event and notify it. |
644 | * It is useful when we want a deterministic execution time, |
645 | * isolated from host latencies. |
646 | */ |
647 | seqlock_write_lock(&timers_state.vm_clock_seqlock, |
648 | &timers_state.vm_clock_lock); |
649 | atomic_set_i64(&timers_state.qemu_icount_bias, |
650 | timers_state.qemu_icount_bias + deadline); |
651 | seqlock_write_unlock(&timers_state.vm_clock_seqlock, |
652 | &timers_state.vm_clock_lock); |
653 | qemu_clock_notify(QEMU_CLOCK_VIRTUAL); |
654 | } else { |
655 | /* |
656 | * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some |
657 | * "real" time, (related to the time left until the next event) has |
658 | * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this. |
             * This keeps the warps from being visible externally; for
             * example, you will not be sending network packets
             * continuously instead of every 100ms.
662 | */ |
663 | seqlock_write_lock(&timers_state.vm_clock_seqlock, |
664 | &timers_state.vm_clock_lock); |
665 | if (timers_state.vm_clock_warp_start == -1 |
666 | || timers_state.vm_clock_warp_start > clock) { |
667 | timers_state.vm_clock_warp_start = clock; |
668 | } |
669 | seqlock_write_unlock(&timers_state.vm_clock_seqlock, |
670 | &timers_state.vm_clock_lock); |
671 | timer_mod_anticipate(timers_state.icount_warp_timer, |
672 | clock + deadline); |
673 | } |
674 | } else if (deadline == 0) { |
675 | qemu_clock_notify(QEMU_CLOCK_VIRTUAL); |
676 | } |
677 | } |
678 | |
679 | static void qemu_account_warp_timer(void) |
680 | { |
681 | if (!use_icount || !icount_sleep) { |
682 | return; |
683 | } |
684 | |
685 | /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers |
686 | * do not fire, so computing the deadline does not make sense. |
687 | */ |
688 | if (!runstate_is_running()) { |
689 | return; |
690 | } |
691 | |
692 | /* warp clock deterministically in record/replay mode */ |
693 | if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) { |
694 | return; |
695 | } |
696 | |
697 | timer_del(timers_state.icount_warp_timer); |
698 | icount_warp_rt(); |
699 | } |
700 | |
701 | static bool icount_state_needed(void *opaque) |
702 | { |
703 | return use_icount; |
704 | } |
705 | |
706 | static bool warp_timer_state_needed(void *opaque) |
707 | { |
708 | TimersState *s = opaque; |
709 | return s->icount_warp_timer != NULL; |
710 | } |
711 | |
712 | static bool adjust_timers_state_needed(void *opaque) |
713 | { |
714 | TimersState *s = opaque; |
715 | return s->icount_rt_timer != NULL; |
716 | } |
717 | |
718 | /* |
719 | * Subsection for warp timer migration is optional, because may not be created |
720 | */ |
721 | static const VMStateDescription icount_vmstate_warp_timer = { |
722 | .name = "timer/icount/warp_timer" , |
723 | .version_id = 1, |
724 | .minimum_version_id = 1, |
725 | .needed = warp_timer_state_needed, |
726 | .fields = (VMStateField[]) { |
727 | VMSTATE_INT64(vm_clock_warp_start, TimersState), |
728 | VMSTATE_TIMER_PTR(icount_warp_timer, TimersState), |
729 | VMSTATE_END_OF_LIST() |
730 | } |
731 | }; |
732 | |
733 | static const VMStateDescription icount_vmstate_adjust_timers = { |
734 | .name = "timer/icount/timers" , |
735 | .version_id = 1, |
736 | .minimum_version_id = 1, |
737 | .needed = adjust_timers_state_needed, |
738 | .fields = (VMStateField[]) { |
739 | VMSTATE_TIMER_PTR(icount_rt_timer, TimersState), |
740 | VMSTATE_TIMER_PTR(icount_vm_timer, TimersState), |
741 | VMSTATE_END_OF_LIST() |
742 | } |
743 | }; |
744 | |
745 | /* |
746 | * This is a subsection for icount migration. |
747 | */ |
748 | static const VMStateDescription icount_vmstate_timers = { |
749 | .name = "timer/icount" , |
750 | .version_id = 1, |
751 | .minimum_version_id = 1, |
752 | .needed = icount_state_needed, |
753 | .fields = (VMStateField[]) { |
754 | VMSTATE_INT64(qemu_icount_bias, TimersState), |
755 | VMSTATE_INT64(qemu_icount, TimersState), |
756 | VMSTATE_END_OF_LIST() |
757 | }, |
758 | .subsections = (const VMStateDescription*[]) { |
759 | &icount_vmstate_warp_timer, |
760 | &icount_vmstate_adjust_timers, |
761 | NULL |
762 | } |
763 | }; |
764 | |
765 | static const VMStateDescription vmstate_timers = { |
766 | .name = "timer" , |
767 | .version_id = 2, |
768 | .minimum_version_id = 1, |
769 | .fields = (VMStateField[]) { |
770 | VMSTATE_INT64(cpu_ticks_offset, TimersState), |
771 | VMSTATE_UNUSED(8), |
772 | VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2), |
773 | VMSTATE_END_OF_LIST() |
774 | }, |
775 | .subsections = (const VMStateDescription*[]) { |
776 | &icount_vmstate_timers, |
777 | NULL |
778 | } |
779 | }; |
780 | |
781 | static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque) |
782 | { |
783 | double pct; |
784 | double throttle_ratio; |
785 | long sleeptime_ns; |
786 | |
787 | if (!cpu_throttle_get_percentage()) { |
788 | return; |
789 | } |
790 | |
791 | pct = (double)cpu_throttle_get_percentage()/100; |
792 | throttle_ratio = pct / (1 - pct); |
793 | sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS); |
794 | |
795 | qemu_mutex_unlock_iothread(); |
796 | g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */ |
797 | qemu_mutex_lock_iothread(); |
798 | atomic_set(&cpu->throttle_thread_scheduled, 0); |
799 | } |
800 | |
801 | static void cpu_throttle_timer_tick(void *opaque) |
802 | { |
803 | CPUState *cpu; |
804 | double pct; |
805 | |
806 | /* Stop the timer if needed */ |
807 | if (!cpu_throttle_get_percentage()) { |
808 | return; |
809 | } |
810 | CPU_FOREACH(cpu) { |
811 | if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) { |
812 | async_run_on_cpu(cpu, cpu_throttle_thread, |
813 | RUN_ON_CPU_NULL); |
814 | } |
815 | } |
816 | |
817 | pct = (double)cpu_throttle_get_percentage()/100; |
818 | timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) + |
819 | CPU_THROTTLE_TIMESLICE_NS / (1-pct)); |
820 | } |
821 | |
822 | void cpu_throttle_set(int new_throttle_pct) |
823 | { |
824 | /* Ensure throttle percentage is within valid range */ |
825 | new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX); |
826 | new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN); |
827 | |
828 | atomic_set(&throttle_percentage, new_throttle_pct); |
829 | |
830 | timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) + |
831 | CPU_THROTTLE_TIMESLICE_NS); |
832 | } |
833 | |
834 | void cpu_throttle_stop(void) |
835 | { |
836 | atomic_set(&throttle_percentage, 0); |
837 | } |
838 | |
839 | bool cpu_throttle_active(void) |
840 | { |
841 | return (cpu_throttle_get_percentage() != 0); |
842 | } |
843 | |
844 | int cpu_throttle_get_percentage(void) |
845 | { |
846 | return atomic_read(&throttle_percentage); |
847 | } |
848 | |
849 | void cpu_ticks_init(void) |
850 | { |
851 | seqlock_init(&timers_state.vm_clock_seqlock); |
852 | qemu_spin_init(&timers_state.vm_clock_lock); |
853 | vmstate_register(NULL, 0, &vmstate_timers, &timers_state); |
854 | throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT, |
855 | cpu_throttle_timer_tick, NULL); |
856 | } |
857 | |
858 | void configure_icount(QemuOpts *opts, Error **errp) |
859 | { |
860 | const char *option; |
861 | char *rem_str = NULL; |
862 | |
    option = qemu_opt_get(opts, "shift");
    if (!option) {
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
867 | } |
868 | return; |
869 | } |
870 | |
    icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
872 | if (icount_sleep) { |
873 | timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT, |
874 | icount_timer_cb, NULL); |
875 | } |
876 | |
    icount_align_option = qemu_opt_get_bool(opts, "align", false);
878 | |
879 | if (icount_align_option && !icount_sleep) { |
880 | error_setg(errp, "align=on and sleep=off are incompatible" ); |
881 | } |
882 | if (strcmp(option, "auto" ) != 0) { |
883 | errno = 0; |
884 | timers_state.icount_time_shift = strtol(option, &rem_str, 0); |
885 | if (errno != 0 || *rem_str != '\0' || !strlen(option)) { |
886 | error_setg(errp, "icount: Invalid shift value" ); |
887 | } |
888 | use_icount = 1; |
889 | return; |
890 | } else if (icount_align_option) { |
891 | error_setg(errp, "shift=auto and align=on are incompatible" ); |
892 | } else if (!icount_sleep) { |
893 | error_setg(errp, "shift=auto and sleep=off are incompatible" ); |
894 | } |
895 | |
896 | use_icount = 2; |
897 | |
898 | /* 125MIPS seems a reasonable initial guess at the guest speed. |
899 | It will be corrected fairly quickly anyway. */ |
900 | timers_state.icount_time_shift = 3; |
901 | |
902 | /* Have both realtime and virtual time triggers for speed adjustment. |
903 | The realtime trigger catches emulated time passing too slowly, |
904 | the virtual time trigger catches emulated time passing too fast. |
905 | Realtime triggers occur even when idle, so use them less frequently |
906 | than VM triggers. */ |
907 | timers_state.vm_clock_warp_start = -1; |
908 | timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT, |
909 | icount_adjust_rt, NULL); |
910 | timer_mod(timers_state.icount_rt_timer, |
911 | qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000); |
912 | timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, |
913 | icount_adjust_vm, NULL); |
914 | timer_mod(timers_state.icount_vm_timer, |
915 | qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + |
916 | NANOSECONDS_PER_SECOND / 10); |
917 | } |
918 | |
919 | /***********************************************************/ |
/* TCG vCPU kick timer
 *
 * The kick timer is responsible for moving single threaded vCPU
 * emulation on to the next vCPU. If more than one vCPU is running a
 * timer event will force a cpu->exit so the next vCPU can get
 * scheduled.
 *
 * The timer is removed if all vCPUs are idle and restarted once any
 * vCPU leaves the idle state.
 */
930 | |
931 | static QEMUTimer *tcg_kick_vcpu_timer; |
932 | static CPUState *tcg_current_rr_cpu; |
933 | |
934 | #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10) |
935 | |
936 | static inline int64_t qemu_tcg_next_kick(void) |
937 | { |
938 | return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD; |
939 | } |
940 | |
941 | /* Kick the currently round-robin scheduled vCPU */ |
942 | static void qemu_cpu_kick_rr_cpu(void) |
943 | { |
944 | CPUState *cpu; |
945 | do { |
946 | cpu = atomic_mb_read(&tcg_current_rr_cpu); |
947 | if (cpu) { |
948 | cpu_exit(cpu); |
949 | } |
950 | } while (cpu != atomic_mb_read(&tcg_current_rr_cpu)); |
951 | } |
952 | |
953 | static void do_nothing(CPUState *cpu, run_on_cpu_data unused) |
954 | { |
955 | } |
956 | |
957 | void qemu_timer_notify_cb(void *opaque, QEMUClockType type) |
958 | { |
959 | if (!use_icount || type != QEMU_CLOCK_VIRTUAL) { |
960 | qemu_notify_event(); |
961 | return; |
962 | } |
963 | |
964 | if (qemu_in_vcpu_thread()) { |
965 | /* A CPU is currently running; kick it back out to the |
966 | * tcg_cpu_exec() loop so it will recalculate its |
967 | * icount deadline immediately. |
968 | */ |
969 | qemu_cpu_kick(current_cpu); |
970 | } else if (first_cpu) { |
971 | /* qemu_cpu_kick is not enough to kick a halted CPU out of |
972 | * qemu_tcg_wait_io_event. async_run_on_cpu, instead, |
973 | * causes cpu_thread_is_idle to return false. This way, |
974 | * handle_icount_deadline can run. |
975 | * If we have no CPUs at all for some reason, we don't |
976 | * need to do anything. |
977 | */ |
978 | async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL); |
979 | } |
980 | } |
981 | |
982 | static void kick_tcg_thread(void *opaque) |
983 | { |
984 | timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick()); |
985 | qemu_cpu_kick_rr_cpu(); |
986 | } |
987 | |
988 | static void start_tcg_kick_timer(void) |
989 | { |
990 | assert(!mttcg_enabled); |
991 | if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) { |
992 | tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, |
993 | kick_tcg_thread, NULL); |
994 | } |
995 | if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) { |
996 | timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick()); |
997 | } |
998 | } |
999 | |
1000 | static void stop_tcg_kick_timer(void) |
1001 | { |
1002 | assert(!mttcg_enabled); |
1003 | if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) { |
1004 | timer_del(tcg_kick_vcpu_timer); |
1005 | } |
1006 | } |
1007 | |
1008 | /***********************************************************/ |
1009 | void hw_error(const char *fmt, ...) |
1010 | { |
1011 | va_list ap; |
1012 | CPUState *cpu; |
1013 | |
1014 | va_start(ap, fmt); |
1015 | fprintf(stderr, "qemu: hardware error: " ); |
1016 | vfprintf(stderr, fmt, ap); |
1017 | fprintf(stderr, "\n" ); |
1018 | CPU_FOREACH(cpu) { |
1019 | fprintf(stderr, "CPU #%d:\n" , cpu->cpu_index); |
1020 | cpu_dump_state(cpu, stderr, CPU_DUMP_FPU); |
1021 | } |
1022 | va_end(ap); |
1023 | abort(); |
1024 | } |
1025 | |
1026 | void cpu_synchronize_all_states(void) |
1027 | { |
1028 | CPUState *cpu; |
1029 | |
1030 | CPU_FOREACH(cpu) { |
1031 | cpu_synchronize_state(cpu); |
1032 | /* TODO: move to cpu_synchronize_state() */ |
1033 | if (hvf_enabled()) { |
1034 | hvf_cpu_synchronize_state(cpu); |
1035 | } |
1036 | } |
1037 | } |
1038 | |
1039 | void cpu_synchronize_all_post_reset(void) |
1040 | { |
1041 | CPUState *cpu; |
1042 | |
1043 | CPU_FOREACH(cpu) { |
1044 | cpu_synchronize_post_reset(cpu); |
1045 | /* TODO: move to cpu_synchronize_post_reset() */ |
1046 | if (hvf_enabled()) { |
1047 | hvf_cpu_synchronize_post_reset(cpu); |
1048 | } |
1049 | } |
1050 | } |
1051 | |
1052 | void cpu_synchronize_all_post_init(void) |
1053 | { |
1054 | CPUState *cpu; |
1055 | |
1056 | CPU_FOREACH(cpu) { |
1057 | cpu_synchronize_post_init(cpu); |
1058 | /* TODO: move to cpu_synchronize_post_init() */ |
1059 | if (hvf_enabled()) { |
1060 | hvf_cpu_synchronize_post_init(cpu); |
1061 | } |
1062 | } |
1063 | } |
1064 | |
1065 | void cpu_synchronize_all_pre_loadvm(void) |
1066 | { |
1067 | CPUState *cpu; |
1068 | |
1069 | CPU_FOREACH(cpu) { |
1070 | cpu_synchronize_pre_loadvm(cpu); |
1071 | } |
1072 | } |
1073 | |
1074 | static int do_vm_stop(RunState state, bool send_stop) |
1075 | { |
1076 | int ret = 0; |
1077 | |
1078 | if (runstate_is_running()) { |
1079 | cpu_disable_ticks(); |
1080 | pause_all_vcpus(); |
1081 | runstate_set(state); |
1082 | vm_state_notify(0, state); |
1083 | if (send_stop) { |
1084 | qapi_event_send_stop(); |
1085 | } |
1086 | } |
1087 | |
1088 | bdrv_drain_all(); |
1089 | replay_disable_events(); |
1090 | ret = bdrv_flush_all(); |
1091 | |
1092 | return ret; |
1093 | } |
1094 | |
1095 | /* Special vm_stop() variant for terminating the process. Historically clients |
1096 | * did not expect a QMP STOP event and so we need to retain compatibility. |
1097 | */ |
1098 | int vm_shutdown(void) |
1099 | { |
1100 | return do_vm_stop(RUN_STATE_SHUTDOWN, false); |
1101 | } |
1102 | |
1103 | static bool cpu_can_run(CPUState *cpu) |
1104 | { |
1105 | if (cpu->stop) { |
1106 | return false; |
1107 | } |
1108 | if (cpu_is_stopped(cpu)) { |
1109 | return false; |
1110 | } |
1111 | return true; |
1112 | } |
1113 | |
1114 | static void cpu_handle_guest_debug(CPUState *cpu) |
1115 | { |
1116 | gdb_set_stop_cpu(cpu); |
1117 | qemu_system_debug_request(); |
1118 | cpu->stopped = true; |
1119 | } |
1120 | |
1121 | #ifdef CONFIG_LINUX |
1122 | static void sigbus_reraise(void) |
1123 | { |
1124 | sigset_t set; |
1125 | struct sigaction action; |
1126 | |
1127 | memset(&action, 0, sizeof(action)); |
1128 | action.sa_handler = SIG_DFL; |
1129 | if (!sigaction(SIGBUS, &action, NULL)) { |
1130 | raise(SIGBUS); |
1131 | sigemptyset(&set); |
1132 | sigaddset(&set, SIGBUS); |
1133 | pthread_sigmask(SIG_UNBLOCK, &set, NULL); |
1134 | } |
1135 | perror("Failed to re-raise SIGBUS!\n" ); |
1136 | abort(); |
1137 | } |
1138 | |
1139 | static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx) |
1140 | { |
1141 | if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) { |
1142 | sigbus_reraise(); |
1143 | } |
1144 | |
1145 | if (current_cpu) { |
1146 | /* Called asynchronously in VCPU thread. */ |
1147 | if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) { |
1148 | sigbus_reraise(); |
1149 | } |
1150 | } else { |
1151 | /* Called synchronously (via signalfd) in main thread. */ |
1152 | if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) { |
1153 | sigbus_reraise(); |
1154 | } |
1155 | } |
1156 | } |
1157 | |
1158 | static void qemu_init_sigbus(void) |
1159 | { |
1160 | struct sigaction action; |
1161 | |
1162 | memset(&action, 0, sizeof(action)); |
1163 | action.sa_flags = SA_SIGINFO; |
1164 | action.sa_sigaction = sigbus_handler; |
1165 | sigaction(SIGBUS, &action, NULL); |
1166 | |
1167 | prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0); |
1168 | } |
1169 | #else /* !CONFIG_LINUX */ |
1170 | static void qemu_init_sigbus(void) |
1171 | { |
1172 | } |
1173 | #endif /* !CONFIG_LINUX */ |
1174 | |
1175 | static QemuMutex qemu_global_mutex; |
1176 | |
1177 | static QemuThread io_thread; |
1178 | |
1179 | /* cpu creation */ |
1180 | static QemuCond qemu_cpu_cond; |
1181 | /* system init */ |
1182 | static QemuCond qemu_pause_cond; |
1183 | |
1184 | void qemu_init_cpu_loop(void) |
1185 | { |
1186 | qemu_init_sigbus(); |
1187 | qemu_cond_init(&qemu_cpu_cond); |
1188 | qemu_cond_init(&qemu_pause_cond); |
1189 | qemu_mutex_init(&qemu_global_mutex); |
1190 | |
1191 | qemu_thread_get_self(&io_thread); |
1192 | } |
1193 | |
1194 | void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data) |
1195 | { |
1196 | do_run_on_cpu(cpu, func, data, &qemu_global_mutex); |
1197 | } |
1198 | |
1199 | static void qemu_kvm_destroy_vcpu(CPUState *cpu) |
1200 | { |
1201 | if (kvm_destroy_vcpu(cpu) < 0) { |
1202 | error_report("kvm_destroy_vcpu failed" ); |
1203 | exit(EXIT_FAILURE); |
1204 | } |
1205 | } |
1206 | |
1207 | static void qemu_tcg_destroy_vcpu(CPUState *cpu) |
1208 | { |
1209 | } |
1210 | |
1211 | static void qemu_cpu_stop(CPUState *cpu, bool exit) |
1212 | { |
1213 | g_assert(qemu_cpu_is_self(cpu)); |
1214 | cpu->stop = false; |
1215 | cpu->stopped = true; |
1216 | if (exit) { |
1217 | cpu_exit(cpu); |
1218 | } |
1219 | qemu_cond_broadcast(&qemu_pause_cond); |
1220 | } |
1221 | |
1222 | static void qemu_wait_io_event_common(CPUState *cpu) |
1223 | { |
1224 | atomic_mb_set(&cpu->thread_kicked, false); |
1225 | if (cpu->stop) { |
1226 | qemu_cpu_stop(cpu, false); |
1227 | } |
1228 | process_queued_cpu_work(cpu); |
1229 | } |
1230 | |
1231 | static void qemu_tcg_rr_wait_io_event(void) |
1232 | { |
1233 | CPUState *cpu; |
1234 | |
1235 | while (all_cpu_threads_idle()) { |
1236 | stop_tcg_kick_timer(); |
1237 | qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex); |
1238 | } |
1239 | |
1240 | start_tcg_kick_timer(); |
1241 | |
1242 | CPU_FOREACH(cpu) { |
1243 | qemu_wait_io_event_common(cpu); |
1244 | } |
1245 | } |
1246 | |
1247 | static void qemu_wait_io_event(CPUState *cpu) |
1248 | { |
1249 | while (cpu_thread_is_idle(cpu)) { |
1250 | qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex); |
1251 | } |
1252 | |
1253 | #ifdef _WIN32 |
1254 | /* Eat dummy APC queued by qemu_cpu_kick_thread. */ |
1255 | if (!tcg_enabled()) { |
1256 | SleepEx(0, TRUE); |
1257 | } |
1258 | #endif |
1259 | qemu_wait_io_event_common(cpu); |
1260 | } |
1261 | |
1262 | static void *qemu_kvm_cpu_thread_fn(void *arg) |
1263 | { |
1264 | CPUState *cpu = arg; |
1265 | int r; |
1266 | |
1267 | rcu_register_thread(); |
1268 | |
1269 | qemu_mutex_lock_iothread(); |
1270 | qemu_thread_get_self(cpu->thread); |
1271 | cpu->thread_id = qemu_get_thread_id(); |
1272 | cpu->can_do_io = 1; |
1273 | current_cpu = cpu; |
1274 | |
1275 | r = kvm_init_vcpu(cpu); |
1276 | if (r < 0) { |
1277 | error_report("kvm_init_vcpu failed: %s" , strerror(-r)); |
1278 | exit(1); |
1279 | } |
1280 | |
1281 | kvm_init_cpu_signals(cpu); |
1282 | |
1283 | /* signal CPU creation */ |
1284 | cpu->created = true; |
1285 | qemu_cond_signal(&qemu_cpu_cond); |
1286 | qemu_guest_random_seed_thread_part2(cpu->random_seed); |
1287 | |
1288 | do { |
1289 | if (cpu_can_run(cpu)) { |
1290 | r = kvm_cpu_exec(cpu); |
1291 | if (r == EXCP_DEBUG) { |
1292 | cpu_handle_guest_debug(cpu); |
1293 | } |
1294 | } |
1295 | qemu_wait_io_event(cpu); |
1296 | } while (!cpu->unplug || cpu_can_run(cpu)); |
1297 | |
1298 | qemu_kvm_destroy_vcpu(cpu); |
1299 | cpu->created = false; |
1300 | qemu_cond_signal(&qemu_cpu_cond); |
1301 | qemu_mutex_unlock_iothread(); |
1302 | rcu_unregister_thread(); |
1303 | return NULL; |
1304 | } |
1305 | |
1306 | static void *qemu_dummy_cpu_thread_fn(void *arg) |
1307 | { |
1308 | #ifdef _WIN32 |
1309 | error_report("qtest is not supported under Windows" ); |
1310 | exit(1); |
1311 | #else |
1312 | CPUState *cpu = arg; |
1313 | sigset_t waitset; |
1314 | int r; |
1315 | |
1316 | rcu_register_thread(); |
1317 | |
1318 | qemu_mutex_lock_iothread(); |
1319 | qemu_thread_get_self(cpu->thread); |
1320 | cpu->thread_id = qemu_get_thread_id(); |
1321 | cpu->can_do_io = 1; |
1322 | current_cpu = cpu; |
1323 | |
1324 | sigemptyset(&waitset); |
1325 | sigaddset(&waitset, SIG_IPI); |
1326 | |
1327 | /* signal CPU creation */ |
1328 | cpu->created = true; |
1329 | qemu_cond_signal(&qemu_cpu_cond); |
1330 | qemu_guest_random_seed_thread_part2(cpu->random_seed); |
1331 | |
1332 | do { |
1333 | qemu_mutex_unlock_iothread(); |
1334 | do { |
1335 | int sig; |
1336 | r = sigwait(&waitset, &sig); |
1337 | } while (r == -1 && (errno == EAGAIN || errno == EINTR)); |
1338 | if (r == -1) { |
1339 | perror("sigwait" ); |
1340 | exit(1); |
1341 | } |
1342 | qemu_mutex_lock_iothread(); |
1343 | qemu_wait_io_event(cpu); |
1344 | } while (!cpu->unplug); |
1345 | |
1346 | qemu_mutex_unlock_iothread(); |
1347 | rcu_unregister_thread(); |
1348 | return NULL; |
1349 | #endif |
1350 | } |
1351 | |
1352 | static int64_t tcg_get_icount_limit(void) |
1353 | { |
1354 | int64_t deadline; |
1355 | |
1356 | if (replay_mode != REPLAY_MODE_PLAY) { |
1357 | /* |
1358 | * Include all the timers, because they may need an attention. |
1359 | * Too long CPU execution may create unnecessary delay in UI. |
1360 | */ |
1361 | deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL, |
1362 | QEMU_TIMER_ATTR_ALL); |
1363 | |
1364 | /* Maintain prior (possibly buggy) behaviour where if no deadline |
1365 | * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than |
1366 | * INT32_MAX nanoseconds ahead, we still use INT32_MAX |
1367 | * nanoseconds. |
1368 | */ |
1369 | if ((deadline < 0) || (deadline > INT32_MAX)) { |
1370 | deadline = INT32_MAX; |
1371 | } |
1372 | |
1373 | return qemu_icount_round(deadline); |
1374 | } else { |
1375 | return replay_get_instructions(); |
1376 | } |
1377 | } |
1378 | |
1379 | static void handle_icount_deadline(void) |
1380 | { |
1381 | assert(qemu_in_vcpu_thread()); |
1382 | if (use_icount) { |
1383 | int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL, |
1384 | QEMU_TIMER_ATTR_ALL); |
1385 | |
1386 | if (deadline == 0) { |
1387 | /* Wake up other AioContexts. */ |
1388 | qemu_clock_notify(QEMU_CLOCK_VIRTUAL); |
1389 | qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL); |
1390 | } |
1391 | } |
1392 | } |
1393 | |
1394 | static void prepare_icount_for_run(CPUState *cpu) |
1395 | { |
1396 | if (use_icount) { |
1397 | int insns_left; |
1398 | |
1399 | /* These should always be cleared by process_icount_data after |
1400 | * each vCPU execution. However u16.high can be raised |
1401 | * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt |
1402 | */ |
1403 | g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0); |
1404 | g_assert(cpu->icount_extra == 0); |
1405 | |
1406 | cpu->icount_budget = tcg_get_icount_limit(); |
1407 | insns_left = MIN(0xffff, cpu->icount_budget); |
1408 | cpu_neg(cpu)->icount_decr.u16.low = insns_left; |
1409 | cpu->icount_extra = cpu->icount_budget - insns_left; |
1410 | |
1411 | replay_mutex_lock(); |
1412 | } |
1413 | } |
1414 | |
1415 | static void process_icount_data(CPUState *cpu) |
1416 | { |
1417 | if (use_icount) { |
1418 | /* Account for executed instructions */ |
1419 | cpu_update_icount(cpu); |
1420 | |
1421 | /* Reset the counters */ |
1422 | cpu_neg(cpu)->icount_decr.u16.low = 0; |
1423 | cpu->icount_extra = 0; |
1424 | cpu->icount_budget = 0; |
1425 | |
1426 | replay_account_executed_instructions(); |
1427 | |
1428 | replay_mutex_unlock(); |
1429 | } |
1430 | } |
1431 | |
1432 | |
1433 | static int tcg_cpu_exec(CPUState *cpu) |
1434 | { |
1435 | int ret; |
1436 | #ifdef CONFIG_PROFILER |
1437 | int64_t ti; |
1438 | #endif |
1439 | |
1440 | assert(tcg_enabled()); |
1441 | #ifdef CONFIG_PROFILER |
1442 | ti = profile_getclock(); |
1443 | #endif |
1444 | cpu_exec_start(cpu); |
1445 | ret = cpu_exec(cpu); |
1446 | cpu_exec_end(cpu); |
1447 | #ifdef CONFIG_PROFILER |
1448 | atomic_set(&tcg_ctx->prof.cpu_exec_time, |
1449 | tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti); |
1450 | #endif |
1451 | return ret; |
1452 | } |
1453 | |
1454 | /* Destroy any remaining vCPUs which have been unplugged and have |
1455 | * finished running |
1456 | */ |
1457 | static void deal_with_unplugged_cpus(void) |
1458 | { |
1459 | CPUState *cpu; |
1460 | |
1461 | CPU_FOREACH(cpu) { |
1462 | if (cpu->unplug && !cpu_can_run(cpu)) { |
1463 | qemu_tcg_destroy_vcpu(cpu); |
1464 | cpu->created = false; |
1465 | qemu_cond_signal(&qemu_cpu_cond); |
1466 | break; |
1467 | } |
1468 | } |
1469 | } |
1470 | |
1471 | /* Single-threaded TCG |
1472 | * |
1473 | * In the single-threaded case each vCPU is simulated in turn. If |
1474 | * there is more than a single vCPU we create a simple timer to kick |
1475 | * the vCPU and ensure we don't get stuck in a tight loop in one vCPU. |
1476 | * This is done explicitly rather than relying on side-effects |
1477 | * elsewhere. |
1478 | */ |
1479 | |
1480 | static void *qemu_tcg_rr_cpu_thread_fn(void *arg) |
1481 | { |
1482 | CPUState *cpu = arg; |
1483 | |
1484 | assert(tcg_enabled()); |
1485 | rcu_register_thread(); |
1486 | tcg_register_thread(); |
1487 | |
1488 | qemu_mutex_lock_iothread(); |
1489 | qemu_thread_get_self(cpu->thread); |
1490 | |
1491 | cpu->thread_id = qemu_get_thread_id(); |
1492 | cpu->created = true; |
1493 | cpu->can_do_io = 1; |
1494 | qemu_cond_signal(&qemu_cpu_cond); |
1495 | qemu_guest_random_seed_thread_part2(cpu->random_seed); |
1496 | |
1497 | /* wait for initial kick-off after machine start */ |
1498 | while (first_cpu->stopped) { |
1499 | qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex); |
1500 | |
1501 | /* process any pending work */ |
1502 | CPU_FOREACH(cpu) { |
1503 | current_cpu = cpu; |
1504 | qemu_wait_io_event_common(cpu); |
1505 | } |
1506 | } |
1507 | |
1508 | start_tcg_kick_timer(); |
1509 | |
1510 | cpu = first_cpu; |
1511 | |
1512 | /* process any pending work */ |
1513 | cpu->exit_request = 1; |
1514 | |
1515 | while (1) { |
1516 | qemu_mutex_unlock_iothread(); |
1517 | replay_mutex_lock(); |
1518 | qemu_mutex_lock_iothread(); |
1519 | /* Account partial waits to QEMU_CLOCK_VIRTUAL. */ |
1520 | qemu_account_warp_timer(); |
1521 | |
1522 | /* Run the timers here. This is much more efficient than |
1523 | * waking up the I/O thread and waiting for completion. |
1524 | */ |
1525 | handle_icount_deadline(); |
1526 | |
1527 | replay_mutex_unlock(); |
1528 | |
1529 | if (!cpu) { |
1530 | cpu = first_cpu; |
1531 | } |
1532 | |
1533 | while (cpu && !cpu->queued_work_first && !cpu->exit_request) { |
1534 | |
1535 | atomic_mb_set(&tcg_current_rr_cpu, cpu); |
1536 | current_cpu = cpu; |
1537 | |
1538 | qemu_clock_enable(QEMU_CLOCK_VIRTUAL, |
1539 | (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0); |
1540 | |
1541 | if (cpu_can_run(cpu)) { |
1542 | int r; |
1543 | |
1544 | qemu_mutex_unlock_iothread(); |
1545 | prepare_icount_for_run(cpu); |
1546 | |
1547 | r = tcg_cpu_exec(cpu); |
1548 | |
1549 | process_icount_data(cpu); |
1550 | qemu_mutex_lock_iothread(); |
1551 | |
1552 | if (r == EXCP_DEBUG) { |
1553 | cpu_handle_guest_debug(cpu); |
1554 | break; |
1555 | } else if (r == EXCP_ATOMIC) { |
1556 | qemu_mutex_unlock_iothread(); |
1557 | cpu_exec_step_atomic(cpu); |
1558 | qemu_mutex_lock_iothread(); |
1559 | break; |
1560 | } |
1561 | } else if (cpu->stop) { |
1562 | if (cpu->unplug) { |
1563 | cpu = CPU_NEXT(cpu); |
1564 | } |
1565 | break; |
1566 | } |
1567 | |
1568 | cpu = CPU_NEXT(cpu); |
1569 | } /* while (cpu && !cpu->exit_request).. */ |
1570 | |
1571 | /* Does not need atomic_mb_set because a spurious wakeup is okay. */ |
1572 | atomic_set(&tcg_current_rr_cpu, NULL); |
1573 | |
1574 | if (cpu && cpu->exit_request) { |
1575 | atomic_mb_set(&cpu->exit_request, 0); |
1576 | } |
1577 | |
1578 | if (use_icount && all_cpu_threads_idle()) { |
1579 | /* |
1580 | * When all cpus are sleeping (e.g in WFI), to avoid a deadlock |
1581 | * in the main_loop, wake it up in order to start the warp timer. |
1582 | */ |
1583 | qemu_notify_event(); |
1584 | } |
1585 | |
1586 | qemu_tcg_rr_wait_io_event(); |
1587 | deal_with_unplugged_cpus(); |
1588 | } |
1589 | |
1590 | rcu_unregister_thread(); |
1591 | return NULL; |
1592 | } |
1593 | |
1594 | static void *qemu_hax_cpu_thread_fn(void *arg) |
1595 | { |
1596 | CPUState *cpu = arg; |
1597 | int r; |
1598 | |
1599 | rcu_register_thread(); |
1600 | qemu_mutex_lock_iothread(); |
1601 | qemu_thread_get_self(cpu->thread); |
1602 | |
1603 | cpu->thread_id = qemu_get_thread_id(); |
1604 | cpu->created = true; |
1605 | current_cpu = cpu; |
1606 | |
1607 | hax_init_vcpu(cpu); |
1608 | qemu_cond_signal(&qemu_cpu_cond); |
1609 | qemu_guest_random_seed_thread_part2(cpu->random_seed); |
1610 | |
1611 | do { |
1612 | if (cpu_can_run(cpu)) { |
1613 | r = hax_smp_cpu_exec(cpu); |
1614 | if (r == EXCP_DEBUG) { |
1615 | cpu_handle_guest_debug(cpu); |
1616 | } |
1617 | } |
1618 | |
1619 | qemu_wait_io_event(cpu); |
1620 | } while (!cpu->unplug || cpu_can_run(cpu)); |
1621 | rcu_unregister_thread(); |
1622 | return NULL; |
1623 | } |
1624 | |
1625 | /* The HVF-specific vCPU thread function. This one should only run when the host |
1626 | * CPU supports the VMX "unrestricted guest" feature. */ |
1627 | static void *qemu_hvf_cpu_thread_fn(void *arg) |
1628 | { |
1629 | CPUState *cpu = arg; |
1630 | |
1631 | int r; |
1632 | |
1633 | assert(hvf_enabled()); |
1634 | |
1635 | rcu_register_thread(); |
1636 | |
1637 | qemu_mutex_lock_iothread(); |
1638 | qemu_thread_get_self(cpu->thread); |
1639 | |
1640 | cpu->thread_id = qemu_get_thread_id(); |
1641 | cpu->can_do_io = 1; |
1642 | current_cpu = cpu; |
1643 | |
1644 | hvf_init_vcpu(cpu); |
1645 | |
1646 | /* signal CPU creation */ |
1647 | cpu->created = true; |
1648 | qemu_cond_signal(&qemu_cpu_cond); |
1649 | qemu_guest_random_seed_thread_part2(cpu->random_seed); |
1650 | |
1651 | do { |
1652 | if (cpu_can_run(cpu)) { |
1653 | r = hvf_vcpu_exec(cpu); |
1654 | if (r == EXCP_DEBUG) { |
1655 | cpu_handle_guest_debug(cpu); |
1656 | } |
1657 | } |
1658 | qemu_wait_io_event(cpu); |
1659 | } while (!cpu->unplug || cpu_can_run(cpu)); |
1660 | |
1661 | hvf_vcpu_destroy(cpu); |
1662 | cpu->created = false; |
1663 | qemu_cond_signal(&qemu_cpu_cond); |
1664 | qemu_mutex_unlock_iothread(); |
1665 | rcu_unregister_thread(); |
1666 | return NULL; |
1667 | } |
1668 | |
1669 | static void *qemu_whpx_cpu_thread_fn(void *arg) |
1670 | { |
1671 | CPUState *cpu = arg; |
1672 | int r; |
1673 | |
1674 | rcu_register_thread(); |
1675 | |
1676 | qemu_mutex_lock_iothread(); |
1677 | qemu_thread_get_self(cpu->thread); |
1678 | cpu->thread_id = qemu_get_thread_id(); |
1679 | current_cpu = cpu; |
1680 | |
1681 | r = whpx_init_vcpu(cpu); |
1682 | if (r < 0) { |
1683 | fprintf(stderr, "whpx_init_vcpu failed: %s\n" , strerror(-r)); |
1684 | exit(1); |
1685 | } |
1686 | |
1687 | /* signal CPU creation */ |
1688 | cpu->created = true; |
1689 | qemu_cond_signal(&qemu_cpu_cond); |
1690 | qemu_guest_random_seed_thread_part2(cpu->random_seed); |
1691 | |
1692 | do { |
1693 | if (cpu_can_run(cpu)) { |
1694 | r = whpx_vcpu_exec(cpu); |
1695 | if (r == EXCP_DEBUG) { |
1696 | cpu_handle_guest_debug(cpu); |
1697 | } |
1698 | } |
1699 | while (cpu_thread_is_idle(cpu)) { |
1700 | qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex); |
1701 | } |
1702 | qemu_wait_io_event_common(cpu); |
1703 | } while (!cpu->unplug || cpu_can_run(cpu)); |
1704 | |
1705 | whpx_destroy_vcpu(cpu); |
1706 | cpu->created = false; |
1707 | qemu_cond_signal(&qemu_cpu_cond); |
1708 | qemu_mutex_unlock_iothread(); |
1709 | rcu_unregister_thread(); |
1710 | return NULL; |
1711 | } |
1712 | |
1713 | #ifdef _WIN32 |
1714 | static void CALLBACK dummy_apc_func(ULONG_PTR unused) |
1715 | { |
1716 | } |
1717 | #endif |
1718 | |
1719 | /* Multi-threaded TCG |
1720 | * |
1721 | * In the multi-threaded case each vCPU has its own thread. The TLS |
1722 | * variable current_cpu can be used deep in the code to find the |
1723 | * current CPUState for a given thread. |
1724 | */ |
1725 | |
1726 | static void *qemu_tcg_cpu_thread_fn(void *arg) |
1727 | { |
1728 | CPUState *cpu = arg; |
1729 | |
1730 | assert(tcg_enabled()); |
1731 | g_assert(!use_icount); |
1732 | |
1733 | rcu_register_thread(); |
1734 | tcg_register_thread(); |
1735 | |
1736 | qemu_mutex_lock_iothread(); |
1737 | qemu_thread_get_self(cpu->thread); |
1738 | |
1739 | cpu->thread_id = qemu_get_thread_id(); |
1740 | cpu->created = true; |
1741 | cpu->can_do_io = 1; |
1742 | current_cpu = cpu; |
1743 | qemu_cond_signal(&qemu_cpu_cond); |
1744 | qemu_guest_random_seed_thread_part2(cpu->random_seed); |
1745 | |
1746 | /* process any pending work */ |
1747 | cpu->exit_request = 1; |
1748 | |
1749 | do { |
1750 | if (cpu_can_run(cpu)) { |
1751 | int r; |
1752 | qemu_mutex_unlock_iothread(); |
1753 | r = tcg_cpu_exec(cpu); |
1754 | qemu_mutex_lock_iothread(); |
1755 | switch (r) { |
1756 | case EXCP_DEBUG: |
1757 | cpu_handle_guest_debug(cpu); |
1758 | break; |
1759 | case EXCP_HALTED: |
1760 | /* during start-up the vCPU is reset and the thread is |
1761 | * kicked several times. If we don't ensure we go back |
1762 | * to sleep in the halted state we won't cleanly |
1763 | * start-up when the vCPU is enabled. |
1764 | * |
1765 | * cpu->halted should ensure we sleep in wait_io_event |
1766 | */ |
1767 | g_assert(cpu->halted); |
1768 | break; |
            case EXCP_ATOMIC:
                qemu_mutex_unlock_iothread();
                cpu_exec_step_atomic(cpu);
                qemu_mutex_lock_iothread();
                /* fall through */
1773 | default: |
1774 | /* Ignore everything else? */ |
1775 | break; |
1776 | } |
1777 | } |
1778 | |
1779 | atomic_mb_set(&cpu->exit_request, 0); |
1780 | qemu_wait_io_event(cpu); |
1781 | } while (!cpu->unplug || cpu_can_run(cpu)); |
1782 | |
1783 | qemu_tcg_destroy_vcpu(cpu); |
1784 | cpu->created = false; |
1785 | qemu_cond_signal(&qemu_cpu_cond); |
1786 | qemu_mutex_unlock_iothread(); |
1787 | rcu_unregister_thread(); |
1788 | return NULL; |
1789 | } |
1790 | |
1791 | static void qemu_cpu_kick_thread(CPUState *cpu) |
1792 | { |
1793 | #ifndef _WIN32 |
1794 | int err; |
1795 | |
1796 | if (cpu->thread_kicked) { |
1797 | return; |
1798 | } |
1799 | cpu->thread_kicked = true; |
1800 | err = pthread_kill(cpu->thread->thread, SIG_IPI); |
1801 | if (err && err != ESRCH) { |
1802 | fprintf(stderr, "qemu:%s: %s" , __func__, strerror(err)); |
1803 | exit(1); |
1804 | } |
1805 | #else /* _WIN32 */ |
1806 | if (!qemu_cpu_is_self(cpu)) { |
1807 | if (whpx_enabled()) { |
1808 | whpx_vcpu_kick(cpu); |
1809 | } else if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) { |
1810 | fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n" , |
1811 | __func__, GetLastError()); |
1812 | exit(1); |
1813 | } |
1814 | } |
1815 | #endif |
1816 | } |
1817 | |
1818 | void qemu_cpu_kick(CPUState *cpu) |
1819 | { |
1820 | qemu_cond_broadcast(cpu->halt_cond); |
1821 | if (tcg_enabled()) { |
1822 | cpu_exit(cpu); |
1823 | /* NOP unless doing single-thread RR */ |
1824 | qemu_cpu_kick_rr_cpu(); |
1825 | } else { |
1826 | if (hax_enabled()) { |
1827 | /* |
1828 | * FIXME: race condition with the exit_request check in |
1829 | * hax_vcpu_hax_exec |
1830 | */ |
1831 | cpu->exit_request = 1; |
1832 | } |
1833 | qemu_cpu_kick_thread(cpu); |
1834 | } |
1835 | } |
1836 | |
1837 | void qemu_cpu_kick_self(void) |
1838 | { |
1839 | assert(current_cpu); |
1840 | qemu_cpu_kick_thread(current_cpu); |
1841 | } |
1842 | |
1843 | bool qemu_cpu_is_self(CPUState *cpu) |
1844 | { |
1845 | return qemu_thread_is_self(cpu->thread); |
1846 | } |
1847 | |
1848 | bool qemu_in_vcpu_thread(void) |
1849 | { |
1850 | return current_cpu && qemu_cpu_is_self(current_cpu); |
1851 | } |
1852 | |
1853 | static __thread bool iothread_locked = false; |
1854 | |
1855 | bool qemu_mutex_iothread_locked(void) |
1856 | { |
1857 | return iothread_locked; |
1858 | } |
1859 | |
1860 | /* |
1861 | * The BQL is taken from so many places that it is worth profiling the |
1862 | * callers directly, instead of funneling them all through a single function. |
1863 | */ |
1864 | void qemu_mutex_lock_iothread_impl(const char *file, int line) |
1865 | { |
1866 | QemuMutexLockFunc bql_lock = atomic_read(&qemu_bql_mutex_lock_func); |
1867 | |
1868 | g_assert(!qemu_mutex_iothread_locked()); |
1869 | bql_lock(&qemu_global_mutex, file, line); |
1870 | iothread_locked = true; |
1871 | } |
1872 | |
1873 | void qemu_mutex_unlock_iothread(void) |
1874 | { |
1875 | g_assert(qemu_mutex_iothread_locked()); |
1876 | iothread_locked = false; |
1877 | qemu_mutex_unlock(&qemu_global_mutex); |
1878 | } |
1879 | |
1880 | static bool all_vcpus_paused(void) |
1881 | { |
1882 | CPUState *cpu; |
1883 | |
1884 | CPU_FOREACH(cpu) { |
1885 | if (!cpu->stopped) { |
1886 | return false; |
1887 | } |
1888 | } |
1889 | |
1890 | return true; |
1891 | } |
1892 | |
1893 | void pause_all_vcpus(void) |
1894 | { |
1895 | CPUState *cpu; |
1896 | |
1897 | qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false); |
1898 | CPU_FOREACH(cpu) { |
1899 | if (qemu_cpu_is_self(cpu)) { |
1900 | qemu_cpu_stop(cpu, true); |
1901 | } else { |
1902 | cpu->stop = true; |
1903 | qemu_cpu_kick(cpu); |
1904 | } |
1905 | } |
1906 | |
    /* We need to drop the replay_lock so any vCPU threads woken up
     * can finish their replay tasks.
     */
1910 | replay_mutex_unlock(); |
1911 | |
1912 | while (!all_vcpus_paused()) { |
1913 | qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex); |
1914 | CPU_FOREACH(cpu) { |
1915 | qemu_cpu_kick(cpu); |
1916 | } |
1917 | } |
1918 | |
1919 | qemu_mutex_unlock_iothread(); |
1920 | replay_mutex_lock(); |
1921 | qemu_mutex_lock_iothread(); |
1922 | } |
1923 | |
1924 | void cpu_resume(CPUState *cpu) |
1925 | { |
1926 | cpu->stop = false; |
1927 | cpu->stopped = false; |
1928 | qemu_cpu_kick(cpu); |
1929 | } |
1930 | |
1931 | void resume_all_vcpus(void) |
1932 | { |
1933 | CPUState *cpu; |
1934 | |
1935 | qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true); |
1936 | CPU_FOREACH(cpu) { |
1937 | cpu_resume(cpu); |
1938 | } |
1939 | } |
1940 | |
1941 | void cpu_remove_sync(CPUState *cpu) |
1942 | { |
1943 | cpu->stop = true; |
1944 | cpu->unplug = true; |
1945 | qemu_cpu_kick(cpu); |
1946 | qemu_mutex_unlock_iothread(); |
1947 | qemu_thread_join(cpu->thread); |
1948 | qemu_mutex_lock_iothread(); |
1949 | } |
1950 | |
/* Size of the temporary buffers used when forming a vCPU thread name */
1952 | #define VCPU_THREAD_NAME_SIZE 16 |
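/*
 * 16 bytes matches the 15-characters-plus-NUL limit that
 * pthread_setname_np() imposes on thread names on Linux.
 */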
1953 | |
1954 | static void qemu_tcg_init_vcpu(CPUState *cpu) |
1955 | { |
1956 | char thread_name[VCPU_THREAD_NAME_SIZE]; |
1957 | static QemuCond *single_tcg_halt_cond; |
1958 | static QemuThread *single_tcg_cpu_thread; |
1959 | static int tcg_region_inited; |
1960 | |
1961 | assert(tcg_enabled()); |
1962 | /* |
1963 | * Initialize TCG regions--once. Now is a good time, because: |
1964 | * (1) TCG's init context, prologue and target globals have been set up. |
1965 | * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the |
1966 | * -accel flag is processed, so the check doesn't work then). |
1967 | */ |
1968 | if (!tcg_region_inited) { |
1969 | tcg_region_inited = 1; |
1970 | tcg_region_init(); |
1971 | } |
1972 | |
1973 | if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) { |
1974 | cpu->thread = g_malloc0(sizeof(QemuThread)); |
1975 | cpu->halt_cond = g_malloc0(sizeof(QemuCond)); |
1976 | qemu_cond_init(cpu->halt_cond); |
1977 | |
1978 | if (qemu_tcg_mttcg_enabled()) { |
1979 | /* create a thread per vCPU with TCG (MTTCG) */ |
1980 | parallel_cpus = true; |
            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1982 | cpu->cpu_index); |
1983 | |
1984 | qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn, |
1985 | cpu, QEMU_THREAD_JOINABLE); |
1986 | |
1987 | } else { |
1988 | /* share a single thread for all cpus with TCG */ |
            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
1990 | qemu_thread_create(cpu->thread, thread_name, |
1991 | qemu_tcg_rr_cpu_thread_fn, |
1992 | cpu, QEMU_THREAD_JOINABLE); |
1993 | |
1994 | single_tcg_halt_cond = cpu->halt_cond; |
1995 | single_tcg_cpu_thread = cpu->thread; |
1996 | } |
1997 | #ifdef _WIN32 |
1998 | cpu->hThread = qemu_thread_get_handle(cpu->thread); |
1999 | #endif |
2000 | } else { |
2001 | /* For non-MTTCG cases we share the thread */ |
2002 | cpu->thread = single_tcg_cpu_thread; |
2003 | cpu->halt_cond = single_tcg_halt_cond; |
2004 | cpu->thread_id = first_cpu->thread_id; |
2005 | cpu->can_do_io = 1; |
2006 | cpu->created = true; |
2007 | } |
2008 | } |
2009 | |
2010 | static void qemu_hax_start_vcpu(CPUState *cpu) |
2011 | { |
2012 | char thread_name[VCPU_THREAD_NAME_SIZE]; |
2013 | |
2014 | cpu->thread = g_malloc0(sizeof(QemuThread)); |
2015 | cpu->halt_cond = g_malloc0(sizeof(QemuCond)); |
2016 | qemu_cond_init(cpu->halt_cond); |
2017 | |
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
2019 | cpu->cpu_index); |
2020 | qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn, |
2021 | cpu, QEMU_THREAD_JOINABLE); |
2022 | #ifdef _WIN32 |
2023 | cpu->hThread = qemu_thread_get_handle(cpu->thread); |
2024 | #endif |
2025 | } |
2026 | |
2027 | static void qemu_kvm_start_vcpu(CPUState *cpu) |
2028 | { |
2029 | char thread_name[VCPU_THREAD_NAME_SIZE]; |
2030 | |
2031 | cpu->thread = g_malloc0(sizeof(QemuThread)); |
2032 | cpu->halt_cond = g_malloc0(sizeof(QemuCond)); |
2033 | qemu_cond_init(cpu->halt_cond); |
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
2035 | cpu->cpu_index); |
2036 | qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn, |
2037 | cpu, QEMU_THREAD_JOINABLE); |
2038 | } |
2039 | |
2040 | static void qemu_hvf_start_vcpu(CPUState *cpu) |
2041 | { |
2042 | char thread_name[VCPU_THREAD_NAME_SIZE]; |
2043 | |
    /*
     * HVF currently does not support TCG, and only runs in
     * unrestricted-guest mode.
     */
2046 | assert(hvf_enabled()); |
2047 | |
2048 | cpu->thread = g_malloc0(sizeof(QemuThread)); |
2049 | cpu->halt_cond = g_malloc0(sizeof(QemuCond)); |
2050 | qemu_cond_init(cpu->halt_cond); |
2051 | |
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
2053 | cpu->cpu_index); |
2054 | qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn, |
2055 | cpu, QEMU_THREAD_JOINABLE); |
2056 | } |
2057 | |
2058 | static void qemu_whpx_start_vcpu(CPUState *cpu) |
2059 | { |
2060 | char thread_name[VCPU_THREAD_NAME_SIZE]; |
2061 | |
2062 | cpu->thread = g_malloc0(sizeof(QemuThread)); |
2063 | cpu->halt_cond = g_malloc0(sizeof(QemuCond)); |
2064 | qemu_cond_init(cpu->halt_cond); |
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX",
2066 | cpu->cpu_index); |
2067 | qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn, |
2068 | cpu, QEMU_THREAD_JOINABLE); |
2069 | #ifdef _WIN32 |
2070 | cpu->hThread = qemu_thread_get_handle(cpu->thread); |
2071 | #endif |
2072 | } |
2073 | |
2074 | static void qemu_dummy_start_vcpu(CPUState *cpu) |
2075 | { |
2076 | char thread_name[VCPU_THREAD_NAME_SIZE]; |
2077 | |
2078 | cpu->thread = g_malloc0(sizeof(QemuThread)); |
2079 | cpu->halt_cond = g_malloc0(sizeof(QemuCond)); |
2080 | qemu_cond_init(cpu->halt_cond); |
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
2082 | cpu->cpu_index); |
2083 | qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu, |
2084 | QEMU_THREAD_JOINABLE); |
2085 | } |
2086 | |
2087 | void qemu_init_vcpu(CPUState *cpu) |
2088 | { |
2089 | MachineState *ms = MACHINE(qdev_get_machine()); |
2090 | |
2091 | cpu->nr_cores = ms->smp.cores; |
2092 | cpu->nr_threads = ms->smp.threads; |
2093 | cpu->stopped = true; |
2094 | cpu->random_seed = qemu_guest_random_seed_thread_part1(); |
2095 | |
2096 | if (!cpu->as) { |
2097 | /* If the target cpu hasn't set up any address spaces itself, |
2098 | * give it the default one. |
2099 | */ |
2100 | cpu->num_ases = 1; |
        cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
2102 | } |
2103 | |
2104 | if (kvm_enabled()) { |
2105 | qemu_kvm_start_vcpu(cpu); |
2106 | } else if (hax_enabled()) { |
2107 | qemu_hax_start_vcpu(cpu); |
2108 | } else if (hvf_enabled()) { |
2109 | qemu_hvf_start_vcpu(cpu); |
2110 | } else if (tcg_enabled()) { |
2111 | qemu_tcg_init_vcpu(cpu); |
2112 | } else if (whpx_enabled()) { |
2113 | qemu_whpx_start_vcpu(cpu); |
2114 | } else { |
2115 | qemu_dummy_start_vcpu(cpu); |
2116 | } |
2117 | |
2118 | while (!cpu->created) { |
2119 | qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex); |
2120 | } |
2121 | } |
2122 | |
2123 | void cpu_stop_current(void) |
2124 | { |
2125 | if (current_cpu) { |
2126 | current_cpu->stop = true; |
2127 | cpu_exit(current_cpu); |
2128 | } |
2129 | } |
2130 | |
2131 | int vm_stop(RunState state) |
2132 | { |
2133 | if (qemu_in_vcpu_thread()) { |
2134 | qemu_system_vmstop_request_prepare(); |
2135 | qemu_system_vmstop_request(state); |
2136 | /* |
2137 | * FIXME: should not return to device code in case |
2138 | * vm_stop() has been requested. |
2139 | */ |
2140 | cpu_stop_current(); |
2141 | return 0; |
2142 | } |
2143 | |
2144 | return do_vm_stop(state, true); |
2145 | } |
2146 | |
2147 | /** |
2148 | * Prepare for (re)starting the VM. |
2149 | * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already |
2150 | * running or in case of an error condition), 0 otherwise. |
2151 | */ |
2152 | int vm_prepare_start(void) |
2153 | { |
2154 | RunState requested; |
2155 | |
2156 | qemu_vmstop_requested(&requested); |
2157 | if (runstate_is_running() && requested == RUN_STATE__MAX) { |
2158 | return -1; |
2159 | } |
2160 | |
    /* Ensure that a STOP/RESUME pair of events is emitted if a
     * vmstop request was pending. The BLOCK_IO_ERROR event, for
     * example, is documented to always be followed by the STOP
     * event.
     */
2166 | if (runstate_is_running()) { |
2167 | qapi_event_send_stop(); |
2168 | qapi_event_send_resume(); |
2169 | return -1; |
2170 | } |
2171 | |
    /* We are sending this now, but the CPUs will be resumed shortly afterwards */
2173 | qapi_event_send_resume(); |
2174 | |
2175 | replay_enable_events(); |
2176 | cpu_enable_ticks(); |
2177 | runstate_set(RUN_STATE_RUNNING); |
2178 | vm_state_notify(1, RUN_STATE_RUNNING); |
2179 | return 0; |
2180 | } |
2181 | |
2182 | void vm_start(void) |
2183 | { |
2184 | if (!vm_prepare_start()) { |
2185 | resume_all_vcpus(); |
2186 | } |
2187 | } |
2188 | |
/*
 * Does a state transition even if the VM is already stopped; the
 * current state is forgotten forever.
 */
2191 | int vm_stop_force_state(RunState state) |
2192 | { |
2193 | if (runstate_is_running()) { |
2194 | return vm_stop(state); |
2195 | } else { |
2196 | runstate_set(state); |
2197 | |
2198 | bdrv_drain_all(); |
2199 | /* Make sure to return an error if the flush in a previous vm_stop() |
2200 | * failed. */ |
2201 | return bdrv_flush_all(); |
2202 | } |
2203 | } |
2204 | |
2205 | void list_cpus(const char *optarg) |
2206 | { |
    /* XXX: implement xxx_cpu_list for targets that still lack it */
2208 | #if defined(cpu_list) |
2209 | cpu_list(); |
2210 | #endif |
2211 | } |
2212 | |
2213 | void qmp_memsave(int64_t addr, int64_t size, const char *filename, |
2214 | bool has_cpu, int64_t cpu_index, Error **errp) |
2215 | { |
2216 | FILE *f; |
2217 | uint32_t l; |
2218 | CPUState *cpu; |
2219 | uint8_t buf[1024]; |
2220 | int64_t orig_addr = addr, orig_size = size; |
2221 | |
2222 | if (!has_cpu) { |
2223 | cpu_index = 0; |
2224 | } |
2225 | |
2226 | cpu = qemu_get_cpu(cpu_index); |
2227 | if (cpu == NULL) { |
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                   "a CPU number");
2230 | return; |
2231 | } |
2232 | |
    f = fopen(filename, "wb");
2234 | if (!f) { |
2235 | error_setg_file_open(errp, errno, filename); |
2236 | return; |
2237 | } |
2238 | |
2239 | while (size != 0) { |
2240 | l = sizeof(buf); |
        if (l > size) {
            l = size;
        }
2243 | if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) { |
2244 | error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64 |
2245 | " specified" , orig_addr, orig_size); |
2246 | goto exit; |
2247 | } |
2248 | if (fwrite(buf, 1, l, f) != l) { |
2249 | error_setg(errp, QERR_IO_ERROR); |
2250 | goto exit; |
2251 | } |
2252 | addr += l; |
2253 | size -= l; |
2254 | } |
2255 | |
2256 | exit: |
2257 | fclose(f); |
2258 | } |
2259 | |
2260 | void qmp_pmemsave(int64_t addr, int64_t size, const char *filename, |
2261 | Error **errp) |
2262 | { |
2263 | FILE *f; |
2264 | uint32_t l; |
2265 | uint8_t buf[1024]; |
2266 | |
    f = fopen(filename, "wb");
2268 | if (!f) { |
2269 | error_setg_file_open(errp, errno, filename); |
2270 | return; |
2271 | } |
2272 | |
2273 | while (size != 0) { |
2274 | l = sizeof(buf); |
        if (l > size) {
            l = size;
        }
2277 | cpu_physical_memory_read(addr, buf, l); |
2278 | if (fwrite(buf, 1, l, f) != l) { |
2279 | error_setg(errp, QERR_IO_ERROR); |
2280 | goto exit; |
2281 | } |
2282 | addr += l; |
2283 | size -= l; |
2284 | } |
2285 | |
2286 | exit: |
2287 | fclose(f); |
2288 | } |
2289 | |
2290 | void qmp_inject_nmi(Error **errp) |
2291 | { |
2292 | nmi_monitor_handle(monitor_get_cpu_index(), errp); |
2293 | } |
2294 | |
2295 | void dump_drift_info(void) |
2296 | { |
2297 | if (!use_icount) { |
2298 | return; |
2299 | } |
2300 | |
2301 | qemu_printf("Host - Guest clock %" PRIi64" ms\n" , |
2302 | (cpu_get_clock() - cpu_get_icount())/SCALE_MS); |
2303 | if (icount_align_option) { |
2304 | qemu_printf("Max guest delay %" PRIi64" ms\n" , |
2305 | -max_delay / SCALE_MS); |
2306 | qemu_printf("Max guest advance %" PRIi64" ms\n" , |
2307 | max_advance / SCALE_MS); |
2308 | } else { |
2309 | qemu_printf("Max guest delay NA\n" ); |
2310 | qemu_printf("Max guest advance NA\n" ); |
2311 | } |
2312 | } |
2313 | |