1 | /* |
2 | * Common CPU TLB handling |
3 | * |
4 | * Copyright (c) 2003 Fabrice Bellard |
5 | * |
6 | * This library is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU Lesser General Public |
8 | * License as published by the Free Software Foundation; either |
9 | * version 2.1 of the License, or (at your option) any later version. |
10 | * |
11 | * This library is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | * Lesser General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU Lesser General Public |
17 | * License along with this library; if not, see <http://www.gnu.org/licenses/>. |
18 | */ |
19 | |
20 | #include "qemu/osdep.h" |
21 | #include "qemu/main-loop.h" |
22 | #include "cpu.h" |
23 | #include "exec/exec-all.h" |
24 | #include "exec/memory.h" |
25 | #include "exec/address-spaces.h" |
26 | #include "exec/cpu_ldst.h" |
27 | #include "exec/cputlb.h" |
28 | #include "exec/memory-internal.h" |
29 | #include "exec/ram_addr.h" |
30 | #include "tcg/tcg.h" |
31 | #include "qemu/error-report.h" |
32 | #include "exec/log.h" |
33 | #include "exec/helper-proto.h" |
34 | #include "qemu/atomic.h" |
35 | #include "qemu/atomic128.h" |
36 | |
/* DEBUG defines; enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
38 | /* #define DEBUG_TLB */ |
39 | /* #define DEBUG_TLB_LOG */ |
40 | |
41 | #ifdef DEBUG_TLB |
42 | # define DEBUG_TLB_GATE 1 |
43 | # ifdef DEBUG_TLB_LOG |
44 | # define DEBUG_TLB_LOG_GATE 1 |
45 | # else |
46 | # define DEBUG_TLB_LOG_GATE 0 |
47 | # endif |
48 | #else |
49 | # define DEBUG_TLB_GATE 0 |
50 | # define DEBUG_TLB_LOG_GATE 0 |
51 | #endif |
52 | |
53 | #define tlb_debug(fmt, ...) do { \ |
54 | if (DEBUG_TLB_LOG_GATE) { \ |
55 | qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \ |
56 | ## __VA_ARGS__); \ |
57 | } else if (DEBUG_TLB_GATE) { \ |
58 | fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \ |
59 | } \ |
60 | } while (0) |
61 | |
62 | #define assert_cpu_is_self(cpu) do { \ |
63 | if (DEBUG_TLB_GATE) { \ |
64 | g_assert(!(cpu)->created || qemu_cpu_is_self(cpu)); \ |
65 | } \ |
66 | } while (0) |
67 | |
/* run_on_cpu_data.target_ptr should always be big enough for a
 * target_ulong even on 32-bit builds */
70 | QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data)); |
71 | |
/* We currently can't handle more than 16 bits in the MMUIDX bitmask. */
74 | QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16); |
75 | #define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1) |
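/*
 * For illustration (value assumed, not fixed by this file): with
 * NB_MMU_MODES == 4, ALL_MMUIDX_BITS == 0xf, and a request to flush
 * mmu indexes 0 and 2 travels as idxmap == (1 << 0) | (1 << 2) == 0x5.
 */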
76 | |
77 | static inline size_t sizeof_tlb(CPUArchState *env, uintptr_t mmu_idx) |
78 | { |
79 | return env_tlb(env)->f[mmu_idx].mask + (1 << CPU_TLB_ENTRY_BITS); |
80 | } |
81 | |
82 | static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns, |
83 | size_t max_entries) |
84 | { |
85 | desc->window_begin_ns = ns; |
86 | desc->window_max_entries = max_entries; |
87 | } |
88 | |
89 | static void tlb_dyn_init(CPUArchState *env) |
90 | { |
91 | int i; |
92 | |
93 | for (i = 0; i < NB_MMU_MODES; i++) { |
94 | CPUTLBDesc *desc = &env_tlb(env)->d[i]; |
95 | size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS; |
96 | |
97 | tlb_window_reset(desc, get_clock_realtime(), 0); |
98 | desc->n_used_entries = 0; |
99 | env_tlb(env)->f[i].mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS; |
100 | env_tlb(env)->f[i].table = g_new(CPUTLBEntry, n_entries); |
101 | env_tlb(env)->d[i].iotlb = g_new(CPUIOTLBEntry, n_entries); |
102 | } |
103 | } |
104 | |
105 | /** |
106 | * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary |
107 | * @env: CPU that owns the TLB |
108 | * @mmu_idx: MMU index of the TLB |
109 | * |
 * Called with tlb_c.lock held.
111 | * |
112 | * We have two main constraints when resizing a TLB: (1) we only resize it |
113 | * on a TLB flush (otherwise we'd have to take a perf hit by either rehashing |
114 | * the array or unnecessarily flushing it), which means we do not control how |
115 | * frequently the resizing can occur; (2) we don't have access to the guest's |
116 | * future scheduling decisions, and therefore have to decide the magnitude of |
117 | * the resize based on past observations. |
118 | * |
119 | * In general, a memory-hungry process can benefit greatly from an appropriately |
120 | * sized TLB, since a guest TLB miss is very expensive. This doesn't mean that |
121 | * we just have to make the TLB as large as possible; while an oversized TLB |
122 | * results in minimal TLB miss rates, it also takes longer to be flushed |
123 | * (flushes can be _very_ frequent), and the reduced locality can also hurt |
124 | * performance. |
125 | * |
126 | * To achieve near-optimal performance for all kinds of workloads, we: |
127 | * |
128 | * 1. Aggressively increase the size of the TLB when the use rate of the |
129 | * TLB being flushed is high, since it is likely that in the near future this |
130 | * memory-hungry process will execute again, and its memory hungriness will |
131 | * probably be similar. |
132 | * |
133 | * 2. Slowly reduce the size of the TLB as the use rate declines over a |
134 | * reasonably large time window. The rationale is that if in such a time window |
135 | * we have not observed a high TLB use rate, it is likely that we won't observe |
136 | * it in the near future. In that case, once a time window expires we downsize |
137 | * the TLB to match the maximum use rate observed in the window. |
138 | * |
139 | * 3. Try to keep the maximum use rate in a time window in the 30-70% range, |
140 | * since in that range performance is likely near-optimal. Recall that the TLB |
141 | * is direct mapped, so we want the use rate to be low (or at least not too |
142 | * high), since otherwise we are likely to have a significant amount of |
143 | * conflict misses. |
144 | */ |
145 | static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx) |
146 | { |
147 | CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx]; |
148 | size_t old_size = tlb_n_entries(env, mmu_idx); |
149 | size_t rate; |
150 | size_t new_size = old_size; |
151 | int64_t now = get_clock_realtime(); |
152 | int64_t window_len_ms = 100; |
153 | int64_t window_len_ns = window_len_ms * 1000 * 1000; |
154 | bool window_expired = now > desc->window_begin_ns + window_len_ns; |
155 | |
156 | if (desc->n_used_entries > desc->window_max_entries) { |
157 | desc->window_max_entries = desc->n_used_entries; |
158 | } |
159 | rate = desc->window_max_entries * 100 / old_size; |
160 | |
161 | if (rate > 70) { |
162 | new_size = MIN(old_size << 1, 1 << CPU_TLB_DYN_MAX_BITS); |
163 | } else if (rate < 30 && window_expired) { |
164 | size_t ceil = pow2ceil(desc->window_max_entries); |
165 | size_t expected_rate = desc->window_max_entries * 100 / ceil; |
166 | |
167 | /* |
168 | * Avoid undersizing when the max number of entries seen is just below |
169 | * a pow2. For instance, if max_entries == 1025, the expected use rate |
170 | * would be 1025/2048==50%. However, if max_entries == 1023, we'd get |
171 | * 1023/1024==99.9% use rate, so we'd likely end up doubling the size |
172 | * later. Thus, make sure that the expected use rate remains below 70%. |
173 | * (and since we double the size, that means the lowest rate we'd |
174 | * expect to get is 35%, which is still in the 30-70% range where |
175 | * we consider that the size is appropriate.) |
176 | */ |
177 | if (expected_rate > 70) { |
178 | ceil *= 2; |
179 | } |
180 | new_size = MAX(ceil, 1 << CPU_TLB_DYN_MIN_BITS); |
181 | } |
182 | |
183 | if (new_size == old_size) { |
184 | if (window_expired) { |
185 | tlb_window_reset(desc, now, desc->n_used_entries); |
186 | } |
187 | return; |
188 | } |
189 | |
190 | g_free(env_tlb(env)->f[mmu_idx].table); |
191 | g_free(env_tlb(env)->d[mmu_idx].iotlb); |
192 | |
193 | tlb_window_reset(desc, now, 0); |
194 | /* desc->n_used_entries is cleared by the caller */ |
195 | env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS; |
196 | env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size); |
197 | env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size); |
198 | /* |
199 | * If the allocations fail, try smaller sizes. We just freed some |
200 | * memory, so going back to half of new_size has a good chance of working. |
201 | * Increased memory pressure elsewhere in the system might cause the |
202 | * allocations to fail though, so we progressively reduce the allocation |
203 | * size, aborting if we cannot even allocate the smallest TLB we support. |
204 | */ |
205 | while (env_tlb(env)->f[mmu_idx].table == NULL || |
206 | env_tlb(env)->d[mmu_idx].iotlb == NULL) { |
207 | if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) { |
208 | error_report("%s: %s" , __func__, strerror(errno)); |
209 | abort(); |
210 | } |
211 | new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS); |
212 | env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS; |
213 | |
214 | g_free(env_tlb(env)->f[mmu_idx].table); |
215 | g_free(env_tlb(env)->d[mmu_idx].iotlb); |
216 | env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size); |
217 | env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size); |
218 | } |
219 | } |
220 | |
221 | static inline void tlb_table_flush_by_mmuidx(CPUArchState *env, int mmu_idx) |
222 | { |
223 | tlb_mmu_resize_locked(env, mmu_idx); |
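    /*
     * memset with -1 sets every byte to 0xff, so each comparator field
     * (addr_read/addr_write/addr_code) reads back as -1: no page-aligned
     * address can match, and tlb_entry_is_empty() recognises the entry.
     */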
224 | memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx)); |
225 | env_tlb(env)->d[mmu_idx].n_used_entries = 0; |
226 | } |
227 | |
228 | static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx) |
229 | { |
230 | env_tlb(env)->d[mmu_idx].n_used_entries++; |
231 | } |
232 | |
233 | static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx) |
234 | { |
235 | env_tlb(env)->d[mmu_idx].n_used_entries--; |
236 | } |
237 | |
238 | void tlb_init(CPUState *cpu) |
239 | { |
240 | CPUArchState *env = cpu->env_ptr; |
241 | |
242 | qemu_spin_init(&env_tlb(env)->c.lock); |
243 | |
244 | /* Ensure that cpu_reset performs a full flush. */ |
245 | env_tlb(env)->c.dirty = ALL_MMUIDX_BITS; |
246 | |
247 | tlb_dyn_init(env); |
248 | } |
249 | |
/* flush_all_helper: run fn across all cpus
 *
 * The source cpu is deliberately skipped; callers run fn on it
 * themselves, either directly or (in the _synced variants) queued as
 * "safe" work, which creates a synchronisation point where all queued
 * work is finished before execution starts again.
 */
257 | static void flush_all_helper(CPUState *src, run_on_cpu_func fn, |
258 | run_on_cpu_data d) |
259 | { |
260 | CPUState *cpu; |
261 | |
262 | CPU_FOREACH(cpu) { |
263 | if (cpu != src) { |
264 | async_run_on_cpu(cpu, fn, d); |
265 | } |
266 | } |
267 | } |
268 | |
269 | void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide) |
270 | { |
271 | CPUState *cpu; |
272 | size_t full = 0, part = 0, elide = 0; |
273 | |
274 | CPU_FOREACH(cpu) { |
275 | CPUArchState *env = cpu->env_ptr; |
276 | |
277 | full += atomic_read(&env_tlb(env)->c.full_flush_count); |
278 | part += atomic_read(&env_tlb(env)->c.part_flush_count); |
279 | elide += atomic_read(&env_tlb(env)->c.elide_flush_count); |
280 | } |
281 | *pfull = full; |
282 | *ppart = part; |
283 | *pelide = elide; |
284 | } |
285 | |
286 | static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx) |
287 | { |
288 | tlb_table_flush_by_mmuidx(env, mmu_idx); |
289 | env_tlb(env)->d[mmu_idx].large_page_addr = -1; |
290 | env_tlb(env)->d[mmu_idx].large_page_mask = -1; |
291 | env_tlb(env)->d[mmu_idx].vindex = 0; |
292 | memset(env_tlb(env)->d[mmu_idx].vtable, -1, |
293 | sizeof(env_tlb(env)->d[0].vtable)); |
294 | } |
295 | |
296 | static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data) |
297 | { |
298 | CPUArchState *env = cpu->env_ptr; |
299 | uint16_t asked = data.host_int; |
300 | uint16_t all_dirty, work, to_clean; |
301 | |
302 | assert_cpu_is_self(cpu); |
303 | |
304 | tlb_debug("mmu_idx:0x%04" PRIx16 "\n" , asked); |
305 | |
306 | qemu_spin_lock(&env_tlb(env)->c.lock); |
307 | |
308 | all_dirty = env_tlb(env)->c.dirty; |
309 | to_clean = asked & all_dirty; |
310 | all_dirty &= ~to_clean; |
311 | env_tlb(env)->c.dirty = all_dirty; |
312 | |
313 | for (work = to_clean; work != 0; work &= work - 1) { |
314 | int mmu_idx = ctz32(work); |
315 | tlb_flush_one_mmuidx_locked(env, mmu_idx); |
316 | } |
317 | |
318 | qemu_spin_unlock(&env_tlb(env)->c.lock); |
319 | |
320 | cpu_tb_jmp_cache_clear(cpu); |
321 | |
322 | if (to_clean == ALL_MMUIDX_BITS) { |
323 | atomic_set(&env_tlb(env)->c.full_flush_count, |
324 | env_tlb(env)->c.full_flush_count + 1); |
325 | } else { |
326 | atomic_set(&env_tlb(env)->c.part_flush_count, |
327 | env_tlb(env)->c.part_flush_count + ctpop16(to_clean)); |
328 | if (to_clean != asked) { |
329 | atomic_set(&env_tlb(env)->c.elide_flush_count, |
330 | env_tlb(env)->c.elide_flush_count + |
331 | ctpop16(asked & ~to_clean)); |
332 | } |
333 | } |
334 | } |
335 | |
336 | void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap) |
337 | { |
338 | tlb_debug("mmu_idx: 0x%" PRIx16 "\n" , idxmap); |
339 | |
340 | if (cpu->created && !qemu_cpu_is_self(cpu)) { |
341 | async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work, |
342 | RUN_ON_CPU_HOST_INT(idxmap)); |
343 | } else { |
344 | tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap)); |
345 | } |
346 | } |
347 | |
348 | void tlb_flush(CPUState *cpu) |
349 | { |
350 | tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS); |
351 | } |
352 | |
353 | void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap) |
354 | { |
355 | const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work; |
356 | |
357 | tlb_debug("mmu_idx: 0x%" PRIx16"\n" , idxmap); |
358 | |
359 | flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap)); |
360 | fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap)); |
361 | } |
362 | |
363 | void tlb_flush_all_cpus(CPUState *src_cpu) |
364 | { |
365 | tlb_flush_by_mmuidx_all_cpus(src_cpu, ALL_MMUIDX_BITS); |
366 | } |
367 | |
368 | void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap) |
369 | { |
370 | const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work; |
371 | |
372 | tlb_debug("mmu_idx: 0x%" PRIx16"\n" , idxmap); |
373 | |
374 | flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap)); |
375 | async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap)); |
376 | } |
377 | |
378 | void tlb_flush_all_cpus_synced(CPUState *src_cpu) |
379 | { |
380 | tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS); |
381 | } |
382 | |
383 | static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry, |
384 | target_ulong page) |
385 | { |
386 | return tlb_hit_page(tlb_entry->addr_read, page) || |
387 | tlb_hit_page(tlb_addr_write(tlb_entry), page) || |
388 | tlb_hit_page(tlb_entry->addr_code, page); |
389 | } |
390 | |
391 | /** |
392 | * tlb_entry_is_empty - return true if the entry is not in use |
393 | * @te: pointer to CPUTLBEntry |
394 | */ |
395 | static inline bool tlb_entry_is_empty(const CPUTLBEntry *te) |
396 | { |
397 | return te->addr_read == -1 && te->addr_write == -1 && te->addr_code == -1; |
398 | } |
399 | |
400 | /* Called with tlb_c.lock held */ |
401 | static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry, |
402 | target_ulong page) |
403 | { |
404 | if (tlb_hit_page_anyprot(tlb_entry, page)) { |
405 | memset(tlb_entry, -1, sizeof(*tlb_entry)); |
406 | return true; |
407 | } |
408 | return false; |
409 | } |
410 | |
411 | /* Called with tlb_c.lock held */ |
412 | static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx, |
413 | target_ulong page) |
414 | { |
415 | CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx]; |
416 | int k; |
417 | |
418 | assert_cpu_is_self(env_cpu(env)); |
419 | for (k = 0; k < CPU_VTLB_SIZE; k++) { |
420 | if (tlb_flush_entry_locked(&d->vtable[k], page)) { |
421 | tlb_n_used_entries_dec(env, mmu_idx); |
422 | } |
423 | } |
424 | } |
425 | |
426 | static void tlb_flush_page_locked(CPUArchState *env, int midx, |
427 | target_ulong page) |
428 | { |
429 | target_ulong lp_addr = env_tlb(env)->d[midx].large_page_addr; |
430 | target_ulong lp_mask = env_tlb(env)->d[midx].large_page_mask; |
431 | |
432 | /* Check if we need to flush due to large pages. */ |
433 | if ((page & lp_mask) == lp_addr) { |
434 | tlb_debug("forcing full flush midx %d (" |
435 | TARGET_FMT_lx "/" TARGET_FMT_lx ")\n" , |
436 | midx, lp_addr, lp_mask); |
437 | tlb_flush_one_mmuidx_locked(env, midx); |
438 | } else { |
439 | if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) { |
440 | tlb_n_used_entries_dec(env, midx); |
441 | } |
442 | tlb_flush_vtlb_page_locked(env, midx, page); |
443 | } |
444 | } |
445 | |
/* Since we are going to hijack the bottom bits of the page address for a
 * mmuidx bit mask, we need to fail the build if we can't do that.
 */
449 | QEMU_BUILD_BUG_ON(NB_MMU_MODES > TARGET_PAGE_BITS_MIN); |
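/*
 * For illustration (assuming TARGET_PAGE_BITS == 12): a flush of page
 * 0x12345000 for mmu indexes 0 and 3 travels as the single target_ulong
 * 0x12345000 | (1 << 0) | (1 << 3) == 0x12345009, and is split apart
 * again in tlb_flush_page_by_mmuidx_async_work().
 */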
450 | |
451 | static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu, |
452 | run_on_cpu_data data) |
453 | { |
454 | CPUArchState *env = cpu->env_ptr; |
455 | target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr; |
456 | target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK; |
457 | unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS; |
458 | int mmu_idx; |
459 | |
460 | assert_cpu_is_self(cpu); |
461 | |
462 | tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%lx\n" , |
463 | addr, mmu_idx_bitmap); |
464 | |
465 | qemu_spin_lock(&env_tlb(env)->c.lock); |
466 | for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { |
467 | if (test_bit(mmu_idx, &mmu_idx_bitmap)) { |
468 | tlb_flush_page_locked(env, mmu_idx, addr); |
469 | } |
470 | } |
471 | qemu_spin_unlock(&env_tlb(env)->c.lock); |
472 | |
473 | tb_flush_jmp_cache(cpu, addr); |
474 | } |
475 | |
476 | void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap) |
477 | { |
478 | target_ulong addr_and_mmu_idx; |
479 | |
480 | tlb_debug("addr: " TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n" , addr, idxmap); |
481 | |
482 | /* This should already be page aligned */ |
483 | addr_and_mmu_idx = addr & TARGET_PAGE_MASK; |
484 | addr_and_mmu_idx |= idxmap; |
485 | |
486 | if (!qemu_cpu_is_self(cpu)) { |
487 | async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_work, |
488 | RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx)); |
489 | } else { |
490 | tlb_flush_page_by_mmuidx_async_work( |
491 | cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx)); |
492 | } |
493 | } |
494 | |
495 | void tlb_flush_page(CPUState *cpu, target_ulong addr) |
496 | { |
497 | tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS); |
498 | } |
499 | |
500 | void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr, |
501 | uint16_t idxmap) |
502 | { |
503 | const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work; |
504 | target_ulong addr_and_mmu_idx; |
505 | |
506 | tlb_debug("addr: " TARGET_FMT_lx" mmu_idx:%" PRIx16"\n" , addr, idxmap); |
507 | |
508 | /* This should already be page aligned */ |
509 | addr_and_mmu_idx = addr & TARGET_PAGE_MASK; |
510 | addr_and_mmu_idx |= idxmap; |
511 | |
512 | flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx)); |
513 | fn(src_cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx)); |
514 | } |
515 | |
516 | void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr) |
517 | { |
518 | tlb_flush_page_by_mmuidx_all_cpus(src, addr, ALL_MMUIDX_BITS); |
519 | } |
520 | |
521 | void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu, |
522 | target_ulong addr, |
523 | uint16_t idxmap) |
524 | { |
525 | const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work; |
526 | target_ulong addr_and_mmu_idx; |
527 | |
528 | tlb_debug("addr: " TARGET_FMT_lx" mmu_idx:%" PRIx16"\n" , addr, idxmap); |
529 | |
530 | /* This should already be page aligned */ |
531 | addr_and_mmu_idx = addr & TARGET_PAGE_MASK; |
532 | addr_and_mmu_idx |= idxmap; |
533 | |
534 | flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx)); |
535 | async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx)); |
536 | } |
537 | |
538 | void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr) |
539 | { |
540 | tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS); |
541 | } |
542 | |
/* Update the TLBs so that writes to code in the virtual page 'addr'
   can be detected. */
545 | void tlb_protect_code(ram_addr_t ram_addr) |
546 | { |
547 | cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE, |
548 | DIRTY_MEMORY_CODE); |
549 | } |
550 | |
/* Update the TLB so that writes in physical page 'ram_addr' are no longer
   tested for self-modifying code. */
553 | void tlb_unprotect_code(ram_addr_t ram_addr) |
554 | { |
555 | cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE); |
556 | } |
557 | |
558 | |
559 | /* |
560 | * Dirty write flag handling |
561 | * |
562 | * When the TCG code writes to a location it looks up the address in |
563 | * the TLB and uses that data to compute the final address. If any of |
564 | * the lower bits of the address are set then the slow path is forced. |
565 | * There are a number of reasons to do this but for normal RAM the |
566 | * most usual is detecting writes to code regions which may invalidate |
567 | * generated code. |
568 | * |
569 | * Other vCPUs might be reading their TLBs during guest execution, so we update |
570 | * te->addr_write with atomic_set. We don't need to worry about this for |
571 | * oversized guests as MTTCG is disabled for them. |
572 | * |
573 | * Called with tlb_c.lock held. |
574 | */ |
575 | static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry, |
576 | uintptr_t start, uintptr_t length) |
577 | { |
578 | uintptr_t addr = tlb_entry->addr_write; |
579 | |
580 | if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) { |
581 | addr &= TARGET_PAGE_MASK; |
582 | addr += tlb_entry->addend; |
583 | if ((addr - start) < length) { |
584 | #if TCG_OVERSIZED_GUEST |
585 | tlb_entry->addr_write |= TLB_NOTDIRTY; |
586 | #else |
587 | atomic_set(&tlb_entry->addr_write, |
588 | tlb_entry->addr_write | TLB_NOTDIRTY); |
589 | #endif |
590 | } |
591 | } |
592 | } |
593 | |
594 | /* |
595 | * Called with tlb_c.lock held. |
596 | * Called only from the vCPU context, i.e. the TLB's owner thread. |
597 | */ |
598 | static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s) |
599 | { |
600 | *d = *s; |
601 | } |
602 | |
603 | /* This is a cross vCPU call (i.e. another vCPU resetting the flags of |
604 | * the target vCPU). |
605 | * We must take tlb_c.lock to avoid racing with another vCPU update. The only |
606 | * thing actually updated is the target TLB entry ->addr_write flags. |
607 | */ |
608 | void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length) |
609 | { |
610 | CPUArchState *env; |
611 | |
612 | int mmu_idx; |
613 | |
614 | env = cpu->env_ptr; |
615 | qemu_spin_lock(&env_tlb(env)->c.lock); |
616 | for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { |
617 | unsigned int i; |
618 | unsigned int n = tlb_n_entries(env, mmu_idx); |
619 | |
620 | for (i = 0; i < n; i++) { |
621 | tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i], |
622 | start1, length); |
623 | } |
624 | |
625 | for (i = 0; i < CPU_VTLB_SIZE; i++) { |
626 | tlb_reset_dirty_range_locked(&env_tlb(env)->d[mmu_idx].vtable[i], |
627 | start1, length); |
628 | } |
629 | } |
630 | qemu_spin_unlock(&env_tlb(env)->c.lock); |
631 | } |
632 | |
633 | /* Called with tlb_c.lock held */ |
634 | static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry, |
635 | target_ulong vaddr) |
636 | { |
637 | if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) { |
638 | tlb_entry->addr_write = vaddr; |
639 | } |
640 | } |
641 | |
642 | /* update the TLB corresponding to virtual page vaddr |
643 | so that it is no longer dirty */ |
644 | void tlb_set_dirty(CPUState *cpu, target_ulong vaddr) |
645 | { |
646 | CPUArchState *env = cpu->env_ptr; |
647 | int mmu_idx; |
648 | |
649 | assert_cpu_is_self(cpu); |
650 | |
651 | vaddr &= TARGET_PAGE_MASK; |
652 | qemu_spin_lock(&env_tlb(env)->c.lock); |
653 | for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { |
654 | tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr); |
655 | } |
656 | |
657 | for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { |
658 | int k; |
659 | for (k = 0; k < CPU_VTLB_SIZE; k++) { |
660 | tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], vaddr); |
661 | } |
662 | } |
663 | qemu_spin_unlock(&env_tlb(env)->c.lock); |
664 | } |
665 | |
666 | /* Our TLB does not support large pages, so remember the area covered by |
667 | large pages and trigger a full TLB flush if these are invalidated. */ |
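/*
 * A worked example with assumed 32-bit addresses: after recording a 2 MiB
 * page at 0x40000000 (lp_mask 0xffe00000), adding another 2 MiB page at
 * 0x40400000 shifts lp_mask left until the two addresses share a prefix,
 * ending at 0xff800000 -- an 8 MiB region whose invalidation now forces a
 * full flush of this mmu_idx.
 */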
668 | static void tlb_add_large_page(CPUArchState *env, int mmu_idx, |
669 | target_ulong vaddr, target_ulong size) |
670 | { |
671 | target_ulong lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr; |
672 | target_ulong lp_mask = ~(size - 1); |
673 | |
674 | if (lp_addr == (target_ulong)-1) { |
675 | /* No previous large page. */ |
676 | lp_addr = vaddr; |
677 | } else { |
        /* Extend the existing region to include the new page.
           This is a compromise between unnecessary flushes and
           the cost of maintaining a full variable-size TLB. */
681 | lp_mask &= env_tlb(env)->d[mmu_idx].large_page_mask; |
682 | while (((lp_addr ^ vaddr) & lp_mask) != 0) { |
683 | lp_mask <<= 1; |
684 | } |
685 | } |
686 | env_tlb(env)->d[mmu_idx].large_page_addr = lp_addr & lp_mask; |
687 | env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask; |
688 | } |
689 | |
690 | /* Add a new TLB entry. At most one entry for a given virtual address |
691 | * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the |
692 | * supplied size is only used by tlb_flush_page. |
693 | * |
694 | * Called from TCG-generated code, which is under an RCU read-side |
695 | * critical section. |
696 | */ |
697 | void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, |
698 | hwaddr paddr, MemTxAttrs attrs, int prot, |
699 | int mmu_idx, target_ulong size) |
700 | { |
701 | CPUArchState *env = cpu->env_ptr; |
702 | CPUTLB *tlb = env_tlb(env); |
703 | CPUTLBDesc *desc = &tlb->d[mmu_idx]; |
704 | MemoryRegionSection *section; |
705 | unsigned int index; |
706 | target_ulong address; |
707 | target_ulong code_address; |
708 | uintptr_t addend; |
709 | CPUTLBEntry *te, tn; |
710 | hwaddr iotlb, xlat, sz, paddr_page; |
711 | target_ulong vaddr_page; |
712 | int asidx = cpu_asidx_from_attrs(cpu, attrs); |
713 | int wp_flags; |
714 | |
715 | assert_cpu_is_self(cpu); |
716 | |
717 | if (size <= TARGET_PAGE_SIZE) { |
718 | sz = TARGET_PAGE_SIZE; |
719 | } else { |
720 | tlb_add_large_page(env, mmu_idx, vaddr, size); |
721 | sz = size; |
722 | } |
723 | vaddr_page = vaddr & TARGET_PAGE_MASK; |
724 | paddr_page = paddr & TARGET_PAGE_MASK; |
725 | |
726 | section = address_space_translate_for_iotlb(cpu, asidx, paddr_page, |
727 | &xlat, &sz, attrs, &prot); |
728 | assert(sz >= TARGET_PAGE_SIZE); |
729 | |
730 | tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx |
731 | " prot=%x idx=%d\n" , |
732 | vaddr, paddr, prot, mmu_idx); |
733 | |
734 | address = vaddr_page; |
735 | if (size < TARGET_PAGE_SIZE) { |
736 | /* Repeat the MMU check and TLB fill on every access. */ |
737 | address |= TLB_INVALID_MASK; |
738 | } |
739 | if (attrs.byte_swap) { |
740 | /* Force the access through the I/O slow path. */ |
741 | address |= TLB_MMIO; |
742 | } |
743 | if (!memory_region_is_ram(section->mr) && |
744 | !memory_region_is_romd(section->mr)) { |
745 | /* IO memory case */ |
746 | address |= TLB_MMIO; |
747 | addend = 0; |
748 | } else { |
749 | /* TLB_MMIO for rom/romd handled below */ |
750 | addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat; |
751 | } |
752 | |
753 | code_address = address; |
754 | iotlb = memory_region_section_get_iotlb(cpu, section, vaddr_page, |
755 | paddr_page, xlat, prot, &address); |
756 | wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page, |
757 | TARGET_PAGE_SIZE); |
758 | |
759 | index = tlb_index(env, mmu_idx, vaddr_page); |
760 | te = tlb_entry(env, mmu_idx, vaddr_page); |
761 | |
762 | /* |
763 | * Hold the TLB lock for the rest of the function. We could acquire/release |
764 | * the lock several times in the function, but it is faster to amortize the |
765 | * acquisition cost by acquiring it just once. Note that this leads to |
766 | * a longer critical section, but this is not a concern since the TLB lock |
767 | * is unlikely to be contended. |
768 | */ |
769 | qemu_spin_lock(&tlb->c.lock); |
770 | |
771 | /* Note that the tlb is no longer clean. */ |
772 | tlb->c.dirty |= 1 << mmu_idx; |
773 | |
774 | /* Make sure there's no cached translation for the new page. */ |
775 | tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page); |
776 | |
777 | /* |
778 | * Only evict the old entry to the victim tlb if it's for a |
779 | * different page; otherwise just overwrite the stale data. |
780 | */ |
781 | if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) { |
782 | unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE; |
783 | CPUTLBEntry *tv = &desc->vtable[vidx]; |
784 | |
785 | /* Evict the old entry into the victim tlb. */ |
786 | copy_tlb_helper_locked(tv, te); |
787 | desc->viotlb[vidx] = desc->iotlb[index]; |
788 | tlb_n_used_entries_dec(env, mmu_idx); |
789 | } |
790 | |
791 | /* refill the tlb */ |
792 | /* |
793 | * At this point iotlb contains a physical section number in the lower |
794 | * TARGET_PAGE_BITS, and either |
795 | * + the ram_addr_t of the page base of the target RAM (if NOTDIRTY or ROM) |
796 | * + the offset within section->mr of the page base (otherwise) |
797 | * We subtract the vaddr_page (which is page aligned and thus won't |
798 | * disturb the low bits) to give an offset which can be added to the |
799 | * (non-page-aligned) vaddr of the eventual memory access to get |
800 | * the MemoryRegion offset for the access. Note that the vaddr we |
801 | * subtract here is that of the page base, and not the same as the |
802 | * vaddr we add back in io_readx()/io_writex()/get_page_addr_code(). |
803 | */ |
804 | desc->iotlb[index].addr = iotlb - vaddr_page; |
805 | desc->iotlb[index].attrs = attrs; |
806 | |
807 | /* Now calculate the new entry */ |
808 | tn.addend = addend - vaddr_page; |
809 | if (prot & PAGE_READ) { |
810 | tn.addr_read = address; |
811 | if (wp_flags & BP_MEM_READ) { |
812 | tn.addr_read |= TLB_WATCHPOINT; |
813 | } |
814 | } else { |
815 | tn.addr_read = -1; |
816 | } |
817 | |
818 | if (prot & PAGE_EXEC) { |
819 | tn.addr_code = code_address; |
820 | } else { |
821 | tn.addr_code = -1; |
822 | } |
823 | |
824 | tn.addr_write = -1; |
825 | if (prot & PAGE_WRITE) { |
826 | if ((memory_region_is_ram(section->mr) && section->readonly) |
827 | || memory_region_is_romd(section->mr)) { |
828 | /* Write access calls the I/O callback. */ |
829 | tn.addr_write = address | TLB_MMIO; |
830 | } else if (memory_region_is_ram(section->mr) |
831 | && cpu_physical_memory_is_clean( |
832 | memory_region_get_ram_addr(section->mr) + xlat)) { |
833 | tn.addr_write = address | TLB_NOTDIRTY; |
834 | } else { |
835 | tn.addr_write = address; |
836 | } |
837 | if (prot & PAGE_WRITE_INV) { |
838 | tn.addr_write |= TLB_INVALID_MASK; |
839 | } |
840 | if (wp_flags & BP_MEM_WRITE) { |
841 | tn.addr_write |= TLB_WATCHPOINT; |
842 | } |
843 | } |
844 | |
845 | copy_tlb_helper_locked(te, &tn); |
846 | tlb_n_used_entries_inc(env, mmu_idx); |
847 | qemu_spin_unlock(&tlb->c.lock); |
848 | } |
849 | |
850 | /* Add a new TLB entry, but without specifying the memory |
851 | * transaction attributes to be used. |
852 | */ |
853 | void tlb_set_page(CPUState *cpu, target_ulong vaddr, |
854 | hwaddr paddr, int prot, |
855 | int mmu_idx, target_ulong size) |
856 | { |
857 | tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED, |
858 | prot, mmu_idx, size); |
859 | } |
860 | |
861 | static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr) |
862 | { |
863 | ram_addr_t ram_addr; |
864 | |
865 | ram_addr = qemu_ram_addr_from_host(ptr); |
866 | if (ram_addr == RAM_ADDR_INVALID) { |
867 | error_report("Bad ram pointer %p" , ptr); |
868 | abort(); |
869 | } |
870 | return ram_addr; |
871 | } |
872 | |
873 | /* |
874 | * Note: tlb_fill() can trigger a resize of the TLB. This means that all of the |
875 | * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must |
876 | * be discarded and looked up again (e.g. via tlb_entry()). |
877 | */ |
878 | static void tlb_fill(CPUState *cpu, target_ulong addr, int size, |
879 | MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) |
880 | { |
881 | CPUClass *cc = CPU_GET_CLASS(cpu); |
882 | bool ok; |
883 | |
884 | /* |
885 | * This is not a probe, so only valid return is success; failure |
886 | * should result in exception + longjmp to the cpu loop. |
887 | */ |
888 | ok = cc->tlb_fill(cpu, addr, size, access_type, mmu_idx, false, retaddr); |
889 | assert(ok); |
890 | } |
891 | |
892 | static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, |
893 | int mmu_idx, target_ulong addr, uintptr_t retaddr, |
894 | MMUAccessType access_type, MemOp op) |
895 | { |
896 | CPUState *cpu = env_cpu(env); |
897 | hwaddr mr_offset; |
898 | MemoryRegionSection *section; |
899 | MemoryRegion *mr; |
900 | uint64_t val; |
901 | bool locked = false; |
902 | MemTxResult r; |
903 | |
904 | if (iotlbentry->attrs.byte_swap) { |
905 | op ^= MO_BSWAP; |
906 | } |
907 | |
908 | section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); |
909 | mr = section->mr; |
910 | mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; |
911 | cpu->mem_io_pc = retaddr; |
912 | if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) { |
913 | cpu_io_recompile(cpu, retaddr); |
914 | } |
915 | |
916 | cpu->mem_io_vaddr = addr; |
917 | cpu->mem_io_access_type = access_type; |
918 | |
919 | if (mr->global_locking && !qemu_mutex_iothread_locked()) { |
920 | qemu_mutex_lock_iothread(); |
921 | locked = true; |
922 | } |
923 | r = memory_region_dispatch_read(mr, mr_offset, &val, op, iotlbentry->attrs); |
924 | if (r != MEMTX_OK) { |
925 | hwaddr physaddr = mr_offset + |
926 | section->offset_within_address_space - |
927 | section->offset_within_region; |
928 | |
929 | cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type, |
930 | mmu_idx, iotlbentry->attrs, r, retaddr); |
931 | } |
932 | if (locked) { |
933 | qemu_mutex_unlock_iothread(); |
934 | } |
935 | |
936 | return val; |
937 | } |
938 | |
939 | static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry, |
940 | int mmu_idx, uint64_t val, target_ulong addr, |
941 | uintptr_t retaddr, MemOp op) |
942 | { |
943 | CPUState *cpu = env_cpu(env); |
944 | hwaddr mr_offset; |
945 | MemoryRegionSection *section; |
946 | MemoryRegion *mr; |
947 | bool locked = false; |
948 | MemTxResult r; |
949 | |
950 | if (iotlbentry->attrs.byte_swap) { |
951 | op ^= MO_BSWAP; |
952 | } |
953 | |
954 | section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs); |
955 | mr = section->mr; |
956 | mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; |
957 | if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) { |
958 | cpu_io_recompile(cpu, retaddr); |
959 | } |
960 | cpu->mem_io_vaddr = addr; |
961 | cpu->mem_io_pc = retaddr; |
962 | |
963 | if (mr->global_locking && !qemu_mutex_iothread_locked()) { |
964 | qemu_mutex_lock_iothread(); |
965 | locked = true; |
966 | } |
967 | r = memory_region_dispatch_write(mr, mr_offset, val, op, iotlbentry->attrs); |
968 | if (r != MEMTX_OK) { |
969 | hwaddr physaddr = mr_offset + |
970 | section->offset_within_address_space - |
971 | section->offset_within_region; |
972 | |
973 | cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), |
974 | MMU_DATA_STORE, mmu_idx, iotlbentry->attrs, r, |
975 | retaddr); |
976 | } |
977 | if (locked) { |
978 | qemu_mutex_unlock_iothread(); |
979 | } |
980 | } |
981 | |
982 | static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs) |
983 | { |
984 | #if TCG_OVERSIZED_GUEST |
985 | return *(target_ulong *)((uintptr_t)entry + ofs); |
986 | #else |
987 | /* ofs might correspond to .addr_write, so use atomic_read */ |
988 | return atomic_read((target_ulong *)((uintptr_t)entry + ofs)); |
989 | #endif |
990 | } |
991 | |
992 | /* Return true if ADDR is present in the victim tlb, and has been copied |
993 | back to the main tlb. */ |
994 | static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, |
995 | size_t elt_ofs, target_ulong page) |
996 | { |
997 | size_t vidx; |
998 | |
999 | assert_cpu_is_self(env_cpu(env)); |
1000 | for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) { |
1001 | CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx]; |
1002 | target_ulong cmp; |
1003 | |
1004 | /* elt_ofs might correspond to .addr_write, so use atomic_read */ |
1005 | #if TCG_OVERSIZED_GUEST |
1006 | cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs); |
1007 | #else |
1008 | cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs)); |
1009 | #endif |
1010 | |
1011 | if (cmp == page) { |
1012 | /* Found entry in victim tlb, swap tlb and iotlb. */ |
1013 | CPUTLBEntry tmptlb, *tlb = &env_tlb(env)->f[mmu_idx].table[index]; |
1014 | |
1015 | qemu_spin_lock(&env_tlb(env)->c.lock); |
1016 | copy_tlb_helper_locked(&tmptlb, tlb); |
1017 | copy_tlb_helper_locked(tlb, vtlb); |
1018 | copy_tlb_helper_locked(vtlb, &tmptlb); |
1019 | qemu_spin_unlock(&env_tlb(env)->c.lock); |
1020 | |
1021 | CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index]; |
1022 | CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx]; |
1023 | tmpio = *io; *io = *vio; *vio = tmpio; |
1024 | return true; |
1025 | } |
1026 | } |
1027 | return false; |
1028 | } |
1029 | |
1030 | /* Macro to call the above, with local variables from the use context. */ |
1031 | #define VICTIM_TLB_HIT(TY, ADDR) \ |
1032 | victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \ |
1033 | (ADDR) & TARGET_PAGE_MASK) |
1034 | |
1035 | /* |
1036 | * Return a ram_addr_t for the virtual address for execution. |
1037 | * |
1038 | * Return -1 if we can't translate and execute from an entire page |
1039 | * of RAM. This will force us to execute by loading and translating |
1040 | * one insn at a time, without caching. |
1041 | * |
1042 | * NOTE: This function will trigger an exception if the page is |
1043 | * not executable. |
1044 | */ |
1045 | tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr) |
1046 | { |
1047 | uintptr_t mmu_idx = cpu_mmu_index(env, true); |
1048 | uintptr_t index = tlb_index(env, mmu_idx, addr); |
1049 | CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); |
1050 | void *p; |
1051 | |
1052 | if (unlikely(!tlb_hit(entry->addr_code, addr))) { |
1053 | if (!VICTIM_TLB_HIT(addr_code, addr)) { |
1054 | tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0); |
1055 | index = tlb_index(env, mmu_idx, addr); |
1056 | entry = tlb_entry(env, mmu_idx, addr); |
1057 | |
1058 | if (unlikely(entry->addr_code & TLB_INVALID_MASK)) { |
1059 | /* |
1060 | * The MMU protection covers a smaller range than a target |
1061 | * page, so we must redo the MMU check for every insn. |
1062 | */ |
1063 | return -1; |
1064 | } |
1065 | } |
1066 | assert(tlb_hit(entry->addr_code, addr)); |
1067 | } |
1068 | |
1069 | if (unlikely(entry->addr_code & TLB_MMIO)) { |
1070 | /* The region is not backed by RAM. */ |
1071 | return -1; |
1072 | } |
1073 | |
1074 | p = (void *)((uintptr_t)addr + entry->addend); |
1075 | return qemu_ram_addr_from_host_nofail(p); |
1076 | } |
1077 | |
1078 | /* |
1079 | * Probe for whether the specified guest access is permitted. If it is not |
1080 | * permitted then an exception will be taken in the same way as if this |
1081 | * were a real access (and we will not return). |
1082 | * If the size is 0 or the page requires I/O access, returns NULL; otherwise, |
1083 | * returns the address of the host page similar to tlb_vaddr_to_host(). |
1084 | */ |
1085 | void *probe_access(CPUArchState *env, target_ulong addr, int size, |
1086 | MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) |
1087 | { |
1088 | uintptr_t index = tlb_index(env, mmu_idx, addr); |
1089 | CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); |
1090 | target_ulong tlb_addr; |
1091 | size_t elt_ofs; |
1092 | int wp_access; |
1093 | |
1094 | g_assert(-(addr | TARGET_PAGE_MASK) >= size); |
1095 | |
1096 | switch (access_type) { |
1097 | case MMU_DATA_LOAD: |
1098 | elt_ofs = offsetof(CPUTLBEntry, addr_read); |
1099 | wp_access = BP_MEM_READ; |
1100 | break; |
1101 | case MMU_DATA_STORE: |
1102 | elt_ofs = offsetof(CPUTLBEntry, addr_write); |
1103 | wp_access = BP_MEM_WRITE; |
1104 | break; |
1105 | case MMU_INST_FETCH: |
1106 | elt_ofs = offsetof(CPUTLBEntry, addr_code); |
1107 | wp_access = BP_MEM_READ; |
1108 | break; |
1109 | default: |
1110 | g_assert_not_reached(); |
1111 | } |
1112 | tlb_addr = tlb_read_ofs(entry, elt_ofs); |
1113 | |
1114 | if (unlikely(!tlb_hit(tlb_addr, addr))) { |
1115 | if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, |
1116 | addr & TARGET_PAGE_MASK)) { |
1117 | tlb_fill(env_cpu(env), addr, size, access_type, mmu_idx, retaddr); |
1118 | /* TLB resize via tlb_fill may have moved the entry. */ |
1119 | index = tlb_index(env, mmu_idx, addr); |
1120 | entry = tlb_entry(env, mmu_idx, addr); |
1121 | } |
1122 | tlb_addr = tlb_read_ofs(entry, elt_ofs); |
1123 | } |
1124 | |
1125 | if (!size) { |
1126 | return NULL; |
1127 | } |
1128 | |
1129 | /* Handle watchpoints. */ |
1130 | if (tlb_addr & TLB_WATCHPOINT) { |
1131 | cpu_check_watchpoint(env_cpu(env), addr, size, |
1132 | env_tlb(env)->d[mmu_idx].iotlb[index].attrs, |
1133 | wp_access, retaddr); |
1134 | } |
1135 | |
1136 | if (tlb_addr & (TLB_NOTDIRTY | TLB_MMIO)) { |
1137 | /* I/O access */ |
1138 | return NULL; |
1139 | } |
1140 | |
1141 | return (void *)((uintptr_t)addr + entry->addend); |
1142 | } |
1143 | |
1144 | void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, |
1145 | MMUAccessType access_type, int mmu_idx) |
1146 | { |
1147 | CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); |
1148 | uintptr_t tlb_addr, page; |
1149 | size_t elt_ofs; |
1150 | |
1151 | switch (access_type) { |
1152 | case MMU_DATA_LOAD: |
1153 | elt_ofs = offsetof(CPUTLBEntry, addr_read); |
1154 | break; |
1155 | case MMU_DATA_STORE: |
1156 | elt_ofs = offsetof(CPUTLBEntry, addr_write); |
1157 | break; |
1158 | case MMU_INST_FETCH: |
1159 | elt_ofs = offsetof(CPUTLBEntry, addr_code); |
1160 | break; |
1161 | default: |
1162 | g_assert_not_reached(); |
1163 | } |
1164 | |
1165 | page = addr & TARGET_PAGE_MASK; |
1166 | tlb_addr = tlb_read_ofs(entry, elt_ofs); |
1167 | |
1168 | if (!tlb_hit_page(tlb_addr, page)) { |
1169 | uintptr_t index = tlb_index(env, mmu_idx, addr); |
1170 | |
1171 | if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page)) { |
1172 | CPUState *cs = env_cpu(env); |
1173 | CPUClass *cc = CPU_GET_CLASS(cs); |
1174 | |
1175 | if (!cc->tlb_fill(cs, addr, 0, access_type, mmu_idx, true, 0)) { |
1176 | /* Non-faulting page table read failed. */ |
1177 | return NULL; |
1178 | } |
1179 | |
1180 | /* TLB resize via tlb_fill may have moved the entry. */ |
1181 | entry = tlb_entry(env, mmu_idx, addr); |
1182 | } |
1183 | tlb_addr = tlb_read_ofs(entry, elt_ofs); |
1184 | } |
1185 | |
1186 | if (tlb_addr & ~TARGET_PAGE_MASK) { |
1187 | /* IO access */ |
1188 | return NULL; |
1189 | } |
1190 | |
1191 | return (void *)((uintptr_t)addr + entry->addend); |
1192 | } |
1193 | |
/* Probe for a read-modify-write atomic operation.  Do not allow unaligned
 * operations, or I/O operations, to proceed.  Return the host address. */
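/*
 * For example (illustrative values): a 4-byte atomic operation has
 * s_bits == 2, so an addr with (addr & 3) != 0 never reaches the host
 * access below; it either raises the guest's alignment fault or exits
 * to the stop-the-world slow path.
 */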
1196 | static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, |
1197 | TCGMemOpIdx oi, uintptr_t retaddr, |
1198 | NotDirtyInfo *ndi) |
1199 | { |
1200 | size_t mmu_idx = get_mmuidx(oi); |
1201 | uintptr_t index = tlb_index(env, mmu_idx, addr); |
1202 | CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr); |
1203 | target_ulong tlb_addr = tlb_addr_write(tlbe); |
1204 | MemOp mop = get_memop(oi); |
1205 | int a_bits = get_alignment_bits(mop); |
1206 | int s_bits = mop & MO_SIZE; |
1207 | void *hostaddr; |
1208 | |
1209 | /* Adjust the given return address. */ |
1210 | retaddr -= GETPC_ADJ; |
1211 | |
1212 | /* Enforce guest required alignment. */ |
1213 | if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) { |
1214 | /* ??? Maybe indicate atomic op to cpu_unaligned_access */ |
1215 | cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE, |
1216 | mmu_idx, retaddr); |
1217 | } |
1218 | |
1219 | /* Enforce qemu required alignment. */ |
1220 | if (unlikely(addr & ((1 << s_bits) - 1))) { |
1221 | /* We get here if guest alignment was not requested, |
1222 | or was not enforced by cpu_unaligned_access above. |
1223 | We might widen the access and emulate, but for now |
1224 | mark an exception and exit the cpu loop. */ |
1225 | goto stop_the_world; |
1226 | } |
1227 | |
1228 | /* Check TLB entry and enforce page permissions. */ |
1229 | if (!tlb_hit(tlb_addr, addr)) { |
1230 | if (!VICTIM_TLB_HIT(addr_write, addr)) { |
1231 | tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_STORE, |
1232 | mmu_idx, retaddr); |
1233 | index = tlb_index(env, mmu_idx, addr); |
1234 | tlbe = tlb_entry(env, mmu_idx, addr); |
1235 | } |
1236 | tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK; |
1237 | } |
1238 | |
1239 | /* Notice an IO access or a needs-MMU-lookup access */ |
1240 | if (unlikely(tlb_addr & TLB_MMIO)) { |
1241 | /* There's really nothing that can be done to |
1242 | support this apart from stop-the-world. */ |
1243 | goto stop_the_world; |
1244 | } |
1245 | |
1246 | /* Let the guest notice RMW on a write-only page. */ |
1247 | if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) { |
1248 | tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_LOAD, |
1249 | mmu_idx, retaddr); |
1250 | /* Since we don't support reads and writes to different addresses, |
1251 | and we do have the proper page loaded for write, this shouldn't |
1252 | ever return. But just in case, handle via stop-the-world. */ |
1253 | goto stop_the_world; |
1254 | } |
1255 | |
1256 | hostaddr = (void *)((uintptr_t)addr + tlbe->addend); |
1257 | |
1258 | ndi->active = false; |
1259 | if (unlikely(tlb_addr & TLB_NOTDIRTY)) { |
1260 | ndi->active = true; |
1261 | memory_notdirty_write_prepare(ndi, env_cpu(env), addr, |
1262 | qemu_ram_addr_from_host_nofail(hostaddr), |
1263 | 1 << s_bits); |
1264 | } |
1265 | |
1266 | return hostaddr; |
1267 | |
1268 | stop_the_world: |
1269 | cpu_loop_exit_atomic(env_cpu(env), retaddr); |
1270 | } |
1271 | |
1272 | /* |
1273 | * Load Helpers |
1274 | * |
1275 | * We support two different access types. SOFTMMU_CODE_ACCESS is |
1276 | * specifically for reading instructions from system memory. It is |
1277 | * called by the translation loop and in some helpers where the code |
1278 | * is disassembled. It shouldn't be called directly by guest code. |
1279 | */ |
1280 | |
1281 | typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr, |
1282 | TCGMemOpIdx oi, uintptr_t retaddr); |
1283 | |
1284 | static inline uint64_t __attribute__((always_inline)) |
1285 | load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, |
1286 | uintptr_t retaddr, MemOp op, bool code_read, |
1287 | FullLoadHelper *full_load) |
1288 | { |
1289 | uintptr_t mmu_idx = get_mmuidx(oi); |
1290 | uintptr_t index = tlb_index(env, mmu_idx, addr); |
1291 | CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); |
1292 | target_ulong tlb_addr = code_read ? entry->addr_code : entry->addr_read; |
1293 | const size_t tlb_off = code_read ? |
1294 | offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read); |
1295 | const MMUAccessType access_type = |
1296 | code_read ? MMU_INST_FETCH : MMU_DATA_LOAD; |
1297 | unsigned a_bits = get_alignment_bits(get_memop(oi)); |
1298 | void *haddr; |
1299 | uint64_t res; |
1300 | size_t size = memop_size(op); |
1301 | |
    /* Handle CPU-specific unaligned behaviour */
1303 | if (addr & ((1 << a_bits) - 1)) { |
1304 | cpu_unaligned_access(env_cpu(env), addr, access_type, |
1305 | mmu_idx, retaddr); |
1306 | } |
1307 | |
1308 | /* If the TLB entry is for a different page, reload and try again. */ |
1309 | if (!tlb_hit(tlb_addr, addr)) { |
1310 | if (!victim_tlb_hit(env, mmu_idx, index, tlb_off, |
1311 | addr & TARGET_PAGE_MASK)) { |
1312 | tlb_fill(env_cpu(env), addr, size, |
1313 | access_type, mmu_idx, retaddr); |
1314 | index = tlb_index(env, mmu_idx, addr); |
1315 | entry = tlb_entry(env, mmu_idx, addr); |
1316 | } |
1317 | tlb_addr = code_read ? entry->addr_code : entry->addr_read; |
1318 | tlb_addr &= ~TLB_INVALID_MASK; |
1319 | } |
1320 | |
1321 | /* Handle anything that isn't just a straight memory access. */ |
1322 | if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { |
1323 | CPUIOTLBEntry *iotlbentry; |
1324 | |
1325 | /* For anything that is unaligned, recurse through full_load. */ |
1326 | if ((addr & (size - 1)) != 0) { |
1327 | goto do_unaligned_access; |
1328 | } |
1329 | |
1330 | iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; |
1331 | |
1332 | /* Handle watchpoints. */ |
1333 | if (unlikely(tlb_addr & TLB_WATCHPOINT)) { |
1334 | /* On watchpoint hit, this will longjmp out. */ |
1335 | cpu_check_watchpoint(env_cpu(env), addr, size, |
1336 | iotlbentry->attrs, BP_MEM_READ, retaddr); |
1337 | |
1338 | /* The backing page may or may not require I/O. */ |
1339 | tlb_addr &= ~TLB_WATCHPOINT; |
1340 | if ((tlb_addr & ~TARGET_PAGE_MASK) == 0) { |
1341 | goto do_aligned_access; |
1342 | } |
1343 | } |
1344 | |
1345 | /* Handle I/O access. */ |
1346 | return io_readx(env, iotlbentry, mmu_idx, addr, |
1347 | retaddr, access_type, op); |
1348 | } |
1349 | |
1350 | /* Handle slow unaligned access (it spans two pages or IO). */ |
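    /*
     * Illustrative little-endian case (assumed values): a 4-byte load at
     * an addr with (addr & 3) == 2 gives shift == 16, so the result below
     * is (r1 >> 16) | (r2 << 16) masked to 32 bits -- the top two bytes
     * of the first aligned word joined with the bottom two of the second.
     */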
1351 | if (size > 1 |
1352 | && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1 |
1353 | >= TARGET_PAGE_SIZE)) { |
1354 | target_ulong addr1, addr2; |
1355 | uint64_t r1, r2; |
1356 | unsigned shift; |
1357 | do_unaligned_access: |
1358 | addr1 = addr & ~((target_ulong)size - 1); |
1359 | addr2 = addr1 + size; |
1360 | r1 = full_load(env, addr1, oi, retaddr); |
1361 | r2 = full_load(env, addr2, oi, retaddr); |
1362 | shift = (addr & (size - 1)) * 8; |
1363 | |
1364 | if (memop_big_endian(op)) { |
1365 | /* Big-endian combine. */ |
1366 | res = (r1 << shift) | (r2 >> ((size * 8) - shift)); |
1367 | } else { |
1368 | /* Little-endian combine. */ |
1369 | res = (r1 >> shift) | (r2 << ((size * 8) - shift)); |
1370 | } |
1371 | return res & MAKE_64BIT_MASK(0, size * 8); |
1372 | } |
1373 | |
1374 | do_aligned_access: |
1375 | haddr = (void *)((uintptr_t)addr + entry->addend); |
1376 | switch (op) { |
1377 | case MO_UB: |
1378 | res = ldub_p(haddr); |
1379 | break; |
1380 | case MO_BEUW: |
1381 | res = lduw_be_p(haddr); |
1382 | break; |
1383 | case MO_LEUW: |
1384 | res = lduw_le_p(haddr); |
1385 | break; |
1386 | case MO_BEUL: |
1387 | res = (uint32_t)ldl_be_p(haddr); |
1388 | break; |
1389 | case MO_LEUL: |
1390 | res = (uint32_t)ldl_le_p(haddr); |
1391 | break; |
1392 | case MO_BEQ: |
1393 | res = ldq_be_p(haddr); |
1394 | break; |
1395 | case MO_LEQ: |
1396 | res = ldq_le_p(haddr); |
1397 | break; |
1398 | default: |
1399 | g_assert_not_reached(); |
1400 | } |
1401 | |
1402 | return res; |
1403 | } |
1404 | |
1405 | /* |
1406 | * For the benefit of TCG generated code, we want to avoid the |
1407 | * complication of ABI-specific return type promotion and always |
1408 | * return a value extended to the register size of the host. This is |
1409 | * tcg_target_long, except in the case of a 32-bit host and 64-bit |
1410 | * data, and for that we always have uint64_t. |
1411 | * |
1412 | * We don't bother with this widened value for SOFTMMU_CODE_ACCESS. |
1413 | */ |
1414 | |
1415 | static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr, |
1416 | TCGMemOpIdx oi, uintptr_t retaddr) |
1417 | { |
1418 | return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu); |
1419 | } |
1420 | |
1421 | tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr, |
1422 | TCGMemOpIdx oi, uintptr_t retaddr) |
1423 | { |
1424 | return full_ldub_mmu(env, addr, oi, retaddr); |
1425 | } |
1426 | |
1427 | static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr, |
1428 | TCGMemOpIdx oi, uintptr_t retaddr) |
1429 | { |
1430 | return load_helper(env, addr, oi, retaddr, MO_LEUW, false, |
1431 | full_le_lduw_mmu); |
1432 | } |
1433 | |
1434 | tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr, |
1435 | TCGMemOpIdx oi, uintptr_t retaddr) |
1436 | { |
1437 | return full_le_lduw_mmu(env, addr, oi, retaddr); |
1438 | } |
1439 | |
1440 | static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr, |
1441 | TCGMemOpIdx oi, uintptr_t retaddr) |
1442 | { |
1443 | return load_helper(env, addr, oi, retaddr, MO_BEUW, false, |
1444 | full_be_lduw_mmu); |
1445 | } |
1446 | |
1447 | tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr, |
1448 | TCGMemOpIdx oi, uintptr_t retaddr) |
1449 | { |
1450 | return full_be_lduw_mmu(env, addr, oi, retaddr); |
1451 | } |
1452 | |
1453 | static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr, |
1454 | TCGMemOpIdx oi, uintptr_t retaddr) |
1455 | { |
1456 | return load_helper(env, addr, oi, retaddr, MO_LEUL, false, |
1457 | full_le_ldul_mmu); |
1458 | } |
1459 | |
1460 | tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr, |
1461 | TCGMemOpIdx oi, uintptr_t retaddr) |
1462 | { |
1463 | return full_le_ldul_mmu(env, addr, oi, retaddr); |
1464 | } |
1465 | |
1466 | static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr, |
1467 | TCGMemOpIdx oi, uintptr_t retaddr) |
1468 | { |
1469 | return load_helper(env, addr, oi, retaddr, MO_BEUL, false, |
1470 | full_be_ldul_mmu); |
1471 | } |
1472 | |
1473 | tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr, |
1474 | TCGMemOpIdx oi, uintptr_t retaddr) |
1475 | { |
1476 | return full_be_ldul_mmu(env, addr, oi, retaddr); |
1477 | } |
1478 | |
1479 | uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr, |
1480 | TCGMemOpIdx oi, uintptr_t retaddr) |
1481 | { |
1482 | return load_helper(env, addr, oi, retaddr, MO_LEQ, false, |
1483 | helper_le_ldq_mmu); |
1484 | } |
1485 | |
1486 | uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr, |
1487 | TCGMemOpIdx oi, uintptr_t retaddr) |
1488 | { |
1489 | return load_helper(env, addr, oi, retaddr, MO_BEQ, false, |
1490 | helper_be_ldq_mmu); |
1491 | } |
1492 | |
1493 | /* |
1494 | * Provide signed versions of the load routines as well. We can of course |
1495 | * avoid this for 64-bit data, or for 32-bit data on 32-bit host. |
1496 | */ |
1497 | |
1498 | |
1499 | tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr, |
1500 | TCGMemOpIdx oi, uintptr_t retaddr) |
1501 | { |
1502 | return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr); |
1503 | } |
1504 | |
1505 | tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr, |
1506 | TCGMemOpIdx oi, uintptr_t retaddr) |
1507 | { |
1508 | return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr); |
1509 | } |
1510 | |
1511 | tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr, |
1512 | TCGMemOpIdx oi, uintptr_t retaddr) |
1513 | { |
1514 | return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr); |
1515 | } |
1516 | |
1517 | tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr, |
1518 | TCGMemOpIdx oi, uintptr_t retaddr) |
1519 | { |
1520 | return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr); |
1521 | } |
1522 | |
1523 | tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr, |
1524 | TCGMemOpIdx oi, uintptr_t retaddr) |
1525 | { |
1526 | return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr); |
1527 | } |
1528 | |
1529 | /* |
1530 | * Store Helpers |
1531 | */ |
1532 | |
1533 | static inline void __attribute__((always_inline)) |
1534 | store_helper(CPUArchState *env, target_ulong addr, uint64_t val, |
1535 | TCGMemOpIdx oi, uintptr_t retaddr, MemOp op) |
1536 | { |
1537 | uintptr_t mmu_idx = get_mmuidx(oi); |
1538 | uintptr_t index = tlb_index(env, mmu_idx, addr); |
1539 | CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); |
1540 | target_ulong tlb_addr = tlb_addr_write(entry); |
1541 | const size_t tlb_off = offsetof(CPUTLBEntry, addr_write); |
1542 | unsigned a_bits = get_alignment_bits(get_memop(oi)); |
1543 | void *haddr; |
1544 | size_t size = memop_size(op); |
1545 | |
    /*
     * Handle CPU-specific unaligned behaviour.  a_bits is the log2 of
     * the alignment this access requires; any set address bit below
     * that triggers the target's unaligned-access hook.
     */
1547 | if (addr & ((1 << a_bits) - 1)) { |
1548 | cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE, |
1549 | mmu_idx, retaddr); |
1550 | } |
1551 | |
1552 | /* If the TLB entry is for a different page, reload and try again. */ |
1553 | if (!tlb_hit(tlb_addr, addr)) { |
1554 | if (!victim_tlb_hit(env, mmu_idx, index, tlb_off, |
1555 | addr & TARGET_PAGE_MASK)) { |
1556 | tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE, |
1557 | mmu_idx, retaddr); |
1558 | index = tlb_index(env, mmu_idx, addr); |
1559 | entry = tlb_entry(env, mmu_idx, addr); |
1560 | } |
1561 | tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK; |
1562 | } |
1563 | |
1564 | /* Handle anything that isn't just a straight memory access. */ |
1565 | if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { |
1566 | CPUIOTLBEntry *iotlbentry; |
1567 | |
1568 | /* For anything that is unaligned, recurse through byte stores. */ |
1569 | if ((addr & (size - 1)) != 0) { |
1570 | goto do_unaligned_access; |
1571 | } |
1572 | |
1573 | iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; |
1574 | |
1575 | /* Handle watchpoints. */ |
1576 | if (unlikely(tlb_addr & TLB_WATCHPOINT)) { |
1577 | /* On watchpoint hit, this will longjmp out. */ |
1578 | cpu_check_watchpoint(env_cpu(env), addr, size, |
1579 | iotlbentry->attrs, BP_MEM_WRITE, retaddr); |
1580 | |
1581 | /* The backing page may or may not require I/O. */ |
1582 | tlb_addr &= ~TLB_WATCHPOINT; |
1583 | if ((tlb_addr & ~TARGET_PAGE_MASK) == 0) { |
1584 | goto do_aligned_access; |
1585 | } |
1586 | } |
1587 | |
1588 | /* Handle I/O access. */ |
1589 | io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr, op); |
1590 | return; |
1591 | } |
1592 | |
    /* Handle slow unaligned access (it spans two pages or I/O). */
1594 | if (size > 1 |
1595 | && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1 |
1596 | >= TARGET_PAGE_SIZE)) { |
1597 | int i; |
1598 | uintptr_t index2; |
1599 | CPUTLBEntry *entry2; |
1600 | target_ulong page2, tlb_addr2; |
1601 | size_t size2; |
1602 | |
1603 | do_unaligned_access: |
1604 | /* |
1605 | * Ensure the second page is in the TLB. Note that the first page |
1606 | * is already guaranteed to be filled, and that the second page |
1607 | * cannot evict the first. |
1608 | */ |
1609 | page2 = (addr + size) & TARGET_PAGE_MASK; |
1610 | size2 = (addr + size) & ~TARGET_PAGE_MASK; |
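        /*
         * With 4KiB pages, for example, a 4-byte store at page offset
         * 0xffe leaves size - size2 == 2 bytes on the first page and
         * size2 == 2 bytes at the start of page2.
         */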
1611 | index2 = tlb_index(env, mmu_idx, page2); |
1612 | entry2 = tlb_entry(env, mmu_idx, page2); |
1613 | tlb_addr2 = tlb_addr_write(entry2); |
1614 | if (!tlb_hit_page(tlb_addr2, page2)) { |
1615 | if (!victim_tlb_hit(env, mmu_idx, index2, tlb_off, page2)) { |
1616 | tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE, |
1617 | mmu_idx, retaddr); |
1618 | index2 = tlb_index(env, mmu_idx, page2); |
1619 | entry2 = tlb_entry(env, mmu_idx, page2); |
1620 | } |
1621 | tlb_addr2 = tlb_addr_write(entry2); |
1622 | } |
1623 | |
1624 | /* |
1625 | * Handle watchpoints. Since this may trap, all checks |
1626 | * must happen before any store. |
1627 | */ |
1628 | if (unlikely(tlb_addr & TLB_WATCHPOINT)) { |
1629 | cpu_check_watchpoint(env_cpu(env), addr, size - size2, |
1630 | env_tlb(env)->d[mmu_idx].iotlb[index].attrs, |
1631 | BP_MEM_WRITE, retaddr); |
1632 | } |
1633 | if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) { |
1634 | cpu_check_watchpoint(env_cpu(env), page2, size2, |
1635 | env_tlb(env)->d[mmu_idx].iotlb[index2].attrs, |
1636 | BP_MEM_WRITE, retaddr); |
1637 | } |
1638 | |
1639 | /* |
1640 | * XXX: not efficient, but simple. |
1641 | * This loop must go in the forward direction to avoid issues |
1642 | * with self-modifying code in Windows 64-bit. |
1643 | */ |
1644 | for (i = 0; i < size; ++i) { |
1645 | uint8_t val8; |
1646 | if (memop_big_endian(op)) { |
1647 | /* Big-endian extract. */ |
1648 | val8 = val >> (((size - 1) * 8) - (i * 8)); |
1649 | } else { |
1650 | /* Little-endian extract. */ |
1651 | val8 = val >> (i * 8); |
1652 | } |
1653 | helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr); |
1654 | } |
1655 | return; |
1656 | } |
1657 | |
1658 | do_aligned_access: |
1659 | haddr = (void *)((uintptr_t)addr + entry->addend); |
1660 | switch (op) { |
1661 | case MO_UB: |
1662 | stb_p(haddr, val); |
1663 | break; |
1664 | case MO_BEUW: |
1665 | stw_be_p(haddr, val); |
1666 | break; |
1667 | case MO_LEUW: |
1668 | stw_le_p(haddr, val); |
1669 | break; |
1670 | case MO_BEUL: |
1671 | stl_be_p(haddr, val); |
1672 | break; |
1673 | case MO_LEUL: |
1674 | stl_le_p(haddr, val); |
1675 | break; |
1676 | case MO_BEQ: |
1677 | stq_be_p(haddr, val); |
1678 | break; |
1679 | case MO_LEQ: |
1680 | stq_le_p(haddr, val); |
1681 | break; |
    default:
        g_assert_not_reached();
1685 | } |
1686 | } |
1687 | |
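/*
 * The TCG-visible wrappers below exist so that store_helper, which is
 * marked always_inline, is instantiated once per operation with a
 * compile-time constant MemOp, allowing the compiler to discard the
 * unused switch arms.
 */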
1688 | void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val, |
1689 | TCGMemOpIdx oi, uintptr_t retaddr) |
1690 | { |
1691 | store_helper(env, addr, val, oi, retaddr, MO_UB); |
1692 | } |
1693 | |
1694 | void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, |
1695 | TCGMemOpIdx oi, uintptr_t retaddr) |
1696 | { |
1697 | store_helper(env, addr, val, oi, retaddr, MO_LEUW); |
1698 | } |
1699 | |
1700 | void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, |
1701 | TCGMemOpIdx oi, uintptr_t retaddr) |
1702 | { |
1703 | store_helper(env, addr, val, oi, retaddr, MO_BEUW); |
1704 | } |
1705 | |
1706 | void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, |
1707 | TCGMemOpIdx oi, uintptr_t retaddr) |
1708 | { |
1709 | store_helper(env, addr, val, oi, retaddr, MO_LEUL); |
1710 | } |
1711 | |
1712 | void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, |
1713 | TCGMemOpIdx oi, uintptr_t retaddr) |
1714 | { |
1715 | store_helper(env, addr, val, oi, retaddr, MO_BEUL); |
1716 | } |
1717 | |
1718 | void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, |
1719 | TCGMemOpIdx oi, uintptr_t retaddr) |
1720 | { |
1721 | store_helper(env, addr, val, oi, retaddr, MO_LEQ); |
1722 | } |
1723 | |
1724 | void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, |
1725 | TCGMemOpIdx oi, uintptr_t retaddr) |
1726 | { |
1727 | store_helper(env, addr, val, oi, retaddr, MO_BEQ); |
1728 | } |
1729 | |
/*
 * The first set of helpers allows OI and RETADDR to be passed in,
 * which makes them callable from other helpers.
 */
1732 | |
#define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr
1734 | #define ATOMIC_NAME(X) \ |
1735 | HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu)) |
1736 | #define ATOMIC_MMU_DECLS NotDirtyInfo ndi |
1737 | #define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr, &ndi) |
1738 | #define ATOMIC_MMU_CLEANUP \ |
1739 | do { \ |
1740 | if (unlikely(ndi.active)) { \ |
1741 | memory_notdirty_write_complete(&ndi); \ |
1742 | } \ |
1743 | } while (0) |
1744 | |
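/*
 * Each inclusion of atomic_template.h expands to the cmpxchg, exchange
 * and fetch-and-op helpers for one access size, using ATOMIC_NAME to
 * construct the symbol names and EXTRA_ARGS to append the trailing
 * parameters; the template #undefs DATA_SIZE so that it can be
 * redefined before the next inclusion.  For example, DATA_SIZE 4
 * yields, among others, helper_atomic_cmpxchgl_le_mmu and
 * helper_atomic_cmpxchgl_be_mmu.
 */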
1745 | #define DATA_SIZE 1 |
1746 | #include "atomic_template.h" |
1747 | |
1748 | #define DATA_SIZE 2 |
1749 | #include "atomic_template.h" |
1750 | |
1751 | #define DATA_SIZE 4 |
1752 | #include "atomic_template.h" |
1753 | |
1754 | #ifdef CONFIG_ATOMIC64 |
1755 | #define DATA_SIZE 8 |
1756 | #include "atomic_template.h" |
1757 | #endif |
1758 | |
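/*
 * The 16-byte helpers are only provided when the host has a 128-bit
 * atomic access or compare-and-swap primitive.
 */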
1759 | #if HAVE_CMPXCHG128 || HAVE_ATOMIC128 |
1760 | #define DATA_SIZE 16 |
1761 | #include "atomic_template.h" |
1762 | #endif |
1763 | |
/* The second set of helpers is callable directly from TCG-generated code. */
1765 | |
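/*
 * Only the EXTRA_ARGS and lookup macros change: the return address is
 * recovered with GETPC() at this call boundary rather than passed in,
 * so no retaddr parameter is appended.
 */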
1766 | #undef EXTRA_ARGS |
1767 | #undef ATOMIC_NAME |
1768 | #undef ATOMIC_MMU_LOOKUP |
#define EXTRA_ARGS , TCGMemOpIdx oi
1770 | #define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END)) |
1771 | #define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC(), &ndi) |
1772 | |
1773 | #define DATA_SIZE 1 |
1774 | #include "atomic_template.h" |
1775 | |
1776 | #define DATA_SIZE 2 |
1777 | #include "atomic_template.h" |
1778 | |
1779 | #define DATA_SIZE 4 |
1780 | #include "atomic_template.h" |
1781 | |
1782 | #ifdef CONFIG_ATOMIC64 |
1783 | #define DATA_SIZE 8 |
1784 | #include "atomic_template.h" |
1785 | #endif |
1786 | |
1787 | /* Code access functions. */ |
1788 | |
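/*
 * These are used for instruction fetches rather than data loads: the
 * 'true' code_read argument makes load_helper match against addr_code
 * and fill with MMU_INST_FETCH on a miss.
 */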
1789 | static uint64_t full_ldub_cmmu(CPUArchState *env, target_ulong addr, |
1790 | TCGMemOpIdx oi, uintptr_t retaddr) |
1791 | { |
1792 | return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_cmmu); |
1793 | } |
1794 | |
1795 | uint8_t helper_ret_ldb_cmmu(CPUArchState *env, target_ulong addr, |
1796 | TCGMemOpIdx oi, uintptr_t retaddr) |
1797 | { |
1798 | return full_ldub_cmmu(env, addr, oi, retaddr); |
1799 | } |
1800 | |
1801 | static uint64_t full_le_lduw_cmmu(CPUArchState *env, target_ulong addr, |
1802 | TCGMemOpIdx oi, uintptr_t retaddr) |
1803 | { |
1804 | return load_helper(env, addr, oi, retaddr, MO_LEUW, true, |
1805 | full_le_lduw_cmmu); |
1806 | } |
1807 | |
1808 | uint16_t helper_le_ldw_cmmu(CPUArchState *env, target_ulong addr, |
1809 | TCGMemOpIdx oi, uintptr_t retaddr) |
1810 | { |
1811 | return full_le_lduw_cmmu(env, addr, oi, retaddr); |
1812 | } |
1813 | |
1814 | static uint64_t full_be_lduw_cmmu(CPUArchState *env, target_ulong addr, |
1815 | TCGMemOpIdx oi, uintptr_t retaddr) |
1816 | { |
1817 | return load_helper(env, addr, oi, retaddr, MO_BEUW, true, |
1818 | full_be_lduw_cmmu); |
1819 | } |
1820 | |
1821 | uint16_t helper_be_ldw_cmmu(CPUArchState *env, target_ulong addr, |
1822 | TCGMemOpIdx oi, uintptr_t retaddr) |
1823 | { |
1824 | return full_be_lduw_cmmu(env, addr, oi, retaddr); |
1825 | } |
1826 | |
1827 | static uint64_t full_le_ldul_cmmu(CPUArchState *env, target_ulong addr, |
1828 | TCGMemOpIdx oi, uintptr_t retaddr) |
1829 | { |
1830 | return load_helper(env, addr, oi, retaddr, MO_LEUL, true, |
1831 | full_le_ldul_cmmu); |
1832 | } |
1833 | |
1834 | uint32_t helper_le_ldl_cmmu(CPUArchState *env, target_ulong addr, |
1835 | TCGMemOpIdx oi, uintptr_t retaddr) |
1836 | { |
1837 | return full_le_ldul_cmmu(env, addr, oi, retaddr); |
1838 | } |
1839 | |
1840 | static uint64_t full_be_ldul_cmmu(CPUArchState *env, target_ulong addr, |
1841 | TCGMemOpIdx oi, uintptr_t retaddr) |
1842 | { |
1843 | return load_helper(env, addr, oi, retaddr, MO_BEUL, true, |
1844 | full_be_ldul_cmmu); |
1845 | } |
1846 | |
1847 | uint32_t helper_be_ldl_cmmu(CPUArchState *env, target_ulong addr, |
1848 | TCGMemOpIdx oi, uintptr_t retaddr) |
1849 | { |
1850 | return full_be_ldul_cmmu(env, addr, oi, retaddr); |
1851 | } |
1852 | |
1853 | uint64_t helper_le_ldq_cmmu(CPUArchState *env, target_ulong addr, |
1854 | TCGMemOpIdx oi, uintptr_t retaddr) |
1855 | { |
1856 | return load_helper(env, addr, oi, retaddr, MO_LEQ, true, |
1857 | helper_le_ldq_cmmu); |
1858 | } |
1859 | |
1860 | uint64_t helper_be_ldq_cmmu(CPUArchState *env, target_ulong addr, |
1861 | TCGMemOpIdx oi, uintptr_t retaddr) |
1862 | { |
1863 | return load_helper(env, addr, oi, retaddr, MO_BEQ, true, |
1864 | helper_be_ldq_cmmu); |
1865 | } |
1866 | |