/*
 * Declarations for cpu physical memory functions
 *
 * Copyright 2011 Red Hat, Inc. and/or its affiliates
 *
 * Authors:
 *  Avi Kivity <avi@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later. See the COPYING file in the top-level directory.
 *
 */

/*
 * This header is for use by exec.c and memory.c ONLY. Do not include it.
 * The functions declared here will be removed soon.
 */

#ifndef RAM_ADDR_H
#define RAM_ADDR_H

#ifndef CONFIG_USER_ONLY
#include "cpu.h"
#include "hw/xen/xen.h"
#include "sysemu/tcg.h"
#include "exec/ramlist.h"

struct RAMBlock {
    struct rcu_head rcu;
    struct MemoryRegion *mr;
    uint8_t *host;
    uint8_t *colo_cache; /* For colo, VM's ram cache */
    ram_addr_t offset;
    ram_addr_t used_length;
    ram_addr_t max_length;
    void (*resized)(const char*, uint64_t length, void *host);
    uint32_t flags;
    /* Protected by iothread lock. */
    char idstr[256];
    /* RCU-enabled, writes protected by the ramlist lock */
    QLIST_ENTRY(RAMBlock) next;
    QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers;
    int fd;
    size_t page_size;
    /* dirty bitmap used during migration */
    unsigned long *bmap;
    /* Bitmap of pages that haven't been sent even once.
     * Only maintained and used in postcopy at the moment,
     * where it is used to send the dirty bitmap at the start
     * of the postcopy phase.
     */
    unsigned long *unsentmap;
    /* bitmap of already received pages in postcopy */
    unsigned long *receivedmap;

    /*
     * Bitmap tracking the postponed clearing of the dirty bitmap. When
     * a bit is set, it means the corresponding memory chunk still needs
     * a log-clear. Set this field to non-NULL to enable the capability
     * to postpone and split clearing of the dirty bitmap on the remote
     * node (e.g., KVM). The bitmap will only be set during a global
     * sync.
     *
     * NOTE: this bitmap differs from the other bitmaps in that one bit
     * can represent multiple guest pages (the exact number is decided
     * by the `clear_bmap_shift' field below). On the destination side
     * this should always be NULL, and `clear_bmap_shift' is
     * meaningless.
     */
    unsigned long *clear_bmap;
    uint8_t clear_bmap_shift;
};
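
/*
 * Illustrative arithmetic (example values, not defined in this header):
 * with 4 KiB target pages and a clear_bmap_shift of 18, one clear_bmap
 * bit covers 2^18 pages, i.e. 1 GiB of guest memory, so the postponed
 * log-clear can be issued in 1 GiB chunks rather than page by page.
 */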

/**
 * clear_bmap_size: calculate clear bitmap size
 *
 * @pages: number of guest pages
 * @shift: guest page number shift
 *
 * Returns: number of bits for the clear bitmap
 */
static inline long clear_bmap_size(uint64_t pages, uint8_t shift)
{
    return DIV_ROUND_UP(pages, 1UL << shift);
}
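
/*
 * For example (hypothetical numbers): clear_bmap_size(1UL << 20, 18)
 * returns DIV_ROUND_UP(2^20, 2^18) == 4, i.e. four bits are enough to
 * track log-clear state for 2^20 guest pages.
 */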

/**
 * clear_bmap_set: set clear bitmap for the page range
 *
 * @rb: the ramblock to operate on
 * @start: the start page number
 * @npages: number of pages to set in the bitmap
 *
 * Returns: None
 */
static inline void clear_bmap_set(RAMBlock *rb, uint64_t start,
                                  uint64_t npages)
{
    uint8_t shift = rb->clear_bmap_shift;

    bitmap_set_atomic(rb->clear_bmap, start >> shift,
                      clear_bmap_size(npages, shift));
}

/**
 * clear_bmap_test_and_clear: test clear bitmap for the page, clear if set
 *
 * @rb: the ramblock to operate on
 * @page: the page number to check
 *
 * Returns: true if the bit was set, false otherwise
 */
static inline bool clear_bmap_test_and_clear(RAMBlock *rb, uint64_t page)
{
    uint8_t shift = rb->clear_bmap_shift;

    return bitmap_test_and_clear_atomic(rb->clear_bmap, page >> shift, 1);
}
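
/*
 * Rough usage sketch (assumed caller behaviour, e.g. the migration code;
 * not part of this header): the bitmap sync path marks whole chunks with
 * clear_bmap_set(), and the page-sending path lazily clears the remote
 * dirty log one chunk at a time:
 *
 *   if (rb->clear_bmap && clear_bmap_test_and_clear(rb, page)) {
 *       uint8_t shift = rb->clear_bmap_shift;
 *       hwaddr size = 1ULL << (TARGET_PAGE_BITS + shift);
 *       hwaddr start = QEMU_ALIGN_DOWN((ram_addr_t)page << TARGET_PAGE_BITS,
 *                                      size);
 *       memory_region_clear_dirty_bitmap(rb->mr, start, size);
 *   }
 */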

static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset)
{
    return b && b->host && offset < b->used_length;
}

static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset)
{
    assert(offset_in_ramblock(block, offset));
    return (char *)block->host + offset;
}

static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr,
                                                            RAMBlock *rb)
{
    uint64_t host_addr_offset =
            (uint64_t)(uintptr_t)(host_addr - (void *)rb->host);
    return host_addr_offset >> TARGET_PAGE_BITS;
}

bool ramblock_is_pmem(RAMBlock *rb);

long qemu_minrampagesize(void);
long qemu_maxrampagesize(void);

/**
 * qemu_ram_alloc_from_file,
 * qemu_ram_alloc_from_fd: Allocate a ram block from the specified backing
 *                         file or device
 *
 * Parameters:
 *  @size: the size in bytes of the ram block
 *  @mr: the memory region where the ram block is
 *  @ram_flags: specify the properties of the ram block, which can be a
 *              bit-wise OR of the following values
 *              - RAM_SHARED: mmap the backing file or device with MAP_SHARED
 *              - RAM_PMEM: the backend @mem_path or @fd is persistent memory
 *              Other bits are ignored.
 *  @mem_path or @fd: specify the backing file or device
 *  @errp: pointer to Error*, to store an error if one occurs
 *
 * Return:
 *  On success, return a pointer to the ram block.
 *  On failure, return NULL.
 */
RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                   uint32_t ram_flags, const char *mem_path,
                                   Error **errp);
RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
                                 uint32_t ram_flags, int fd,
                                 Error **errp);
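
/*
 * Minimal usage sketch (hypothetical caller and backing path, for
 * illustration only): back a memory region with a shared file:
 *
 *   Error *err = NULL;
 *   RAMBlock *rb = qemu_ram_alloc_from_file(size, mr, RAM_SHARED,
 *                                           "/dev/shm/guest-ram", &err);
 *   if (!rb) {
 *       error_report_err(err);
 *   }
 */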

RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                  MemoryRegion *mr, Error **errp);
RAMBlock *qemu_ram_alloc(ram_addr_t size, bool share, MemoryRegion *mr,
                         Error **errp);
RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t max_size,
                                    void (*resized)(const char*,
                                                    uint64_t length,
                                                    void *host),
                                    MemoryRegion *mr, Error **errp);
void qemu_ram_free(RAMBlock *block);

int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp);

#define DIRTY_CLIENTS_ALL     ((1 << DIRTY_MEMORY_NUM) - 1)
#define DIRTY_CLIENTS_NOCODE  (DIRTY_CLIENTS_ALL & ~(1 << DIRTY_MEMORY_CODE))

void tb_invalidate_phys_range(ram_addr_t start, ram_addr_t end);
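
/*
 * The dirty memory bitmaps accessed below are organized as DirtyMemoryBlocks
 * of DIRTY_MEMORY_BLOCK_SIZE bits each, so a page number is decomposed into
 * a block index and a bit offset within that block:
 *
 *   idx    = page / DIRTY_MEMORY_BLOCK_SIZE;
 *   offset = page % DIRTY_MEMORY_BLOCK_SIZE;
 *
 * The helpers below therefore walk a range block by block, resetting the
 * offset to 0 whenever they cross into the next block.
 */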

static inline bool cpu_physical_memory_get_dirty(ram_addr_t start,
                                                 ram_addr_t length,
                                                 unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    unsigned long idx, offset, base;
    bool dirty = false;

    assert(client < DIRTY_MEMORY_NUM);

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    rcu_read_lock();

    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);

    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;
    base = page - offset;
    while (page < end) {
        unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long num = next - base;
        unsigned long found = find_next_bit(blocks->blocks[idx], num, offset);
        if (found < num) {
            dirty = true;
            break;
        }

        page = next;
        idx++;
        offset = 0;
        base += DIRTY_MEMORY_BLOCK_SIZE;
    }

    rcu_read_unlock();

    return dirty;
}

static inline bool cpu_physical_memory_all_dirty(ram_addr_t start,
                                                 ram_addr_t length,
                                                 unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    unsigned long idx, offset, base;
    bool dirty = true;

    assert(client < DIRTY_MEMORY_NUM);

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    rcu_read_lock();

    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);

    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;
    base = page - offset;
    while (page < end) {
        unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long num = next - base;
        unsigned long found = find_next_zero_bit(blocks->blocks[idx], num,
                                                 offset);
        if (found < num) {
            dirty = false;
            break;
        }

        page = next;
        idx++;
        offset = 0;
        base += DIRTY_MEMORY_BLOCK_SIZE;
    }

    rcu_read_unlock();

    return dirty;
}

static inline bool cpu_physical_memory_get_dirty_flag(ram_addr_t addr,
                                                      unsigned client)
{
    return cpu_physical_memory_get_dirty(addr, 1, client);
}

static inline bool cpu_physical_memory_is_clean(ram_addr_t addr)
{
    bool vga = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_VGA);
    bool code = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_CODE);
    bool migration =
        cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
    return !(vga && code && migration);
}

static inline uint8_t cpu_physical_memory_range_includes_clean(ram_addr_t start,
                                                               ram_addr_t length,
                                                               uint8_t mask)
{
    uint8_t ret = 0;

    if (mask & (1 << DIRTY_MEMORY_VGA) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_VGA)) {
        ret |= (1 << DIRTY_MEMORY_VGA);
    }
    if (mask & (1 << DIRTY_MEMORY_CODE) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_CODE)) {
        ret |= (1 << DIRTY_MEMORY_CODE);
    }
    if (mask & (1 << DIRTY_MEMORY_MIGRATION) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_MIGRATION)) {
        ret |= (1 << DIRTY_MEMORY_MIGRATION);
    }
    return ret;
}

static inline void cpu_physical_memory_set_dirty_flag(ram_addr_t addr,
                                                      unsigned client)
{
    unsigned long page, idx, offset;
    DirtyMemoryBlocks *blocks;

    assert(client < DIRTY_MEMORY_NUM);

    page = addr >> TARGET_PAGE_BITS;
    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;

    rcu_read_lock();

    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);

    set_bit_atomic(offset, blocks->blocks[idx]);

    rcu_read_unlock();
}

static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
                                                       ram_addr_t length,
                                                       uint8_t mask)
{
    DirtyMemoryBlocks *blocks[DIRTY_MEMORY_NUM];
    unsigned long end, page;
    unsigned long idx, offset, base;
    int i;

    if (!mask && !xen_enabled()) {
        return;
    }

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    rcu_read_lock();

    for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
        blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i]);
    }

    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;
    base = page - offset;
    while (page < end) {
        unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);

        if (likely(mask & (1 << DIRTY_MEMORY_MIGRATION))) {
            bitmap_set_atomic(blocks[DIRTY_MEMORY_MIGRATION]->blocks[idx],
                              offset, next - page);
        }
        if (unlikely(mask & (1 << DIRTY_MEMORY_VGA))) {
            bitmap_set_atomic(blocks[DIRTY_MEMORY_VGA]->blocks[idx],
                              offset, next - page);
        }
        if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) {
            bitmap_set_atomic(blocks[DIRTY_MEMORY_CODE]->blocks[idx],
                              offset, next - page);
        }

        page = next;
        idx++;
        offset = 0;
        base += DIRTY_MEMORY_BLOCK_SIZE;
    }

    rcu_read_unlock();

    xen_hvm_modified_memory(start, length);
}

#if !defined(_WIN32)
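/*
 * Merge a little-endian dirty bitmap (e.g. one returned by a hypervisor
 * dirty log such as KVM's) into the global dirty bitmaps, marking @pages
 * target pages starting at @start. When the host page size is larger than
 * the target page size, each bitmap bit covers hpratio target pages.
 */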
static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
                                                          ram_addr_t start,
                                                          ram_addr_t pages)
{
    unsigned long i, j;
    unsigned long page_number, c;
    hwaddr addr;
    ram_addr_t ram_addr;
    unsigned long len = (pages + HOST_LONG_BITS - 1) / HOST_LONG_BITS;
    unsigned long hpratio = getpagesize() / TARGET_PAGE_SIZE;
    unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);

    /* Is the start address aligned to the start of a bitmap word? */
    if ((((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) &&
        (hpratio == 1)) {
        unsigned long **blocks[DIRTY_MEMORY_NUM];
        unsigned long idx;
        unsigned long offset;
        long k;
        long nr = BITS_TO_LONGS(pages);

        idx = (start >> TARGET_PAGE_BITS) / DIRTY_MEMORY_BLOCK_SIZE;
        offset = BIT_WORD((start >> TARGET_PAGE_BITS) %
                          DIRTY_MEMORY_BLOCK_SIZE);

        rcu_read_lock();

        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
            blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i])->blocks;
        }

        for (k = 0; k < nr; k++) {
            if (bitmap[k]) {
                unsigned long temp = leul_to_cpu(bitmap[k]);

                atomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp);

                if (global_dirty_log) {
                    atomic_or(&blocks[DIRTY_MEMORY_MIGRATION][idx][offset],
                              temp);
                }

                if (tcg_enabled()) {
                    atomic_or(&blocks[DIRTY_MEMORY_CODE][idx][offset], temp);
                }
            }

            if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
                offset = 0;
                idx++;
            }
        }

        rcu_read_unlock();

        xen_hvm_modified_memory(start, pages << TARGET_PAGE_BITS);
    } else {
        uint8_t clients = tcg_enabled() ? DIRTY_CLIENTS_ALL : DIRTY_CLIENTS_NOCODE;

        if (!global_dirty_log) {
            clients &= ~(1 << DIRTY_MEMORY_MIGRATION);
        }

        /*
         * Bitmap traversal is faster than memory traversal (for addr...),
         * especially when most of the memory is not dirty.
         */
        for (i = 0; i < len; i++) {
            if (bitmap[i] != 0) {
                c = leul_to_cpu(bitmap[i]);
                do {
                    j = ctzl(c);
                    c &= ~(1ul << j);
                    page_number = (i * HOST_LONG_BITS + j) * hpratio;
                    addr = page_number * TARGET_PAGE_SIZE;
                    ram_addr = start + addr;
                    cpu_physical_memory_set_dirty_range(ram_addr,
                                       TARGET_PAGE_SIZE * hpratio, clients);
                } while (c != 0);
            }
        }
    }
}
#endif /* not _WIN32 */

bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
                                              ram_addr_t length,
                                              unsigned client);

DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
    (MemoryRegion *mr, hwaddr offset, hwaddr length, unsigned client);

bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
                                            ram_addr_t start,
                                            ram_addr_t length);

static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start,
                                                         ram_addr_t length)
{
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_MIGRATION);
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_VGA);
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_CODE);
}

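/*
 * Note on the two dirty page counts produced below: *real_dirty_pages is
 * incremented for every page found dirty in the DIRTY_MEMORY_MIGRATION
 * bitmap, while the return value only counts pages that were not already
 * set in rb->bmap, i.e. pages that are newly dirty from the caller's
 * point of view.
 */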
/* Called within an RCU critical section. */
static inline
uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb,
                                               ram_addr_t start,
                                               ram_addr_t length,
                                               uint64_t *real_dirty_pages)
{
    ram_addr_t addr;
    unsigned long word = BIT_WORD((start + rb->offset) >> TARGET_PAGE_BITS);
    uint64_t num_dirty = 0;
    unsigned long *dest = rb->bmap;

    /* Are the start address and length aligned at the start of a word? */
    if (((word * BITS_PER_LONG) << TARGET_PAGE_BITS) ==
         (start + rb->offset) &&
        !(length & ((BITS_PER_LONG << TARGET_PAGE_BITS) - 1))) {
        int k;
        int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS);
        unsigned long * const *src;
        unsigned long idx = (word * BITS_PER_LONG) / DIRTY_MEMORY_BLOCK_SIZE;
        unsigned long offset = BIT_WORD((word * BITS_PER_LONG) %
                                        DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);

        src = atomic_rcu_read(
                &ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION])->blocks;

        for (k = page; k < page + nr; k++) {
            if (src[idx][offset]) {
                unsigned long bits = atomic_xchg(&src[idx][offset], 0);
                unsigned long new_dirty;
                *real_dirty_pages += ctpopl(bits);
                new_dirty = ~dest[k];
                dest[k] |= bits;
                new_dirty &= bits;
                num_dirty += ctpopl(new_dirty);
            }

            if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
                offset = 0;
                idx++;
            }
        }

        if (rb->clear_bmap) {
            /*
             * Postpone the dirty bitmap clear to the point right before
             * we really send the pages; we also split the clearing of
             * the dirty bitmap into smaller chunks.
             */
            clear_bmap_set(rb, start >> TARGET_PAGE_BITS,
                           length >> TARGET_PAGE_BITS);
        } else {
            /* Slow path - still do that in a huge chunk */
            memory_region_clear_dirty_bitmap(rb->mr, start, length);
        }
    } else {
        ram_addr_t offset = rb->offset;

        for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
            if (cpu_physical_memory_test_and_clear_dirty(
                        start + addr + offset,
                        TARGET_PAGE_SIZE,
                        DIRTY_MEMORY_MIGRATION)) {
                *real_dirty_pages += 1;
                long k = (start + addr) >> TARGET_PAGE_BITS;
                if (!test_and_set_bit(k, dest)) {
                    num_dirty++;
                }
            }
        }
    }

    return num_dirty;
}
#endif
#endif