1 | /* |
2 | * QEMU Host Memory Backend |
3 | * |
4 | * Copyright (C) 2013-2014 Red Hat Inc |
5 | * |
6 | * Authors: |
7 | * Igor Mammedov <imammedo@redhat.com> |
8 | * |
9 | * This work is licensed under the terms of the GNU GPL, version 2 or later. |
10 | * See the COPYING file in the top-level directory. |
11 | */ |
12 | |
13 | #include "qemu/osdep.h" |
14 | #include "sysemu/hostmem.h" |
15 | #include "sysemu/sysemu.h" |
16 | #include "hw/boards.h" |
17 | #include "qapi/error.h" |
18 | #include "qapi/qapi-builtin-visit.h" |
19 | #include "qapi/visitor.h" |
20 | #include "qemu/config-file.h" |
21 | #include "qom/object_interfaces.h" |
22 | #include "qemu/mmap-alloc.h" |
23 | |
24 | #ifdef CONFIG_NUMA |
25 | #include <numaif.h> |
26 | QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT); |
27 | QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED); |
28 | QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND); |
29 | QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE); |
30 | #endif |
31 | |
32 | char * |
33 | host_memory_backend_get_name(HostMemoryBackend *backend) |
34 | { |
35 | if (!backend->use_canonical_path) { |
36 | return object_get_canonical_path_component(OBJECT(backend)); |
37 | } |
38 | |
39 | return object_get_canonical_path(OBJECT(backend)); |
40 | } |
41 | |
42 | static void |
43 | host_memory_backend_get_size(Object *obj, Visitor *v, const char *name, |
44 | void *opaque, Error **errp) |
45 | { |
46 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
47 | uint64_t value = backend->size; |
48 | |
49 | visit_type_size(v, name, &value, errp); |
50 | } |
51 | |
52 | static void |
53 | host_memory_backend_set_size(Object *obj, Visitor *v, const char *name, |
54 | void *opaque, Error **errp) |
55 | { |
56 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
57 | Error *local_err = NULL; |
58 | uint64_t value; |
59 | |
60 | if (host_memory_backend_mr_inited(backend)) { |
61 | error_setg(&local_err, "cannot change property %s of %s " , |
62 | name, object_get_typename(obj)); |
63 | goto out; |
64 | } |
65 | |
66 | visit_type_size(v, name, &value, &local_err); |
67 | if (local_err) { |
68 | goto out; |
69 | } |
70 | if (!value) { |
71 | error_setg(&local_err, |
72 | "property '%s' of %s doesn't take value '%" PRIu64 "'" , |
73 | name, object_get_typename(obj), value); |
74 | goto out; |
75 | } |
76 | backend->size = value; |
77 | out: |
78 | error_propagate(errp, local_err); |
79 | } |
80 | |
81 | static void |
82 | host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name, |
83 | void *opaque, Error **errp) |
84 | { |
85 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
86 | uint16List *host_nodes = NULL; |
87 | uint16List **node = &host_nodes; |
88 | unsigned long value; |
89 | |
90 | value = find_first_bit(backend->host_nodes, MAX_NODES); |
91 | if (value == MAX_NODES) { |
92 | goto ret; |
93 | } |
94 | |
95 | *node = g_malloc0(sizeof(**node)); |
96 | (*node)->value = value; |
97 | node = &(*node)->next; |
98 | |
99 | do { |
100 | value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1); |
101 | if (value == MAX_NODES) { |
102 | break; |
103 | } |
104 | |
105 | *node = g_malloc0(sizeof(**node)); |
106 | (*node)->value = value; |
107 | node = &(*node)->next; |
108 | } while (true); |
109 | |
110 | ret: |
111 | visit_type_uint16List(v, name, &host_nodes, errp); |
112 | } |
113 | |
114 | static void |
115 | host_memory_backend_set_host_nodes(Object *obj, Visitor *v, const char *name, |
116 | void *opaque, Error **errp) |
117 | { |
118 | #ifdef CONFIG_NUMA |
119 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
120 | uint16List *l, *host_nodes = NULL; |
121 | |
122 | visit_type_uint16List(v, name, &host_nodes, errp); |
123 | |
124 | for (l = host_nodes; l; l = l->next) { |
125 | if (l->value >= MAX_NODES) { |
126 | error_setg(errp, "Invalid host-nodes value: %d" , l->value); |
127 | goto out; |
128 | } |
129 | } |
130 | |
131 | for (l = host_nodes; l; l = l->next) { |
132 | bitmap_set(backend->host_nodes, l->value, 1); |
133 | } |
134 | |
135 | out: |
136 | qapi_free_uint16List(host_nodes); |
137 | #else |
138 | error_setg(errp, "NUMA node binding are not supported by this QEMU" ); |
139 | #endif |
140 | } |
141 | |
142 | static int |
143 | host_memory_backend_get_policy(Object *obj, Error **errp G_GNUC_UNUSED) |
144 | { |
145 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
146 | return backend->policy; |
147 | } |
148 | |
149 | static void |
150 | host_memory_backend_set_policy(Object *obj, int policy, Error **errp) |
151 | { |
152 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
153 | backend->policy = policy; |
154 | |
155 | #ifndef CONFIG_NUMA |
156 | if (policy != HOST_MEM_POLICY_DEFAULT) { |
157 | error_setg(errp, "NUMA policies are not supported by this QEMU" ); |
158 | } |
159 | #endif |
160 | } |
161 | |
162 | static bool host_memory_backend_get_merge(Object *obj, Error **errp) |
163 | { |
164 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
165 | |
166 | return backend->merge; |
167 | } |
168 | |
169 | static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp) |
170 | { |
171 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
172 | |
173 | if (!host_memory_backend_mr_inited(backend)) { |
174 | backend->merge = value; |
175 | return; |
176 | } |
177 | |
178 | if (value != backend->merge) { |
179 | void *ptr = memory_region_get_ram_ptr(&backend->mr); |
180 | uint64_t sz = memory_region_size(&backend->mr); |
181 | |
182 | qemu_madvise(ptr, sz, |
183 | value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE); |
184 | backend->merge = value; |
185 | } |
186 | } |
187 | |
188 | static bool host_memory_backend_get_dump(Object *obj, Error **errp) |
189 | { |
190 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
191 | |
192 | return backend->dump; |
193 | } |
194 | |
195 | static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp) |
196 | { |
197 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
198 | |
199 | if (!host_memory_backend_mr_inited(backend)) { |
200 | backend->dump = value; |
201 | return; |
202 | } |
203 | |
204 | if (value != backend->dump) { |
205 | void *ptr = memory_region_get_ram_ptr(&backend->mr); |
206 | uint64_t sz = memory_region_size(&backend->mr); |
207 | |
208 | qemu_madvise(ptr, sz, |
209 | value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP); |
210 | backend->dump = value; |
211 | } |
212 | } |
213 | |
214 | static bool host_memory_backend_get_prealloc(Object *obj, Error **errp) |
215 | { |
216 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
217 | |
218 | return backend->prealloc || backend->force_prealloc; |
219 | } |
220 | |
221 | static void host_memory_backend_set_prealloc(Object *obj, bool value, |
222 | Error **errp) |
223 | { |
224 | Error *local_err = NULL; |
225 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
226 | MachineState *ms = MACHINE(qdev_get_machine()); |
227 | |
228 | if (backend->force_prealloc) { |
229 | if (value) { |
230 | error_setg(errp, |
231 | "remove -mem-prealloc to use the prealloc property" ); |
232 | return; |
233 | } |
234 | } |
235 | |
236 | if (!host_memory_backend_mr_inited(backend)) { |
237 | backend->prealloc = value; |
238 | return; |
239 | } |
240 | |
241 | if (value && !backend->prealloc) { |
242 | int fd = memory_region_get_fd(&backend->mr); |
243 | void *ptr = memory_region_get_ram_ptr(&backend->mr); |
244 | uint64_t sz = memory_region_size(&backend->mr); |
245 | |
246 | os_mem_prealloc(fd, ptr, sz, ms->smp.cpus, &local_err); |
247 | if (local_err) { |
248 | error_propagate(errp, local_err); |
249 | return; |
250 | } |
251 | backend->prealloc = true; |
252 | } |
253 | } |
254 | |
255 | static void host_memory_backend_init(Object *obj) |
256 | { |
257 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
258 | MachineState *machine = MACHINE(qdev_get_machine()); |
259 | |
260 | backend->merge = machine_mem_merge(machine); |
261 | backend->dump = machine_dump_guest_core(machine); |
262 | backend->prealloc = mem_prealloc; |
263 | } |
264 | |
static void host_memory_backend_post_init(Object *obj)
{
    /* Apply machine compat properties after all instance_init callbacks ran. */
    object_apply_compat_props(obj);
}
269 | |
270 | bool host_memory_backend_mr_inited(HostMemoryBackend *backend) |
271 | { |
272 | /* |
273 | * NOTE: We forbid zero-length memory backend, so here zero means |
274 | * "we haven't inited the backend memory region yet". |
275 | */ |
276 | return memory_region_size(&backend->mr) != 0; |
277 | } |
278 | |
279 | MemoryRegion *host_memory_backend_get_memory(HostMemoryBackend *backend) |
280 | { |
281 | return host_memory_backend_mr_inited(backend) ? &backend->mr : NULL; |
282 | } |
283 | |
void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped)
{
    /* Record whether the backend is currently in use by a frontend. */
    backend->is_mapped = mapped;
}
288 | |
bool host_memory_backend_is_mapped(HostMemoryBackend *backend)
{
    /* True while the backend is in use by a frontend; see set_mapped(). */
    return backend->is_mapped;
}
293 | |
294 | #ifdef __linux__ |
295 | size_t host_memory_backend_pagesize(HostMemoryBackend *memdev) |
296 | { |
297 | Object *obj = OBJECT(memdev); |
298 | char *path = object_property_get_str(obj, "mem-path" , NULL); |
299 | size_t pagesize = qemu_mempath_getpagesize(path); |
300 | |
301 | g_free(path); |
302 | return pagesize; |
303 | } |
304 | #else |
305 | size_t host_memory_backend_pagesize(HostMemoryBackend *memdev) |
306 | { |
307 | return getpagesize(); |
308 | } |
309 | #endif |
310 | |
/*
 * UserCreatable::complete handler: allocate the backing memory via the
 * subclass hook, apply the merge/dump madvise flags, bind the memory to
 * the requested host NUMA nodes, and finally preallocate pages.  The
 * ordering matters: mbind() must happen before os_mem_prealloc() so the
 * pages are faulted in under the chosen policy.
 */
static void
host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(uc);
    HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
    MachineState *ms = MACHINE(qdev_get_machine());
    Error *local_err = NULL;
    void *ptr;
    uint64_t sz;

    /* Subclasses without an alloc hook have nothing to complete. */
    if (bc->alloc) {
        bc->alloc(backend, &local_err);
        if (local_err) {
            goto out;
        }

        ptr = memory_region_get_ram_ptr(&backend->mr);
        sz = memory_region_size(&backend->mr);

        /* Merge/dump preferences are plain madvise() calls on the region. */
        if (backend->merge) {
            qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE);
        }
        if (!backend->dump) {
            qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP);
        }
#ifdef CONFIG_NUMA
        unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES);
        /* lastbit == MAX_NODES means maxnode = 0 */
        unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1);
        /* ensure policy won't be ignored in case memory is preallocated
         * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so
         * this doesn't catch hugepage case. */
        unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE;

        /* check for invalid host-nodes and policies and give more verbose
         * error messages than mbind(). */
        if (maxnode && backend->policy == MPOL_DEFAULT) {
            error_setg(errp, "host-nodes must be empty for policy default,"
                       " or you should explicitly specify a policy other"
                       " than default");
            return;
        } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) {
            error_setg(errp, "host-nodes must be set for policy %s",
                       HostMemPolicy_str(backend->policy));
            return;
        }

        /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1
         * as argument to mbind() due to an old Linux bug (feature?) which
         * cuts off the last specified node. This means backend->host_nodes
         * must have MAX_NODES+1 bits available.
         */
        assert(sizeof(backend->host_nodes) >=
               BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long));
        assert(maxnode <= MAX_NODES);
        if (mbind(ptr, sz, backend->policy,
                  maxnode ? backend->host_nodes : NULL, maxnode + 1, flags)) {
            /* ENOSYS with the default policy is harmless (no NUMA kernel). */
            if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) {
                error_setg_errno(errp, errno,
                                 "cannot bind memory to host NUMA nodes");
                return;
            }
        }
#endif
        /* Preallocate memory after the NUMA policy has been instantiated.
         * This is necessary to guarantee memory is allocated with
         * specified NUMA policy in place.
         */
        if (backend->prealloc) {
            os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz,
                            ms->smp.cpus, &local_err);
            if (local_err) {
                goto out;
            }
        }
    }
out:
    error_propagate(errp, local_err);
}
390 | |
391 | static bool |
392 | host_memory_backend_can_be_deleted(UserCreatable *uc) |
393 | { |
394 | if (host_memory_backend_is_mapped(MEMORY_BACKEND(uc))) { |
395 | return false; |
396 | } else { |
397 | return true; |
398 | } |
399 | } |
400 | |
401 | static bool host_memory_backend_get_share(Object *o, Error **errp) |
402 | { |
403 | HostMemoryBackend *backend = MEMORY_BACKEND(o); |
404 | |
405 | return backend->share; |
406 | } |
407 | |
408 | static void host_memory_backend_set_share(Object *o, bool value, Error **errp) |
409 | { |
410 | HostMemoryBackend *backend = MEMORY_BACKEND(o); |
411 | |
412 | if (host_memory_backend_mr_inited(backend)) { |
413 | error_setg(errp, "cannot change property value" ); |
414 | return; |
415 | } |
416 | backend->share = value; |
417 | } |
418 | |
419 | static bool |
420 | host_memory_backend_get_use_canonical_path(Object *obj, Error **errp) |
421 | { |
422 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
423 | |
424 | return backend->use_canonical_path; |
425 | } |
426 | |
427 | static void |
428 | host_memory_backend_set_use_canonical_path(Object *obj, bool value, |
429 | Error **errp) |
430 | { |
431 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
432 | |
433 | backend->use_canonical_path = value; |
434 | } |
435 | |
/*
 * Register the UserCreatable callbacks and the QOM properties shared by
 * every host memory backend subclass.
 */
static void
host_memory_backend_class_init(ObjectClass *oc, void *data)
{
    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);

    /* Memory allocation happens in the UserCreatable completion hook. */
    ucc->complete = host_memory_backend_memory_complete;
    ucc->can_be_deleted = host_memory_backend_can_be_deleted;

    object_class_property_add_bool(oc, "merge",
        host_memory_backend_get_merge,
        host_memory_backend_set_merge, &error_abort);
    object_class_property_set_description(oc, "merge",
        "Mark memory as mergeable", &error_abort);
    object_class_property_add_bool(oc, "dump",
        host_memory_backend_get_dump,
        host_memory_backend_set_dump, &error_abort);
    object_class_property_set_description(oc, "dump",
        "Set to 'off' to exclude from core dump", &error_abort);
    object_class_property_add_bool(oc, "prealloc",
        host_memory_backend_get_prealloc,
        host_memory_backend_set_prealloc, &error_abort);
    object_class_property_set_description(oc, "prealloc",
        "Preallocate memory", &error_abort);
    object_class_property_add(oc, "size", "int",
        host_memory_backend_get_size,
        host_memory_backend_set_size,
        NULL, NULL, &error_abort);
    object_class_property_set_description(oc, "size",
        "Size of the memory region (ex: 500M)", &error_abort);
    object_class_property_add(oc, "host-nodes", "int",
        host_memory_backend_get_host_nodes,
        host_memory_backend_set_host_nodes,
        NULL, NULL, &error_abort);
    object_class_property_set_description(oc, "host-nodes",
        "Binds memory to the list of NUMA host nodes", &error_abort);
    object_class_property_add_enum(oc, "policy", "HostMemPolicy",
        &HostMemPolicy_lookup,
        host_memory_backend_get_policy,
        host_memory_backend_set_policy, &error_abort);
    object_class_property_set_description(oc, "policy",
        "Set the NUMA policy", &error_abort);
    object_class_property_add_bool(oc, "share",
        host_memory_backend_get_share, host_memory_backend_set_share,
        &error_abort);
    object_class_property_set_description(oc, "share",
        "Mark the memory as private to QEMU or shared", &error_abort);
    /* Compat knob; no description on purpose (x- prefix marks it unstable). */
    object_class_property_add_bool(oc, "x-use-canonical-path-for-ramblock-id",
        host_memory_backend_get_use_canonical_path,
        host_memory_backend_set_use_canonical_path, &error_abort);
}
486 | |
/* Abstract base type; concrete backends subclass TYPE_MEMORY_BACKEND. */
static const TypeInfo host_memory_backend_info = {
    .name = TYPE_MEMORY_BACKEND,
    .parent = TYPE_OBJECT,
    .abstract = true,
    .class_size = sizeof(HostMemoryBackendClass),
    .class_init = host_memory_backend_class_init,
    .instance_size = sizeof(HostMemoryBackend),
    .instance_init = host_memory_backend_init,
    .instance_post_init = host_memory_backend_post_init,
    .interfaces = (InterfaceInfo[]) {
        { TYPE_USER_CREATABLE },
        { }
    }
};
501 | |
/* Register the abstract backend type with the QOM type system. */
static void register_types(void)
{
    type_register_static(&host_memory_backend_info);
}

type_init(register_types);
508 | |