block-backend.c source code [qemu/block/block-backend.c]

1	/*
2	* QEMU Block backends
3	*
4	* Copyright (C) 2014-2016 Red Hat, Inc.
5	*
6	* Authors:
7	* Markus Armbruster <armbru@redhat.com>,
8	*
9	* This work is licensed under the terms of the GNU LGPL, version 2.1
10	* or later. See the COPYING.LIB file in the top-level directory.
11	*/
12
13	#include "qemu/osdep.h"
14	#include "sysemu/block-backend.h"
15	#include "block/block_int.h"
16	#include "block/blockjob.h"
17	#include "block/throttle-groups.h"
18	#include "hw/qdev-core.h"
19	#include "sysemu/blockdev.h"
20	#include "sysemu/runstate.h"
21	#include "qapi/error.h"
22	#include "qapi/qapi-events-block.h"
23	#include "qemu/id.h"
24	#include "qemu/main-loop.h"
25	#include "qemu/option.h"
26	#include "trace.h"
27	#include "migration/misc.h"
28
/* Number of coroutines to reserve per attached device model */
#define COROUTINE_POOL_RESERVATION 64

#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
33
34	static AioContext blk_aiocb_get_aio_context(BlockAIOCB acb);
35
36	typedef struct BlockBackendAioNotifier {
37	void (attached_aio_context)(AioContext new_context, void *opaque);
38	void (detach_aio_context)(void* *opaque);
39	void *opaque;
40	QLIST_ENTRY(BlockBackendAioNotifier) list;
41	} BlockBackendAioNotifier;
42
43	struct BlockBackend {
44	char *name;
45	int refcnt;
46	BdrvChild *root;
47	AioContext *ctx;
48	DriveInfo legacy_dinfo; /* null unless created by drive_new() /
49	QTAILQ_ENTRY(BlockBackend) link; / for block_backends /
50	QTAILQ_ENTRY(BlockBackend) monitor_link; / for monitor_block_backends /
51	BlockBackendPublic public;
52
53	DeviceState dev; /* attached device model, if any /
54	const BlockDevOps *dev_ops;
55	void *dev_opaque;
56
57	/ the block size for which the guest device expects atomicity /
58	int guest_block_size;
59
60	/ If the BDS tree is removed, some of its options are stored here (which*
61	* can be used to restore those options in the new BDS on insert) */
62	BlockBackendRootState root_state;
63
64	bool enable_write_cache;
65
66	/ I/O stats (display with "info blockstats"). /
67	BlockAcctStats stats;
68
69	BlockdevOnError on_read_error, on_write_error;
70	bool iostatus_enabled;
71	BlockDeviceIoStatus iostatus;
72
73	uint64_t perm;
74	uint64_t shared_perm;
75	bool disable_perm;
76
77	bool allow_aio_context_change;
78	bool allow_write_beyond_eof;
79
80	NotifierList remove_bs_notifiers, insert_bs_notifiers;
81	QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers;
82
83	int quiesce_counter;
84	CoQueue queued_requests;
85	bool disable_request_queuing;
86
87	VMChangeStateEntry *vmsh;
88	bool force_allow_inactivate;
89
90	/ Number of in-flight aio requests. BlockDriverState also counts*
91	* in-flight requests but aio requests can exist even when blk->root is
92	* NULL, so we cannot rely on its counter for that case.
93	* Accessed with atomic ops.
94	*/
95	unsigned int in_flight;
96	};
97
98	typedef struct BlockBackendAIOCB {
99	BlockAIOCB common;
100	BlockBackend *blk;
101	int ret;
102	} BlockBackendAIOCB;
103
104	static const AIOCBInfo block_backend_aiocb_info = {
105	.get_aio_context = blk_aiocb_get_aio_context,
106	.aiocb_size = sizeof(BlockBackendAIOCB),
107	};
108
109	static void drive_info_del(DriveInfo *dinfo);
110	static BlockBackend bdrv_first_blk(BlockDriverState bs);
111
112	/ All BlockBackends /
113	static QTAILQ_HEAD(, BlockBackend) block_backends =
114	QTAILQ_HEAD_INITIALIZER(block_backends);
115
116	/ All BlockBackends referenced by the monitor and which are iterated through by*
117	* blk_next() */
118	static QTAILQ_HEAD(, BlockBackend) monitor_block_backends =
119	QTAILQ_HEAD_INITIALIZER(monitor_block_backends);
120
121	static void blk_root_inherit_options(int child_flags, QDict child_options,
122	int parent_flags, QDict *parent_options)
123	{
124	/ We're not supposed to call this function for root nodes /
125	abort();
126	}
127	static void blk_root_drained_begin(BdrvChild *child);
128	static bool blk_root_drained_poll(BdrvChild *child);
129	static void blk_root_drained_end(BdrvChild child, int* *drained_end_counter);
130
131	static void blk_root_change_media(BdrvChild *child, bool load);
132	static void blk_root_resize(BdrvChild *child);
133
134	static bool blk_root_can_set_aio_ctx(BdrvChild child, AioContext ctx,
135	GSList ignore, Error errp);
136	static void blk_root_set_aio_ctx(BdrvChild child, AioContext ctx,
137	GSList **ignore);
138
139	static char blk_root_get_parent_desc(BdrvChild child)
140	{
141	BlockBackend *blk = child->opaque;
142	char *dev_id;
143
144	if (blk->name) {
145	return g_strdup(blk->name);
146	}
147
148	dev_id = blk_get_attached_dev_id(blk);
149	if (*dev_id) {
150	return dev_id;
151	} else {
152	/ TODO Callback into the BB owner for something more detailed /
153	g_free(dev_id);
154	return g_strdup("a block device");
155	}
156	}
157
158	static const char blk_root_get_name(BdrvChild child)
159	{
160	return blk_name(child->opaque);
161	}
162
163	static void blk_vm_state_changed(void opaque, int* running, RunState state)
164	{
165	Error *local_err = NULL;
166	BlockBackend *blk = opaque;
167
168	if (state == RUN_STATE_INMIGRATE) {
169	return;
170	}
171
172	qemu_del_vm_change_state_handler(blk->vmsh);
173	blk->vmsh = NULL;
174	blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err);
175	if (local_err) {
176	error_report_err(local_err);
177	}
178	}
179
180	/*
181	* Notifies the user of the BlockBackend that migration has completed. qdev
182	* devices can tighten their permissions in response (specifically revoke
183	* shared write permissions that we needed for storage migration).
184	*
185	* If an error is returned, the VM cannot be allowed to be resumed.
186	*/
187	static void blk_root_activate(BdrvChild child, Error *errp)
188	{
189	BlockBackend *blk = child->opaque;
190	Error *local_err = NULL;
191
192	if (!blk->disable_perm) {
193	return;
194	}
195
196	blk->disable_perm = false;
197
198	blk_set_perm(blk, blk->perm, BLK_PERM_ALL, &local_err);
199	if (local_err) {
200	error_propagate(errp, local_err);
201	blk->disable_perm = true;
202	return;
203	}
204
205	if (runstate_check(RUN_STATE_INMIGRATE)) {
206	/ Activation can happen when migration process is still active, for*
207	* example when nbd_server_add is called during non-shared storage
208	* migration. Defer the shared_perm update to migration completion. */
209	if (!blk->vmsh) {
210	blk->vmsh = qemu_add_vm_change_state_handler(blk_vm_state_changed,
211	blk);
212	}
213	return;
214	}
215
216	blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err);
217	if (local_err) {
218	error_propagate(errp, local_err);
219	blk->disable_perm = true;
220	return;
221	}
222	}
223
224	void blk_set_force_allow_inactivate(BlockBackend *blk)
225	{
226	blk->force_allow_inactivate = true;
227	}
228
229	static bool blk_can_inactivate(BlockBackend *blk)
230	{
231	/ If it is a guest device, inactivate is ok. /
232	if (blk->dev \|\| blk_name(blk)[`0`]) {
233	return true;
234	}
235
236	/ Inactivating means no more writes to the image can be done,*
237	* even if those writes would be changes invisible to the
238	* guest. For block job BBs that satisfy this, we can just allow
239	* it. This is the case for mirror job source, which is required
240	* by libvirt non-shared block migration. */
241	if (!(blk->perm & (BLK_PERM_WRITE \| BLK_PERM_WRITE_UNCHANGED))) {
242	return true;
243	}
244
245	return blk->force_allow_inactivate;
246	}
247
248	static int blk_root_inactivate(BdrvChild *child)
249	{
250	BlockBackend *blk = child->opaque;
251
252	if (blk->disable_perm) {
253	return `0`;
254	}
255
256	if (!blk_can_inactivate(blk)) {
257	return -EPERM;
258	}
259
260	blk->disable_perm = true;
261	if (blk->root) {
262	bdrv_child_try_set_perm(blk->root, `0`, BLK_PERM_ALL, &error_abort);
263	}
264
265	return `0`;
266	}
267
268	static void blk_root_attach(BdrvChild *child)
269	{
270	BlockBackend *blk = child->opaque;
271	BlockBackendAioNotifier *notifier;
272
273	trace_blk_root_attach(child, blk, child->bs);
274
275	QLIST_FOREACH(notifier, &blk->aio_notifiers, list) {
276	bdrv_add_aio_context_notifier(child->bs,
277	notifier->attached_aio_context,
278	notifier->detach_aio_context,
279	notifier->opaque);
280	}
281	}
282
283	static void blk_root_detach(BdrvChild *child)
284	{
285	BlockBackend *blk = child->opaque;
286	BlockBackendAioNotifier *notifier;
287
288	trace_blk_root_detach(child, blk, child->bs);
289
290	QLIST_FOREACH(notifier, &blk->aio_notifiers, list) {
291	bdrv_remove_aio_context_notifier(child->bs,
292	notifier->attached_aio_context,
293	notifier->detach_aio_context,
294	notifier->opaque);
295	}
296	}
297
298	static const BdrvChildRole child_root = {
299	.inherit_options = blk_root_inherit_options,
300
301	.change_media = blk_root_change_media,
302	.resize = blk_root_resize,
303	.get_name = blk_root_get_name,
304	.get_parent_desc = blk_root_get_parent_desc,
305
306	.drained_begin = blk_root_drained_begin,
307	.drained_poll = blk_root_drained_poll,
308	.drained_end = blk_root_drained_end,
309
310	.activate = blk_root_activate,
311	.inactivate = blk_root_inactivate,
312
313	.attach = blk_root_attach,
314	.detach = blk_root_detach,
315
316	.can_set_aio_ctx = blk_root_can_set_aio_ctx,
317	.set_aio_ctx = blk_root_set_aio_ctx,
318	};
319
320	/*
321	* Create a new BlockBackend with a reference count of one.
322	*
323	* @perm is a bitmasks of BLK_PERM_* constants which describes the permissions
324	* to request for a block driver node that is attached to this BlockBackend.
325	* @shared_perm is a bitmask which describes which permissions may be granted
326	* to other users of the attached node.
327	* Both sets of permissions can be changed later using blk_set_perm().
328	*
329	* Return the new BlockBackend on success, null on failure.
330	*/
331	BlockBackend blk_new(AioContext ctx, uint64_t perm, uint64_t shared_perm)
332	{
333	BlockBackend *blk;
334
335	blk = g_new0(BlockBackend, `1`);
336	blk->refcnt = `1`;
337	blk->ctx = ctx;
338	blk->perm = perm;
339	blk->shared_perm = shared_perm;
340	blk_set_enable_write_cache(blk, true);
341
342	blk->on_read_error = BLOCKDEV_ON_ERROR_REPORT;
343	blk->on_write_error = BLOCKDEV_ON_ERROR_ENOSPC;
344
345	block_acct_init(&blk->stats);
346
347	qemu_co_queue_init(&blk->queued_requests);
348	notifier_list_init(&blk->remove_bs_notifiers);
349	notifier_list_init(&blk->insert_bs_notifiers);
350	QLIST_INIT(&blk->aio_notifiers);
351
352	QTAILQ_INSERT_TAIL(&block_backends, blk, link);
353	return blk;
354	}
355
356	/*
357	* Creates a new BlockBackend, opens a new BlockDriverState, and connects both.
358	* The new BlockBackend is in the main AioContext.
359	*
360	* Just as with bdrv_open(), after having called this function the reference to
361	* @options belongs to the block layer (even on failure).
362	*
363	* TODO: Remove @filename and @flags; it should be possible to specify a whole
364	* BDS tree just by specifying the @options QDict (or @reference,
365	* alternatively). At the time of adding this function, this is not possible,
366	* though, so callers of this function have to be able to specify @filename and
367	* @flags.
368	*/
369	BlockBackend blk_new_open(const* char filename, const* char *reference,
370	QDict options, int* flags, Error **errp)
371	{
372	BlockBackend *blk;
373	BlockDriverState *bs;
374	uint64_t perm = `0`;
375
376	/ blk_new_open() is mainly used in .bdrv_create implementations and the*
377	* tools where sharing isn't a concern because the BDS stays private, so we
378	* just request permission according to the flags.
379	*
380	* The exceptions are xen_disk and blockdev_init(); in these cases, the
381	* caller of blk_new_open() doesn't make use of the permissions, but they
382	* shouldn't hurt either. We can still share everything here because the
383	* guest devices will add their own blockers if they can't share. */
384	if ((flags & BDRV_O_NO_IO) == `0`) {
385	perm \|= BLK_PERM_CONSISTENT_READ;
386	if (flags & BDRV_O_RDWR) {
387	perm \|= BLK_PERM_WRITE;
388	}
389	}
390	if (flags & BDRV_O_RESIZE) {
391	perm \|= BLK_PERM_RESIZE;
392	}
393
394	blk = blk_new(qemu_get_aio_context(), perm, BLK_PERM_ALL);
395	bs = bdrv_open(filename, reference, options, flags, errp);
396	if (!bs) {
397	blk_unref(blk);
398	return NULL;
399	}
400
401	blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk->ctx,
402	perm, BLK_PERM_ALL, blk, errp);
403	if (!blk->root) {
404	blk_unref(blk);
405	return NULL;
406	}
407
408	return blk;
409	}
410
411	static void blk_delete(BlockBackend *blk)
412	{
413	assert(!blk->refcnt);
414	assert(!blk->name);
415	assert(!blk->dev);
416	if (blk->public.throttle_group_member.throttle_state) {
417	blk_io_limits_disable(blk);
418	}
419	if (blk->root) {
420	blk_remove_bs(blk);
421	}
422	if (blk->vmsh) {
423	qemu_del_vm_change_state_handler(blk->vmsh);
424	blk->vmsh = NULL;
425	}
426	assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers));
427	assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers));
428	assert(QLIST_EMPTY(&blk->aio_notifiers));
429	QTAILQ_REMOVE(&block_backends, blk, link);
430	drive_info_del(blk->legacy_dinfo);
431	block_acct_cleanup(&blk->stats);
432	g_free(blk);
433	}
434
435	static void drive_info_del(DriveInfo *dinfo)
436	{
437	if (!dinfo) {
438	return;
439	}
440	qemu_opts_del(dinfo->opts);
441	g_free(dinfo);
442	}
443
444	int blk_get_refcnt(BlockBackend *blk)
445	{
446	return blk ? blk->refcnt : `0`;
447	}
448
449	/*
450	* Increment @blk's reference count.
451	* @blk must not be null.
452	*/
453	void blk_ref(BlockBackend *blk)
454	{
455	assert(blk->refcnt > `0`);
456	blk->refcnt++;
457	}
458
459	/*
460	* Decrement @blk's reference count.
461	* If this drops it to zero, destroy @blk.
462	* For convenience, do nothing if @blk is null.
463	*/
464	void blk_unref(BlockBackend *blk)
465	{
466	if (blk) {
467	assert(blk->refcnt > `0`);
468	if (blk->refcnt > `1`) {
469	blk->refcnt--;
470	} else {
471	blk_drain(blk);
472	/ blk_drain() cannot resurrect blk, nobody held a reference /
473	assert(blk->refcnt == `1`);
474	blk->refcnt = `0`;
475	blk_delete(blk);
476	}
477	}
478	}
479
480	/*
481	* Behaves similarly to blk_next() but iterates over all BlockBackends, even the
482	* ones which are hidden (i.e. are not referenced by the monitor).
483	*/
484	BlockBackend blk_all_next(BlockBackend blk)
485	{
486	return blk ? QTAILQ_NEXT(blk, link)
487	: QTAILQ_FIRST(&block_backends);
488	}
489
490	void blk_remove_all_bs(void)
491	{
492	BlockBackend *blk = NULL;
493
494	while ((blk = blk_all_next(blk)) != NULL) {
495	AioContext *ctx = blk_get_aio_context(blk);
496
497	aio_context_acquire(ctx);
498	if (blk->root) {
499	blk_remove_bs(blk);
500	}
501	aio_context_release(ctx);
502	}
503	}
504
505	/*
506	* Return the monitor-owned BlockBackend after @blk.
507	* If @blk is null, return the first one.
508	* Else, return @blk's next sibling, which may be null.
509	*
510	* To iterate over all BlockBackends, do
511	* for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
512	* ...
513	* }
514	*/
515	BlockBackend blk_next(BlockBackend blk)
516	{
517	return blk ? QTAILQ_NEXT(blk, monitor_link)
518	: QTAILQ_FIRST(&monitor_block_backends);
519	}
520
521	/ Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by*
522	* the monitor or attached to a BlockBackend */
523	BlockDriverState bdrv_next(BdrvNextIterator it)
524	{
525	BlockDriverState bs, old_bs;
526
527	/ Must be called from the main loop /
528	assert(qemu_get_current_aio_context() == qemu_get_aio_context());
529
530	/ First, return all root nodes of BlockBackends. In order to avoid*
531	* returning a BDS twice when multiple BBs refer to it, we only return it
532	* if the BB is the first one in the parent list of the BDS. */
533	if (it->phase == BDRV_NEXT_BACKEND_ROOTS) {
534	BlockBackend *old_blk = it->blk;
535
536	old_bs = old_blk ? blk_bs(old_blk) : NULL;
537
538	do {
539	it->blk = blk_all_next(it->blk);
540	bs = it->blk ? blk_bs(it->blk) : NULL;
541	} while (it->blk && (bs == NULL \|\| bdrv_first_blk(bs) != it->blk));
542
543	if (it->blk) {
544	blk_ref(it->blk);
545	}
546	blk_unref(old_blk);
547
548	if (bs) {
549	bdrv_ref(bs);
550	bdrv_unref(old_bs);
551	return bs;
552	}
553	it->phase = BDRV_NEXT_MONITOR_OWNED;
554	} else {
555	old_bs = it->bs;
556	}
557
558	/ Then return the monitor-owned BDSes without a BB attached. Ignore all*
559	* BDSes that are attached to a BlockBackend here; they have been handled
560	* by the above block already */
561	do {
562	it->bs = bdrv_next_monitor_owned(it->bs);
563	bs = it->bs;
564	} while (bs && bdrv_has_blk(bs));
565
566	if (bs) {
567	bdrv_ref(bs);
568	}
569	bdrv_unref(old_bs);
570
571	return bs;
572	}
573
574	static void bdrv_next_reset(BdrvNextIterator *it)
575	{
576	*it = (BdrvNextIterator) {
577	.phase = BDRV_NEXT_BACKEND_ROOTS,
578	};
579	}
580
581	BlockDriverState bdrv_first(BdrvNextIterator it)
582	{
583	bdrv_next_reset(it);
584	return bdrv_next(it);
585	}
586
587	/ Must be called when aborting a bdrv_next() iteration before*
588	* bdrv_next() returns NULL */
589	void bdrv_next_cleanup(BdrvNextIterator *it)
590	{
591	/ Must be called from the main loop /
592	assert(qemu_get_current_aio_context() == qemu_get_aio_context());
593
594	if (it->phase == BDRV_NEXT_BACKEND_ROOTS) {
595	if (it->blk) {
596	bdrv_unref(blk_bs(it->blk));
597	blk_unref(it->blk);
598	}
599	} else {
600	bdrv_unref(it->bs);
601	}
602
603	bdrv_next_reset(it);
604	}
605
606	/*
607	* Add a BlockBackend into the list of backends referenced by the monitor, with
608	* the given @name acting as the handle for the monitor.
609	* Strictly for use by blockdev.c.
610	*
611	* @name must not be null or empty.
612	*
613	* Returns true on success and false on failure. In the latter case, an Error
614	* object is returned through @errp.
615	*/
616	bool monitor_add_blk(BlockBackend blk, const* char name, Error *errp)
617	{
618	assert(!blk->name);
619	assert(name && name[`0`]);
620
621	if (!id_wellformed(name)) {
622	error_setg(errp, "Invalid device name");
623	return false;
624	}
625	if (blk_by_name(name)) {
626	error_setg(errp, "Device with id '%s' already exists", name);
627	return false;
628	}
629	if (bdrv_find_node(name)) {
630	error_setg(errp,
631	"Device name '%s' conflicts with an existing node name",
632	name);
633	return false;
634	}
635
636	blk->name = g_strdup(name);
637	QTAILQ_INSERT_TAIL(&monitor_block_backends, blk, monitor_link);
638	return true;
639	}
640
641	/*
642	* Remove a BlockBackend from the list of backends referenced by the monitor.
643	* Strictly for use by blockdev.c.
644	*/
645	void monitor_remove_blk(BlockBackend *blk)
646	{
647	if (!blk->name) {
648	return;
649	}
650
651	QTAILQ_REMOVE(&monitor_block_backends, blk, monitor_link);
652	g_free(blk->name);
653	blk->name = NULL;
654	}
655
656	/*
657	* Return @blk's name, a non-null string.
658	* Returns an empty string iff @blk is not referenced by the monitor.
659	*/
660	const char blk_name(const* BlockBackend *blk)
661	{
662	return blk->name ?: "";
663	}
664
665	/*
666	* Return the BlockBackend with name @name if it exists, else null.
667	* @name must not be null.
668	*/
669	BlockBackend blk_by_name(const* char *name)
670	{
671	BlockBackend *blk = NULL;
672
673	assert(name);
674	while ((blk = blk_next(blk)) != NULL) {
675	if (!strcmp(name, blk->name)) {
676	return blk;
677	}
678	}
679	return NULL;
680	}
681
682	/*
683	* Return the BlockDriverState attached to @blk if any, else null.
684	*/
685	BlockDriverState blk_bs(BlockBackend blk)
686	{
687	return blk->root ? blk->root->bs : NULL;
688	}
689
690	static BlockBackend bdrv_first_blk(BlockDriverState bs)
691	{
692	BdrvChild *child;
693	QLIST_FOREACH(child, &bs->parents, next_parent) {
694	if (child->role == &child_root) {
695	return child->opaque;
696	}
697	}
698
699	return NULL;
700	}
701
702	/*
703	* Returns true if @bs has an associated BlockBackend.
704	*/
705	bool bdrv_has_blk(BlockDriverState *bs)
706	{
707	return bdrv_first_blk(bs) != NULL;
708	}
709
710	/*
711	* Returns true if @bs has only BlockBackends as parents.
712	*/
713	bool bdrv_is_root_node(BlockDriverState *bs)
714	{
715	BdrvChild *c;
716
717	QLIST_FOREACH(c, &bs->parents, next_parent) {
718	if (c->role != &child_root) {
719	return false;
720	}
721	}
722
723	return true;
724	}
725
726	/*
727	* Return @blk's DriveInfo if any, else null.
728	*/
729	DriveInfo blk_legacy_dinfo(BlockBackend blk)
730	{
731	return blk->legacy_dinfo;
732	}
733
734	/*
735	* Set @blk's DriveInfo to @dinfo, and return it.
736	* @blk must not have a DriveInfo set already.
737	* No other BlockBackend may have the same DriveInfo set.
738	*/
739	DriveInfo blk_set_legacy_dinfo(BlockBackend blk, DriveInfo *dinfo)
740	{
741	assert(!blk->legacy_dinfo);
742	return blk->legacy_dinfo = dinfo;
743	}
744
745	/*
746	* Return the BlockBackend with DriveInfo @dinfo.
747	* It must exist.
748	*/
749	BlockBackend blk_by_legacy_dinfo(DriveInfo dinfo)
750	{
751	BlockBackend *blk = NULL;
752
753	while ((blk = blk_next(blk)) != NULL) {
754	if (blk->legacy_dinfo == dinfo) {
755	return blk;
756	}
757	}
758	abort();
759	}
760
761	/*
762	* Returns a pointer to the publicly accessible fields of @blk.
763	*/
764	BlockBackendPublic blk_get_public(BlockBackend blk)
765	{
766	return &blk->public;
767	}
768
769	/*
770	* Returns a BlockBackend given the associated @public fields.
771	*/
772	BlockBackend blk_by_public(BlockBackendPublic public)
773	{
774	return container_of(public, BlockBackend, public);
775	}
776
777	/*
778	* Disassociates the currently associated BlockDriverState from @blk.
779	*/
780	void blk_remove_bs(BlockBackend *blk)
781	{
782	ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
783	BlockDriverState *bs;
784
785	notifier_list_notify(&blk->remove_bs_notifiers, blk);
786	if (tgm->throttle_state) {
787	bs = blk_bs(blk);
788	bdrv_drained_begin(bs);
789	throttle_group_detach_aio_context(tgm);
790	throttle_group_attach_aio_context(tgm, qemu_get_aio_context());
791	bdrv_drained_end(bs);
792	}
793
794	blk_update_root_state(blk);
795
796	/ bdrv_root_unref_child() will cause blk->root to become stale and may*
797	* switch to a completion coroutine later on. Let's drain all I/O here
798	* to avoid that and a potential QEMU crash.
799	*/
800	blk_drain(blk);
801	bdrv_root_unref_child(blk->root);
802	blk->root = NULL;
803	}
804
805	/*
806	* Associates a new BlockDriverState with @blk.
807	*/
808	int blk_insert_bs(BlockBackend blk, BlockDriverState bs, Error **errp)
809	{
810	ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
811	bdrv_ref(bs);
812	blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk->ctx,
813	blk->perm, blk->shared_perm, blk, errp);
814	if (blk->root == NULL) {
815	return -EPERM;
816	}
817
818	notifier_list_notify(&blk->insert_bs_notifiers, blk);
819	if (tgm->throttle_state) {
820	throttle_group_detach_aio_context(tgm);
821	throttle_group_attach_aio_context(tgm, bdrv_get_aio_context(bs));
822	}
823
824	return `0`;
825	}
826
827	/*
828	* Sets the permission bitmasks that the user of the BlockBackend needs.
829	*/
830	int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
831	Error **errp)
832	{
833	int ret;
834
835	if (blk->root && !blk->disable_perm) {
836	ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp);
837	if (ret < `0`) {
838	return ret;
839	}
840	}
841
842	blk->perm = perm;
843	blk->shared_perm = shared_perm;
844
845	return `0`;
846	}
847
848	void blk_get_perm(BlockBackend blk, uint64_t perm, uint64_t *shared_perm)
849	{
850	*perm = blk->perm;
851	*shared_perm = blk->shared_perm;
852	}
853
854	/*
855	* Attach device model @dev to @blk.
856	* Return 0 on success, -EBUSY when a device model is attached already.
857	*/
858	int blk_attach_dev(BlockBackend blk, DeviceState dev)
859	{
860	if (blk->dev) {
861	return -EBUSY;
862	}
863
864	/ While migration is still incoming, we don't need to apply the*
865	* permissions of guest device BlockBackends. We might still have a block
866	* job or NBD server writing to the image for storage migration. */
867	if (runstate_check(RUN_STATE_INMIGRATE)) {
868	blk->disable_perm = true;
869	}
870
871	blk_ref(blk);
872	blk->dev = dev;
873	blk_iostatus_reset(blk);
874
875	return `0`;
876	}
877
878	/*
879	* Detach device model @dev from @blk.
880	* @dev must be currently attached to @blk.
881	*/
882	void blk_detach_dev(BlockBackend blk, DeviceState dev)
883	{
884	assert(blk->dev == dev);
885	blk->dev = NULL;
886	blk->dev_ops = NULL;
887	blk->dev_opaque = NULL;
888	blk->guest_block_size = `512`;
889	blk_set_perm(blk, `0`, BLK_PERM_ALL, &error_abort);
890	blk_unref(blk);
891	}
892
893	/*
894	* Return the device model attached to @blk if any, else null.
895	*/
896	DeviceState blk_get_attached_dev(BlockBackend blk)
897	{
898	return blk->dev;
899	}
900
901	/ Return the qdev ID, or if no ID is assigned the QOM path, of the block*
902	* device attached to the BlockBackend. */
903	char blk_get_attached_dev_id(BlockBackend blk)
904	{
905	DeviceState *dev = blk->dev;
906
907	if (!dev) {
908	return g_strdup("");
909	} else if (dev->id) {
910	return g_strdup(dev->id);
911	}
912
913	return object_get_canonical_path(OBJECT(dev)) ?: g_strdup("");
914	}
915
916	/*
917	* Return the BlockBackend which has the device model @dev attached if it
918	* exists, else null.
919	*
920	* @dev must not be null.
921	*/
922	BlockBackend blk_by_dev(void* *dev)
923	{
924	BlockBackend *blk = NULL;
925
926	assert(dev != NULL);
927	while ((blk = blk_all_next(blk)) != NULL) {
928	if (blk->dev == dev) {
929	return blk;
930	}
931	}
932	return NULL;
933	}
934
935	/*
936	* Set @blk's device model callbacks to @ops.
937	* @opaque is the opaque argument to pass to the callbacks.
938	* This is for use by device models.
939	*/
940	void blk_set_dev_ops(BlockBackend blk, const* BlockDevOps *ops,
941	void *opaque)
942	{
943	blk->dev_ops = ops;
944	blk->dev_opaque = opaque;
945
946	/ Are we currently quiesced? Should we enforce this right now? /
947	if (blk->quiesce_counter && ops->drained_begin) {
948	ops->drained_begin(opaque);
949	}
950	}
951
952	/*
953	* Notify @blk's attached device model of media change.
954	*
955	* If @load is true, notify of media load. This action can fail, meaning that
956	* the medium cannot be loaded. @errp is set then.
957	*
958	* If @load is false, notify of media eject. This can never fail.
959	*
960	* Also send DEVICE_TRAY_MOVED events as appropriate.
961	*/
962	void blk_dev_change_media_cb(BlockBackend blk, bool load, Error *errp)
963	{
964	if (blk->dev_ops && blk->dev_ops->change_media_cb) {
965	bool tray_was_open, tray_is_open;
966	Error *local_err = NULL;
967
968	tray_was_open = blk_dev_is_tray_open(blk);
969	blk->dev_ops->change_media_cb(blk->dev_opaque, load, &local_err);
970	if (local_err) {
971	assert(load == true);
972	error_propagate(errp, local_err);
973	return;
974	}
975	tray_is_open = blk_dev_is_tray_open(blk);
976
977	if (tray_was_open != tray_is_open) {
978	char *id = blk_get_attached_dev_id(blk);
979	qapi_event_send_device_tray_moved(blk_name(blk), id, tray_is_open);
980	g_free(id);
981	}
982	}
983	}
984
985	static void blk_root_change_media(BdrvChild *child, bool load)
986	{
987	blk_dev_change_media_cb(child->opaque, load, NULL);
988	}
989
990	/*
991	* Does @blk's attached device model have removable media?
992	* %true if no device model is attached.
993	*/
994	bool blk_dev_has_removable_media(BlockBackend *blk)
995	{
996	return !blk->dev \|\| (blk->dev_ops && blk->dev_ops->change_media_cb);
997	}
998
999	/*
1000	* Does @blk's attached device model have a tray?
1001	*/
1002	bool blk_dev_has_tray(BlockBackend *blk)
1003	{
1004	return blk->dev_ops && blk->dev_ops->is_tray_open;
1005	}
1006
1007	/*
1008	* Notify @blk's attached device model of a media eject request.
1009	* If @force is true, the medium is about to be yanked out forcefully.
1010	*/
1011	void blk_dev_eject_request(BlockBackend *blk, bool force)
1012	{
1013	if (blk->dev_ops && blk->dev_ops->eject_request_cb) {
1014	blk->dev_ops->eject_request_cb(blk->dev_opaque, force);
1015	}
1016	}
1017
1018	/*
1019	* Does @blk's attached device model have a tray, and is it open?
1020	*/
1021	bool blk_dev_is_tray_open(BlockBackend *blk)
1022	{
1023	if (blk_dev_has_tray(blk)) {
1024	return blk->dev_ops->is_tray_open(blk->dev_opaque);
1025	}
1026	return false;
1027	}
1028
1029	/*
1030	* Does @blk's attached device model have the medium locked?
1031	* %false if the device model has no such lock.
1032	*/
1033	bool blk_dev_is_medium_locked(BlockBackend *blk)
1034	{
1035	if (blk->dev_ops && blk->dev_ops->is_medium_locked) {
1036	return blk->dev_ops->is_medium_locked(blk->dev_opaque);
1037	}
1038	return false;
1039	}
1040
1041	/*
1042	* Notify @blk's attached device model of a backend size change.
1043	*/
1044	static void blk_root_resize(BdrvChild *child)
1045	{
1046	BlockBackend *blk = child->opaque;
1047
1048	if (blk->dev_ops && blk->dev_ops->resize_cb) {
1049	blk->dev_ops->resize_cb(blk->dev_opaque);
1050	}
1051	}
1052
1053	void blk_iostatus_enable(BlockBackend *blk)
1054	{
1055	blk->iostatus_enabled = true;
1056	blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
1057	}
1058
1059	/ The I/O status is only enabled if the drive explicitly*
1060	* enables it _and_ the VM is configured to stop on errors */
1061	bool blk_iostatus_is_enabled(const BlockBackend *blk)
1062	{
1063	return (blk->iostatus_enabled &&
1064	(blk->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC \|\|
1065	blk->on_write_error == BLOCKDEV_ON_ERROR_STOP \|\|
1066	blk->on_read_error == BLOCKDEV_ON_ERROR_STOP));
1067	}
1068
1069	BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk)
1070	{
1071	return blk->iostatus;
1072	}
1073
1074	void blk_iostatus_disable(BlockBackend *blk)
1075	{
1076	blk->iostatus_enabled = false;
1077	}
1078
1079	void blk_iostatus_reset(BlockBackend *blk)
1080	{
1081	if (blk_iostatus_is_enabled(blk)) {
1082	blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
1083	}
1084	}
1085
1086	void blk_iostatus_set_err(BlockBackend blk, int* error)
1087	{
1088	assert(blk_iostatus_is_enabled(blk));
1089	if (blk->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
1090	blk->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
1091	BLOCK_DEVICE_IO_STATUS_FAILED;
1092	}
1093	}
1094
1095	void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow)
1096	{
1097	blk->allow_write_beyond_eof = allow;
1098	}
1099
1100	void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow)
1101	{
1102	blk->allow_aio_context_change = allow;
1103	}
1104
1105	void blk_set_disable_request_queuing(BlockBackend *blk, bool disable)
1106	{
1107	blk->disable_request_queuing = disable;
1108	}
1109
1110	static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
1111	size_t size)
1112	{
1113	int64_t len;
1114
1115	if (size > INT_MAX) {
1116	return -EIO;
1117	}
1118
1119	if (!blk_is_available(blk)) {
1120	return -ENOMEDIUM;
1121	}
1122
1123	if (offset < `0`) {
1124	return -EIO;
1125	}
1126
1127	if (!blk->allow_write_beyond_eof) {
1128	len = blk_getlength(blk);
1129	if (len < `0`) {
1130	return len;
1131	}
1132
1133	if (offset > len \|\| len - offset < size) {
1134	return -EIO;
1135	}
1136	}
1137
1138	return `0`;
1139	}
1140
1141	static void coroutine_fn blk_wait_while_drained(BlockBackend *blk)
1142	{
1143	if (blk->quiesce_counter && !blk->disable_request_queuing) {
1144	qemu_co_queue_wait(&blk->queued_requests, NULL);
1145	}
1146	}
1147
1148	int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
1149	unsigned int bytes, QEMUIOVector *qiov,
1150	BdrvRequestFlags flags)
1151	{
1152	int ret;
1153	BlockDriverState *bs;
1154
1155	blk_wait_while_drained(blk);
1156
1157	/ Call blk_bs() only after waiting, the graph may have changed /
1158	bs = blk_bs(blk);
1159	trace_blk_co_preadv(blk, bs, offset, bytes, flags);
1160
1161	ret = blk_check_byte_request(blk, offset, bytes);
1162	if (ret < `0`) {
1163	return ret;
1164	}
1165
1166	bdrv_inc_in_flight(bs);
1167
1168	/ throttling disk I/O /
1169	if (blk->public.throttle_group_member.throttle_state) {
1170	throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member,
1171	bytes, false);
1172	}
1173
1174	ret = bdrv_co_preadv(blk->root, offset, bytes, qiov, flags);
1175	bdrv_dec_in_flight(bs);
1176	return ret;
1177	}
1178
1179	int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
1180	unsigned int bytes, QEMUIOVector *qiov,
1181	BdrvRequestFlags flags)
1182	{
1183	int ret;
1184	BlockDriverState *bs;
1185
1186	blk_wait_while_drained(blk);
1187
1188	/ Call blk_bs() only after waiting, the graph may have changed /
1189	bs = blk_bs(blk);
1190	trace_blk_co_pwritev(blk, bs, offset, bytes, flags);
1191
1192	ret = blk_check_byte_request(blk, offset, bytes);
1193	if (ret < `0`) {
1194	return ret;
1195	}
1196
1197	bdrv_inc_in_flight(bs);
1198	/ throttling disk I/O /
1199	if (blk->public.throttle_group_member.throttle_state) {
1200	throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member,
1201	bytes, true);
1202	}
1203
1204	if (!blk->enable_write_cache) {
1205	flags \|= BDRV_REQ_FUA;
1206	}
1207
1208	ret = bdrv_co_pwritev(blk->root, offset, bytes, qiov, flags);
1209	bdrv_dec_in_flight(bs);
1210	return ret;
1211	}
1212
1213	typedef struct BlkRwCo {
1214	BlockBackend *blk;
1215	int64_t offset;
1216	void *iobuf;
1217	int ret;
1218	BdrvRequestFlags flags;
1219	} BlkRwCo;
1220
1221	static void blk_read_entry(void *opaque)
1222	{
1223	BlkRwCo *rwco = opaque;
1224	QEMUIOVector *qiov = rwco->iobuf;
1225
1226	rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, qiov->size,
1227	qiov, rwco->flags);
1228	aio_wait_kick();
1229	}
1230
1231	static void blk_write_entry(void *opaque)
1232	{
1233	BlkRwCo *rwco = opaque;
1234	QEMUIOVector *qiov = rwco->iobuf;
1235
1236	rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, qiov->size,
1237	qiov, rwco->flags);
1238	aio_wait_kick();
1239	}
1240
1241	static int blk_prw(BlockBackend blk, int64_t offset, uint8_t buf,
1242	int64_t bytes, CoroutineEntry co_entry,
1243	BdrvRequestFlags flags)
1244	{
1245	QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
1246	BlkRwCo rwco = {
1247	.blk = blk,
1248	.offset = offset,
1249	.iobuf = &qiov,
1250	.flags = flags,
1251	.ret = NOT_DONE,
1252	};
1253
1254	if (qemu_in_coroutine()) {
1255	/ Fast-path if already in coroutine context /
1256	co_entry(&rwco);
1257	} else {
1258	Coroutine *co = qemu_coroutine_create(co_entry, &rwco);
1259	bdrv_coroutine_enter(blk_bs(blk), co);
1260	BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
1261	}
1262
1263	return rwco.ret;
1264	}
1265
1266	int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
1267	int bytes, BdrvRequestFlags flags)
1268	{
1269	return blk_prw(blk, offset, NULL, bytes, blk_write_entry,
1270	flags \| BDRV_REQ_ZERO_WRITE);
1271	}
1272
1273	int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
1274	{
1275	return bdrv_make_zero(blk->root, flags);
1276	}
1277
1278	void blk_inc_in_flight(BlockBackend *blk)
1279	{
1280	atomic_inc(&blk->in_flight);
1281	}
1282
1283	void blk_dec_in_flight(BlockBackend *blk)
1284	{
1285	atomic_dec(&blk->in_flight);
1286	aio_wait_kick();
1287	}
1288
1289	static void error_callback_bh(void *opaque)
1290	{
1291	struct BlockBackendAIOCB *acb = opaque;
1292
1293	blk_dec_in_flight(acb->blk);
1294	acb->common.cb(acb->common.opaque, acb->ret);
1295	qemu_aio_unref(acb);
1296	}
1297
1298	BlockAIOCB blk_abort_aio_request(BlockBackend blk,
1299	BlockCompletionFunc *cb,
1300	void opaque, int* ret)
1301	{
1302	struct BlockBackendAIOCB *acb;
1303
1304	blk_inc_in_flight(blk);
1305	acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque);
1306	acb->blk = blk;
1307	acb->ret = ret;
1308
1309	aio_bh_schedule_oneshot(blk_get_aio_context(blk), error_callback_bh, acb);
1310	return &acb->common;
1311	}
1312
1313	typedef struct BlkAioEmAIOCB {
1314	BlockAIOCB common;
1315	BlkRwCo rwco;
1316	int bytes;
1317	bool has_returned;
1318	} BlkAioEmAIOCB;
1319
1320	static const AIOCBInfo blk_aio_em_aiocb_info = {
1321	.aiocb_size = sizeof(BlkAioEmAIOCB),
1322	};
1323
1324	static void blk_aio_complete(BlkAioEmAIOCB *acb)
1325	{
1326	if (acb->has_returned) {
1327	acb->common.cb(acb->common.opaque, acb->rwco.ret);
1328	blk_dec_in_flight(acb->rwco.blk);
1329	qemu_aio_unref(acb);
1330	}
1331	}
1332
1333	static void blk_aio_complete_bh(void *opaque)
1334	{
1335	BlkAioEmAIOCB *acb = opaque;
1336	assert(acb->has_returned);
1337	blk_aio_complete(acb);
1338	}
1339
1340	static BlockAIOCB blk_aio_prwv(BlockBackend blk, int64_t offset, int bytes,
1341	void *iobuf, CoroutineEntry co_entry,
1342	BdrvRequestFlags flags,
1343	BlockCompletionFunc cb, void* *opaque)
1344	{
1345	BlkAioEmAIOCB *acb;
1346	Coroutine *co;
1347
1348	blk_inc_in_flight(blk);
1349	acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
1350	acb->rwco = (BlkRwCo) {
1351	.blk = blk,
1352	.offset = offset,
1353	.iobuf = iobuf,
1354	.flags = flags,
1355	.ret = NOT_DONE,
1356	};
1357	acb->bytes = bytes;
1358	acb->has_returned = false;
1359
1360	co = qemu_coroutine_create(co_entry, acb);
1361	bdrv_coroutine_enter(blk_bs(blk), co);
1362
1363	acb->has_returned = true;
1364	if (acb->rwco.ret != NOT_DONE) {
1365	aio_bh_schedule_oneshot(blk_get_aio_context(blk),
1366	blk_aio_complete_bh, acb);
1367	}
1368
1369	return &acb->common;
1370	}
1371
1372	static void blk_aio_read_entry(void *opaque)
1373	{
1374	BlkAioEmAIOCB *acb = opaque;
1375	BlkRwCo *rwco = &acb->rwco;
1376	QEMUIOVector *qiov = rwco->iobuf;
1377
1378	if (rwco->blk->quiesce_counter) {
1379	blk_dec_in_flight(rwco->blk);
1380	blk_wait_while_drained(rwco->blk);
1381	blk_inc_in_flight(rwco->blk);
1382	}
1383
1384	assert(qiov->size == acb->bytes);
1385	rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes,
1386	qiov, rwco->flags);
1387	blk_aio_complete(acb);
1388	}
1389
1390	static void blk_aio_write_entry(void *opaque)
1391	{
1392	BlkAioEmAIOCB *acb = opaque;
1393	BlkRwCo *rwco = &acb->rwco;
1394	QEMUIOVector *qiov = rwco->iobuf;
1395
1396	if (rwco->blk->quiesce_counter) {
1397	blk_dec_in_flight(rwco->blk);
1398	blk_wait_while_drained(rwco->blk);
1399	blk_inc_in_flight(rwco->blk);
1400	}
1401
1402	assert(!qiov \|\| qiov->size == acb->bytes);
1403	rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes,
1404	qiov, rwco->flags);
1405	blk_aio_complete(acb);
1406	}
1407
1408	BlockAIOCB blk_aio_pwrite_zeroes(BlockBackend blk, int64_t offset,
1409	int count, BdrvRequestFlags flags,
1410	BlockCompletionFunc cb, void* *opaque)
1411	{
1412	return blk_aio_prwv(blk, offset, count, NULL, blk_aio_write_entry,
1413	flags \| BDRV_REQ_ZERO_WRITE, cb, opaque);
1414	}
1415
1416	int blk_pread(BlockBackend blk, int64_t offset, void* buf, int* count)
1417	{
1418	int ret = blk_prw(blk, offset, buf, count, blk_read_entry, `0`);
1419	if (ret < `0`) {
1420	return ret;
1421	}
1422	return count;
1423	}
1424
1425	int blk_pwrite(BlockBackend blk, int64_t offset, const* void buf, int* count,
1426	BdrvRequestFlags flags)
1427	{
1428	int ret = blk_prw(blk, offset, (void *) buf, count, blk_write_entry,
1429	flags);
1430	if (ret < `0`) {
1431	return ret;
1432	}
1433	return count;
1434	}
1435
1436	int64_t blk_getlength(BlockBackend *blk)
1437	{
1438	if (!blk_is_available(blk)) {
1439	return -ENOMEDIUM;
1440	}
1441
1442	return bdrv_getlength(blk_bs(blk));
1443	}
1444
1445	void blk_get_geometry(BlockBackend blk, uint64_t nb_sectors_ptr)
1446	{
1447	if (!blk_bs(blk)) {
1448	*nb_sectors_ptr = `0`;
1449	} else {
1450	bdrv_get_geometry(blk_bs(blk), nb_sectors_ptr);
1451	}
1452	}
1453
1454	int64_t blk_nb_sectors(BlockBackend *blk)
1455	{
1456	if (!blk_is_available(blk)) {
1457	return -ENOMEDIUM;
1458	}
1459
1460	return bdrv_nb_sectors(blk_bs(blk));
1461	}
1462
1463	BlockAIOCB blk_aio_preadv(BlockBackend blk, int64_t offset,
1464	QEMUIOVector *qiov, BdrvRequestFlags flags,
1465	BlockCompletionFunc cb, void* *opaque)
1466	{
1467	return blk_aio_prwv(blk, offset, qiov->size, qiov,
1468	blk_aio_read_entry, flags, cb, opaque);
1469	}
1470
1471	BlockAIOCB blk_aio_pwritev(BlockBackend blk, int64_t offset,
1472	QEMUIOVector *qiov, BdrvRequestFlags flags,
1473	BlockCompletionFunc cb, void* *opaque)
1474	{
1475	return blk_aio_prwv(blk, offset, qiov->size, qiov,
1476	blk_aio_write_entry, flags, cb, opaque);
1477	}
1478
1479	static void blk_aio_flush_entry(void *opaque)
1480	{
1481	BlkAioEmAIOCB *acb = opaque;
1482	BlkRwCo *rwco = &acb->rwco;
1483
1484	rwco->ret = blk_co_flush(rwco->blk);
1485	blk_aio_complete(acb);
1486	}
1487
1488	BlockAIOCB blk_aio_flush(BlockBackend blk,
1489	BlockCompletionFunc cb, void* *opaque)
1490	{
1491	return blk_aio_prwv(blk, `0`, `0`, NULL, blk_aio_flush_entry, `0`, cb, opaque);
1492	}
1493
1494	static void blk_aio_pdiscard_entry(void *opaque)
1495	{
1496	BlkAioEmAIOCB *acb = opaque;
1497	BlkRwCo *rwco = &acb->rwco;
1498
1499	rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, acb->bytes);
1500	blk_aio_complete(acb);
1501	}
1502
1503	BlockAIOCB blk_aio_pdiscard(BlockBackend blk,
1504	int64_t offset, int bytes,
1505	BlockCompletionFunc cb, void* *opaque)
1506	{
1507	return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, `0`,
1508	cb, opaque);
1509	}
1510
1511	void blk_aio_cancel(BlockAIOCB *acb)
1512	{
1513	bdrv_aio_cancel(acb);
1514	}
1515
1516	void blk_aio_cancel_async(BlockAIOCB *acb)
1517	{
1518	bdrv_aio_cancel_async(acb);
1519	}
1520
1521	int blk_co_ioctl(BlockBackend blk, unsigned* long int req, void *buf)
1522	{
1523	blk_wait_while_drained(blk);
1524
1525	if (!blk_is_available(blk)) {
1526	return -ENOMEDIUM;
1527	}
1528
1529	return bdrv_co_ioctl(blk_bs(blk), req, buf);
1530	}
1531
1532	static void blk_ioctl_entry(void *opaque)
1533	{
1534	BlkRwCo *rwco = opaque;
1535	QEMUIOVector *qiov = rwco->iobuf;
1536
1537	rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
1538	qiov->iov[`0`].iov_base);
1539	aio_wait_kick();
1540	}
1541
1542	int blk_ioctl(BlockBackend blk, unsigned* long int req, void *buf)
1543	{
1544	return blk_prw(blk, req, buf, `0`, blk_ioctl_entry, `0`);
1545	}
1546
1547	static void blk_aio_ioctl_entry(void *opaque)
1548	{
1549	BlkAioEmAIOCB *acb = opaque;
1550	BlkRwCo *rwco = &acb->rwco;
1551
1552	rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, rwco->iobuf);
1553
1554	blk_aio_complete(acb);
1555	}
1556
1557	BlockAIOCB blk_aio_ioctl(BlockBackend blk, unsigned long int req, void *buf,
1558	BlockCompletionFunc cb, void* *opaque)
1559	{
1560	return blk_aio_prwv(blk, req, `0`, buf, blk_aio_ioctl_entry, `0`, cb, opaque);
1561	}
1562
1563	int blk_co_pdiscard(BlockBackend blk, int64_t offset, int* bytes)
1564	{
1565	int ret;
1566
1567	blk_wait_while_drained(blk);
1568
1569	ret = blk_check_byte_request(blk, offset, bytes);
1570	if (ret < `0`) {
1571	return ret;
1572	}
1573
1574	return bdrv_co_pdiscard(blk->root, offset, bytes);
1575	}
1576
1577	int blk_co_flush(BlockBackend *blk)
1578	{
1579	blk_wait_while_drained(blk);
1580
1581	if (!blk_is_available(blk)) {
1582	return -ENOMEDIUM;
1583	}
1584
1585	return bdrv_co_flush(blk_bs(blk));
1586	}
1587
1588	static void blk_flush_entry(void *opaque)
1589	{
1590	BlkRwCo *rwco = opaque;
1591	rwco->ret = blk_co_flush(rwco->blk);
1592	aio_wait_kick();
1593	}
1594
1595	int blk_flush(BlockBackend *blk)
1596	{
1597	return blk_prw(blk, `0`, NULL, `0`, blk_flush_entry, `0`);
1598	}
1599
1600	void blk_drain(BlockBackend *blk)
1601	{
1602	BlockDriverState *bs = blk_bs(blk);
1603
1604	if (bs) {
1605	bdrv_drained_begin(bs);
1606	}
1607
1608	/ We may have -ENOMEDIUM completions in flight /
1609	AIO_WAIT_WHILE(blk_get_aio_context(blk),
1610	atomic_mb_read(&blk->in_flight) > `0`);
1611
1612	if (bs) {
1613	bdrv_drained_end(bs);
1614	}
1615	}
1616
1617	void blk_drain_all(void)
1618	{
1619	BlockBackend *blk = NULL;
1620
1621	bdrv_drain_all_begin();
1622
1623	while ((blk = blk_all_next(blk)) != NULL) {
1624	AioContext *ctx = blk_get_aio_context(blk);
1625
1626	aio_context_acquire(ctx);
1627
1628	/ We may have -ENOMEDIUM completions in flight /
1629	AIO_WAIT_WHILE(ctx, atomic_mb_read(&blk->in_flight) > `0`);
1630
1631	aio_context_release(ctx);
1632	}
1633
1634	bdrv_drain_all_end();
1635	}
1636
1637	void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
1638	BlockdevOnError on_write_error)
1639	{
1640	blk->on_read_error = on_read_error;
1641	blk->on_write_error = on_write_error;
1642	}
1643
1644	BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read)
1645	{
1646	return is_read ? blk->on_read_error : blk->on_write_error;
1647	}
1648
1649	BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
1650	int error)
1651	{
1652	BlockdevOnError on_err = blk_get_on_error(blk, is_read);
1653
1654	switch (on_err) {
1655	case BLOCKDEV_ON_ERROR_ENOSPC:
1656	return (error == ENOSPC) ?
1657	BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
1658	case BLOCKDEV_ON_ERROR_STOP:
1659	return BLOCK_ERROR_ACTION_STOP;
1660	case BLOCKDEV_ON_ERROR_REPORT:
1661	return BLOCK_ERROR_ACTION_REPORT;
1662	case BLOCKDEV_ON_ERROR_IGNORE:
1663	return BLOCK_ERROR_ACTION_IGNORE;
1664	case BLOCKDEV_ON_ERROR_AUTO:
1665	default:
1666	abort();
1667	}
1668	}
1669
1670	static void send_qmp_error_event(BlockBackend *blk,
1671	BlockErrorAction action,
1672	bool is_read, int error)
1673	{
1674	IoOperationType optype;
1675	BlockDriverState *bs = blk_bs(blk);
1676
1677	optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
1678	qapi_event_send_block_io_error(blk_name(blk), !!bs,
1679	bs ? bdrv_get_node_name(bs) : NULL, optype,
1680	action, blk_iostatus_is_enabled(blk),
1681	error == ENOSPC, strerror(error));
1682	}
1683
1684	/ This is done by device models because, while the block layer knows*
1685	* about the error, it does not know whether an operation comes from
1686	* the device or the block layer (from a job, for example).
1687	*/
1688	void blk_error_action(BlockBackend *blk, BlockErrorAction action,
1689	bool is_read, int error)
1690	{
1691	assert(error >= `0`);
1692
1693	if (action == BLOCK_ERROR_ACTION_STOP) {
1694	/ First set the iostatus, so that "info block" returns an iostatus*
1695	* that matches the events raised so far (an additional error iostatus
1696	* is fine, but not a lost one).
1697	*/
1698	blk_iostatus_set_err(blk, error);
1699
1700	/ Then raise the request to stop the VM and the event.*
1701	* qemu_system_vmstop_request_prepare has two effects. First,
1702	* it ensures that the STOP event always comes after the
1703	* BLOCK_IO_ERROR event. Second, it ensures that even if management
1704	* can observe the STOP event and do a "cont" before the STOP
1705	* event is issued, the VM will not stop. In this case, vm_start()
1706	* also ensures that the STOP/RESUME pair of events is emitted.
1707	*/
1708	qemu_system_vmstop_request_prepare();
1709	send_qmp_error_event(blk, action, is_read, error);
1710	qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
1711	} else {
1712	send_qmp_error_event(blk, action, is_read, error);
1713	}
1714	}
1715
1716	bool blk_is_read_only(BlockBackend *blk)
1717	{
1718	BlockDriverState *bs = blk_bs(blk);
1719
1720	if (bs) {
1721	return bdrv_is_read_only(bs);
1722	} else {
1723	return blk->root_state.read_only;
1724	}
1725	}
1726
1727	bool blk_is_sg(BlockBackend *blk)
1728	{
1729	BlockDriverState *bs = blk_bs(blk);
1730
1731	if (!bs) {
1732	return false;
1733	}
1734
1735	return bdrv_is_sg(bs);
1736	}
1737
1738	bool blk_enable_write_cache(BlockBackend *blk)
1739	{
1740	return blk->enable_write_cache;
1741	}
1742
1743	void blk_set_enable_write_cache(BlockBackend *blk, bool wce)
1744	{
1745	blk->enable_write_cache = wce;
1746	}
1747
1748	void blk_invalidate_cache(BlockBackend blk, Error *errp)
1749	{
1750	BlockDriverState *bs = blk_bs(blk);
1751
1752	if (!bs) {
1753	error_setg(errp, "Device '%s' has no medium", blk->name);
1754	return;
1755	}
1756
1757	bdrv_invalidate_cache(bs, errp);
1758	}
1759
1760	bool blk_is_inserted(BlockBackend *blk)
1761	{
1762	BlockDriverState *bs = blk_bs(blk);
1763
1764	return bs && bdrv_is_inserted(bs);
1765	}
1766
1767	bool blk_is_available(BlockBackend *blk)
1768	{
1769	return blk_is_inserted(blk) && !blk_dev_is_tray_open(blk);
1770	}
1771
1772	void blk_lock_medium(BlockBackend *blk, bool locked)
1773	{
1774	BlockDriverState *bs = blk_bs(blk);
1775
1776	if (bs) {
1777	bdrv_lock_medium(bs, locked);
1778	}
1779	}
1780
1781	void blk_eject(BlockBackend *blk, bool eject_flag)
1782	{
1783	BlockDriverState *bs = blk_bs(blk);
1784	char *id;
1785
1786	if (bs) {
1787	bdrv_eject(bs, eject_flag);
1788	}
1789
1790	/ Whether or not we ejected on the backend,*
1791	* the frontend experienced a tray event. */
1792	id = blk_get_attached_dev_id(blk);
1793	qapi_event_send_device_tray_moved(blk_name(blk), id,
1794	eject_flag);
1795	g_free(id);
1796	}
1797
1798	int blk_get_flags(BlockBackend *blk)
1799	{
1800	BlockDriverState *bs = blk_bs(blk);
1801
1802	if (bs) {
1803	return bdrv_get_flags(bs);
1804	} else {
1805	return blk->root_state.open_flags;
1806	}
1807	}
1808
1809	/ Returns the minimum request alignment, in bytes; guaranteed nonzero /
1810	uint32_t blk_get_request_alignment(BlockBackend *blk)
1811	{
1812	BlockDriverState *bs = blk_bs(blk);
1813	return bs ? bs->bl.request_alignment : BDRV_SECTOR_SIZE;
1814	}
1815
1816	/ Returns the maximum transfer length, in bytes; guaranteed nonzero /
1817	uint32_t blk_get_max_transfer(BlockBackend *blk)
1818	{
1819	BlockDriverState *bs = blk_bs(blk);
1820	uint32_t max = `0`;
1821
1822	if (bs) {
1823	max = bs->bl.max_transfer;
1824	}
1825	return MIN_NON_ZERO(max, INT_MAX);
1826	}
1827
1828	int blk_get_max_iov(BlockBackend *blk)
1829	{
1830	return blk->root->bs->bl.max_iov;
1831	}
1832
1833	void blk_set_guest_block_size(BlockBackend blk, int* align)
1834	{
1835	blk->guest_block_size = align;
1836	}
1837
1838	void blk_try_blockalign(BlockBackend blk, size_t size)
1839	{
1840	return qemu_try_blockalign(blk ? blk_bs(blk) : NULL, size);
1841	}
1842
1843	void blk_blockalign(BlockBackend blk, size_t size)
1844	{
1845	return qemu_blockalign(blk ? blk_bs(blk) : NULL, size);
1846	}
1847
1848	bool blk_op_is_blocked(BlockBackend blk, BlockOpType op, Error *errp)
1849	{
1850	BlockDriverState *bs = blk_bs(blk);
1851
1852	if (!bs) {
1853	return false;
1854	}
1855
1856	return bdrv_op_is_blocked(bs, op, errp);
1857	}
1858
1859	void blk_op_unblock(BlockBackend blk, BlockOpType op, Error reason)
1860	{
1861	BlockDriverState *bs = blk_bs(blk);
1862
1863	if (bs) {
1864	bdrv_op_unblock(bs, op, reason);
1865	}
1866	}
1867
1868	void blk_op_block_all(BlockBackend blk, Error reason)
1869	{
1870	BlockDriverState *bs = blk_bs(blk);
1871
1872	if (bs) {
1873	bdrv_op_block_all(bs, reason);
1874	}
1875	}
1876
1877	void blk_op_unblock_all(BlockBackend blk, Error reason)
1878	{
1879	BlockDriverState *bs = blk_bs(blk);
1880
1881	if (bs) {
1882	bdrv_op_unblock_all(bs, reason);
1883	}
1884	}
1885
1886	AioContext blk_get_aio_context(BlockBackend blk)
1887	{
1888	BlockDriverState *bs = blk_bs(blk);
1889
1890	if (bs) {
1891	AioContext *ctx = bdrv_get_aio_context(blk_bs(blk));
1892	assert(ctx == blk->ctx);
1893	}
1894
1895	return blk->ctx;
1896	}
1897
1898	static AioContext blk_aiocb_get_aio_context(BlockAIOCB acb)
1899	{
1900	BlockBackendAIOCB *blk_acb = DO_UPCAST(BlockBackendAIOCB, common, acb);
1901	return blk_get_aio_context(blk_acb->blk);
1902	}
1903
1904	static int blk_do_set_aio_context(BlockBackend blk, AioContext new_context,
1905	bool update_root_node, Error **errp)
1906	{
1907	BlockDriverState *bs = blk_bs(blk);
1908	ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
1909	int ret;
1910
1911	if (bs) {
1912	if (update_root_node) {
1913	ret = bdrv_child_try_set_aio_context(bs, new_context, blk->root,
1914	errp);
1915	if (ret < `0`) {
1916	return ret;
1917	}
1918	}
1919	if (tgm->throttle_state) {
1920	bdrv_drained_begin(bs);
1921	throttle_group_detach_aio_context(tgm);
1922	throttle_group_attach_aio_context(tgm, new_context);
1923	bdrv_drained_end(bs);
1924	}
1925	}
1926
1927	blk->ctx = new_context;
1928	return `0`;
1929	}
1930
1931	int blk_set_aio_context(BlockBackend blk, AioContext new_context,
1932	Error **errp)
1933	{
1934	return blk_do_set_aio_context(blk, new_context, true, errp);
1935	}
1936
1937	static bool blk_root_can_set_aio_ctx(BdrvChild child, AioContext ctx,
1938	GSList ignore, Error errp)
1939	{
1940	BlockBackend *blk = child->opaque;
1941
1942	if (blk->allow_aio_context_change) {
1943	return true;
1944	}
1945
1946	/ Only manually created BlockBackends that are not attached to anything*
1947	* can change their AioContext without updating their user. */
1948	if (!blk->name \|\| blk->dev) {
1949	/ TODO Add BB name/QOM path /
1950	error_setg(errp, "Cannot change iothread of active block backend");
1951	return false;
1952	}
1953
1954	return true;
1955	}
1956
1957	static void blk_root_set_aio_ctx(BdrvChild child, AioContext ctx,
1958	GSList **ignore)
1959	{
1960	BlockBackend *blk = child->opaque;
1961	blk_do_set_aio_context(blk, ctx, false, &error_abort);
1962	}
1963
1964	void blk_add_aio_context_notifier(BlockBackend *blk,
1965	void (attached_aio_context)(AioContext new_context, void *opaque),
1966	void (detach_aio_context)(void* opaque), void* *opaque)
1967	{
1968	BlockBackendAioNotifier *notifier;
1969	BlockDriverState *bs = blk_bs(blk);
1970
1971	notifier = g_new(BlockBackendAioNotifier, `1`);
1972	notifier->attached_aio_context = attached_aio_context;
1973	notifier->detach_aio_context = detach_aio_context;
1974	notifier->opaque = opaque;
1975	QLIST_INSERT_HEAD(&blk->aio_notifiers, notifier, list);
1976
1977	if (bs) {
1978	bdrv_add_aio_context_notifier(bs, attached_aio_context,
1979	detach_aio_context, opaque);
1980	}
1981	}
1982
1983	void blk_remove_aio_context_notifier(BlockBackend *blk,
1984	void (attached_aio_context)(AioContext ,
1985	void *),
1986	void (detach_aio_context)(void* *),
1987	void *opaque)
1988	{
1989	BlockBackendAioNotifier *notifier;
1990	BlockDriverState *bs = blk_bs(blk);
1991
1992	if (bs) {
1993	bdrv_remove_aio_context_notifier(bs, attached_aio_context,
1994	detach_aio_context, opaque);
1995	}
1996
1997	QLIST_FOREACH(notifier, &blk->aio_notifiers, list) {
1998	if (notifier->attached_aio_context == attached_aio_context &&
1999	notifier->detach_aio_context == detach_aio_context &&
2000	notifier->opaque == opaque) {
2001	QLIST_REMOVE(notifier, list);
2002	g_free(notifier);
2003	return;
2004	}
2005	}
2006
2007	abort();
2008	}
2009
2010	void blk_add_remove_bs_notifier(BlockBackend blk, Notifier notify)
2011	{
2012	notifier_list_add(&blk->remove_bs_notifiers, notify);
2013	}
2014
2015	void blk_add_insert_bs_notifier(BlockBackend blk, Notifier notify)
2016	{
2017	notifier_list_add(&blk->insert_bs_notifiers, notify);
2018	}
2019
2020	void blk_io_plug(BlockBackend *blk)
2021	{
2022	BlockDriverState *bs = blk_bs(blk);
2023
2024	if (bs) {
2025	bdrv_io_plug(bs);
2026	}
2027	}
2028
2029	void blk_io_unplug(BlockBackend *blk)
2030	{
2031	BlockDriverState *bs = blk_bs(blk);
2032
2033	if (bs) {
2034	bdrv_io_unplug(bs);
2035	}
2036	}
2037
2038	BlockAcctStats blk_get_stats(BlockBackend blk)
2039	{
2040	return &blk->stats;
2041	}
2042
2043	void blk_aio_get(const* AIOCBInfo aiocb_info, BlockBackend blk,
2044	BlockCompletionFunc cb, void* *opaque)
2045	{
2046	return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque);
2047	}
2048
2049	int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
2050	int bytes, BdrvRequestFlags flags)
2051	{
2052	return blk_co_pwritev(blk, offset, bytes, NULL,
2053	flags \| BDRV_REQ_ZERO_WRITE);
2054	}
2055
2056	int blk_pwrite_compressed(BlockBackend blk, int64_t offset, const* void *buf,
2057	int count)
2058	{
2059	return blk_prw(blk, offset, (void *) buf, count, blk_write_entry,
2060	BDRV_REQ_WRITE_COMPRESSED);
2061	}
2062
2063	int blk_truncate(BlockBackend *blk, int64_t offset, PreallocMode prealloc,
2064	Error **errp)
2065	{
2066	if (!blk_is_available(blk)) {
2067	error_setg(errp, "No medium inserted");
2068	return -ENOMEDIUM;
2069	}
2070
2071	return bdrv_truncate(blk->root, offset, prealloc, errp);
2072	}
2073
2074	static void blk_pdiscard_entry(void *opaque)
2075	{
2076	BlkRwCo *rwco = opaque;
2077	QEMUIOVector *qiov = rwco->iobuf;
2078
2079	rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size);
2080	aio_wait_kick();
2081	}
2082
2083	int blk_pdiscard(BlockBackend blk, int64_t offset, int* bytes)
2084	{
2085	return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, `0`);
2086	}
2087
2088	int blk_save_vmstate(BlockBackend blk, const* uint8_t *buf,
2089	int64_t pos, int size)
2090	{
2091	int ret;
2092
2093	if (!blk_is_available(blk)) {
2094	return -ENOMEDIUM;
2095	}
2096
2097	ret = bdrv_save_vmstate(blk_bs(blk), buf, pos, size);
2098	if (ret < `0`) {
2099	return ret;
2100	}
2101
2102	if (ret == size && !blk->enable_write_cache) {
2103	ret = bdrv_flush(blk_bs(blk));
2104	}
2105
2106	return ret < `0` ? ret : size;
2107	}
2108
2109	int blk_load_vmstate(BlockBackend blk, uint8_t buf, int64_t pos, int size)
2110	{
2111	if (!blk_is_available(blk)) {
2112	return -ENOMEDIUM;
2113	}
2114
2115	return bdrv_load_vmstate(blk_bs(blk), buf, pos, size);
2116	}
2117
2118	int blk_probe_blocksizes(BlockBackend blk, BlockSizes bsz)
2119	{
2120	if (!blk_is_available(blk)) {
2121	return -ENOMEDIUM;
2122	}
2123
2124	return bdrv_probe_blocksizes(blk_bs(blk), bsz);
2125	}
2126
2127	int blk_probe_geometry(BlockBackend blk, HDGeometry geo)
2128	{
2129	if (!blk_is_available(blk)) {
2130	return -ENOMEDIUM;
2131	}
2132
2133	return bdrv_probe_geometry(blk_bs(blk), geo);
2134	}
2135
2136	/*
2137	* Updates the BlockBackendRootState object with data from the currently
2138	* attached BlockDriverState.
2139	*/
2140	void blk_update_root_state(BlockBackend *blk)
2141	{
2142	assert(blk->root);
2143
2144	blk->root_state.open_flags = blk->root->bs->open_flags;
2145	blk->root_state.read_only = blk->root->bs->read_only;
2146	blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes;
2147	}
2148
2149	/*
2150	* Returns the detect-zeroes setting to be used for bdrv_open() of a
2151	* BlockDriverState which is supposed to inherit the root state.
2152	*/
2153	bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk)
2154	{
2155	return blk->root_state.detect_zeroes;
2156	}
2157
2158	/*
2159	* Returns the flags to be used for bdrv_open() of a BlockDriverState which is
2160	* supposed to inherit the root state.
2161	*/
2162	int blk_get_open_flags_from_root_state(BlockBackend *blk)
2163	{
2164	int bs_flags;
2165
2166	bs_flags = blk->root_state.read_only ? `0` : BDRV_O_RDWR;
2167	bs_flags \|= blk->root_state.open_flags & ~BDRV_O_RDWR;
2168
2169	return bs_flags;
2170	}
2171
2172	BlockBackendRootState blk_get_root_state(BlockBackend blk)
2173	{
2174	return &blk->root_state;
2175	}
2176
2177	int blk_commit_all(void)
2178	{
2179	BlockBackend *blk = NULL;
2180
2181	while ((blk = blk_all_next(blk)) != NULL) {
2182	AioContext *aio_context = blk_get_aio_context(blk);
2183
2184	aio_context_acquire(aio_context);
2185	if (blk_is_inserted(blk) && blk->root->bs->backing) {
2186	int ret = bdrv_commit(blk->root->bs);
2187	if (ret < `0`) {
2188	aio_context_release(aio_context);
2189	return ret;
2190	}
2191	}
2192	aio_context_release(aio_context);
2193	}
2194	return `0`;
2195	}
2196
2197
2198	/ throttling disk I/O limits /
2199	void blk_set_io_limits(BlockBackend blk, ThrottleConfig cfg)
2200	{
2201	throttle_group_config(&blk->public.throttle_group_member, cfg);
2202	}
2203
2204	void blk_io_limits_disable(BlockBackend *blk)
2205	{
2206	BlockDriverState *bs = blk_bs(blk);
2207	ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
2208	assert(tgm->throttle_state);
2209	if (bs) {
2210	bdrv_drained_begin(bs);
2211	}
2212	throttle_group_unregister_tgm(tgm);
2213	if (bs) {
2214	bdrv_drained_end(bs);
2215	}
2216	}
2217
2218	/ should be called before blk_set_io_limits if a limit is set /
2219	void blk_io_limits_enable(BlockBackend blk, const* char *group)
2220	{
2221	assert(!blk->public.throttle_group_member.throttle_state);
2222	throttle_group_register_tgm(&blk->public.throttle_group_member,
2223	group, blk_get_aio_context(blk));
2224	}
2225
2226	void blk_io_limits_update_group(BlockBackend blk, const* char *group)
2227	{
2228	/ this BB is not part of any group /
2229	if (!blk->public.throttle_group_member.throttle_state) {
2230	return;
2231	}
2232
2233	/ this BB is a part of the same group than the one we want /
2234	if (!g_strcmp0(throttle_group_get_name(&blk->public.throttle_group_member),
2235	group)) {
2236	return;
2237	}
2238
2239	/ need to change the group this bs belong to /
2240	blk_io_limits_disable(blk);
2241	blk_io_limits_enable(blk, group);
2242	}
2243
2244	static void blk_root_drained_begin(BdrvChild *child)
2245	{
2246	BlockBackend *blk = child->opaque;
2247
2248	if (++blk->quiesce_counter == `1`) {
2249	if (blk->dev_ops && blk->dev_ops->drained_begin) {
2250	blk->dev_ops->drained_begin(blk->dev_opaque);
2251	}
2252	}
2253
2254	/ Note that blk->root may not be accessible here yet if we are just*
2255	* attaching to a BlockDriverState that is drained. Use child instead. */
2256
2257	if (atomic_fetch_inc(&blk->public.throttle_group_member.io_limits_disabled) == `0`) {
2258	throttle_group_restart_tgm(&blk->public.throttle_group_member);
2259	}
2260	}
2261
2262	static bool blk_root_drained_poll(BdrvChild *child)
2263	{
2264	BlockBackend *blk = child->opaque;
2265	assert(blk->quiesce_counter);
2266	return !!blk->in_flight;
2267	}
2268
2269	static void blk_root_drained_end(BdrvChild child, int* *drained_end_counter)
2270	{
2271	BlockBackend *blk = child->opaque;
2272	assert(blk->quiesce_counter);
2273
2274	assert(blk->public.throttle_group_member.io_limits_disabled);
2275	atomic_dec(&blk->public.throttle_group_member.io_limits_disabled);
2276
2277	if (--blk->quiesce_counter == `0`) {
2278	if (blk->dev_ops && blk->dev_ops->drained_end) {
2279	blk->dev_ops->drained_end(blk->dev_opaque);
2280	}
2281	while (qemu_co_enter_next(&blk->queued_requests, NULL)) {
2282	/ Resume all queued requests /
2283	}
2284	}
2285	}
2286
2287	void blk_register_buf(BlockBackend blk, void* *host, size_t size)
2288	{
2289	bdrv_register_buf(blk_bs(blk), host, size);
2290	}
2291
2292	void blk_unregister_buf(BlockBackend blk, void* *host)
2293	{
2294	bdrv_unregister_buf(blk_bs(blk), host);
2295	}
2296
2297	int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
2298	BlockBackend *blk_out, int64_t off_out,
2299	int bytes, BdrvRequestFlags read_flags,
2300	BdrvRequestFlags write_flags)
2301	{
2302	int r;
2303	r = blk_check_byte_request(blk_in, off_in, bytes);
2304	if (r) {
2305	return r;
2306	}
2307	r = blk_check_byte_request(blk_out, off_out, bytes);
2308	if (r) {
2309	return r;
2310	}
2311	return bdrv_co_copy_range(blk_in->root, off_in,
2312	blk_out->root, off_out,
2313	bytes, read_flags, write_flags);
2314	}
2315
2316	const BdrvChild blk_root(BlockBackend blk)
2317	{
2318	return blk->root;
2319	}
2320

Browse the source code of qemu/block/block-backend.c