1 | /* |
2 | * QEMU System Emulator block driver |
3 | * |
4 | * Copyright (c) 2003 Fabrice Bellard |
5 | * |
6 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
7 | * of this software and associated documentation files (the "Software"), to deal |
8 | * in the Software without restriction, including without limitation the rights |
9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
10 | * copies of the Software, and to permit persons to whom the Software is |
11 | * furnished to do so, subject to the following conditions: |
12 | * |
13 | * The above copyright notice and this permission notice shall be included in |
14 | * all copies or substantial portions of the Software. |
15 | * |
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
22 | * THE SOFTWARE. |
23 | */ |
24 | #ifndef BLOCK_INT_H |
25 | #define BLOCK_INT_H |
26 | |
27 | #include "block/accounting.h" |
28 | #include "block/block.h" |
29 | #include "block/aio-wait.h" |
30 | #include "qemu/queue.h" |
31 | #include "qemu/coroutine.h" |
32 | #include "qemu/stats64.h" |
33 | #include "qemu/timer.h" |
34 | #include "qemu/hbitmap.h" |
35 | #include "block/snapshot.h" |
36 | #include "qemu/throttle.h" |
37 | |
/* Flag bit value associated with lazy refcounts. */
#define BLOCK_FLAG_LAZY_REFCOUNTS   8

/*
 * Option name strings (BLOCK_OPT_*).
 * NOTE(review): presumably the keys drivers look up in their option
 * lists -- confirm against the individual drivers.
 */
#define BLOCK_OPT_SIZE              "size"
#define BLOCK_OPT_ENCRYPT           "encryption"
#define BLOCK_OPT_ENCRYPT_FORMAT    "encrypt.format"
#define BLOCK_OPT_COMPAT6           "compat6"
#define BLOCK_OPT_HWVERSION         "hwversion"
#define BLOCK_OPT_BACKING_FILE      "backing_file"
#define BLOCK_OPT_BACKING_FMT       "backing_fmt"
#define BLOCK_OPT_CLUSTER_SIZE      "cluster_size"
#define BLOCK_OPT_TABLE_SIZE        "table_size"
#define BLOCK_OPT_PREALLOC          "preallocation"
#define BLOCK_OPT_SUBFMT            "subformat"
#define BLOCK_OPT_COMPAT_LEVEL      "compat"
#define BLOCK_OPT_LAZY_REFCOUNTS    "lazy_refcounts"
#define BLOCK_OPT_ADAPTER_TYPE      "adapter_type"
#define BLOCK_OPT_REDUNDANCY        "redundancy"
#define BLOCK_OPT_NOCOW             "nocow"
#define BLOCK_OPT_OBJECT_SIZE       "object_size"
#define BLOCK_OPT_REFCOUNT_BITS     "refcount_bits"
#define BLOCK_OPT_DATA_FILE         "data_file"
#define BLOCK_OPT_DATA_FILE_RAW     "data_file_raw"

/*
 * NOTE(review): presumably the size in bytes of the buffer handed to
 * the .bdrv_probe() callbacks -- confirm at the call site.
 */
#define BLOCK_PROBE_BUF_SIZE        512
62 | |
/*
 * Kind of operation recorded in the 'type' member of a
 * BdrvTrackedRequest.  The numeric values are spelled out; they are the
 * same ones the compiler would assign implicitly.
 */
enum BdrvTrackedRequestType {
    BDRV_TRACKED_READ     = 0,
    BDRV_TRACKED_WRITE    = 1,
    BDRV_TRACKED_DISCARD  = 2,
    BDRV_TRACKED_TRUNCATE = 3,
};
69 | |
70 | typedef struct BdrvTrackedRequest { |
71 | BlockDriverState *bs; |
72 | int64_t offset; |
73 | uint64_t bytes; |
74 | enum BdrvTrackedRequestType type; |
75 | |
76 | bool serialising; |
77 | int64_t overlap_offset; |
78 | uint64_t overlap_bytes; |
79 | |
80 | QLIST_ENTRY(BdrvTrackedRequest) list; |
81 | Coroutine *co; /* owner, used for deadlock detection */ |
82 | CoQueue wait_queue; /* coroutines blocked on this request */ |
83 | |
84 | struct BdrvTrackedRequest *waiting_for; |
85 | } BdrvTrackedRequest; |
86 | |
87 | struct BlockDriver { |
88 | const char *format_name; |
89 | int instance_size; |
90 | |
91 | /* set to true if the BlockDriver is a block filter. Block filters pass |
92 | * certain callbacks that refer to data (see block.c) to their bs->file if |
93 | * the driver doesn't implement them. Drivers that do not wish to forward |
94 | * must implement them and return -ENOTSUP. |
95 | */ |
96 | bool is_filter; |
97 | /* for snapshots block filter like Quorum can implement the |
98 | * following recursive callback. |
99 | * It's purpose is to recurse on the filter children while calling |
100 | * bdrv_recurse_is_first_non_filter on them. |
101 | * For a sample implementation look in the future Quorum block filter. |
102 | */ |
103 | bool (*bdrv_recurse_is_first_non_filter)(BlockDriverState *bs, |
104 | BlockDriverState *candidate); |
105 | |
106 | int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename); |
107 | int (*bdrv_probe_device)(const char *filename); |
108 | |
109 | /* Any driver implementing this callback is expected to be able to handle |
110 | * NULL file names in its .bdrv_open() implementation */ |
111 | void (*bdrv_parse_filename)(const char *filename, QDict *options, Error **errp); |
112 | /* Drivers not implementing bdrv_parse_filename nor bdrv_open should have |
113 | * this field set to true, except ones that are defined only by their |
114 | * child's bs. |
115 | * An example of the last type will be the quorum block driver. |
116 | */ |
117 | bool bdrv_needs_filename; |
118 | |
119 | /* Set if a driver can support backing files */ |
120 | bool supports_backing; |
121 | |
122 | /* For handling image reopen for split or non-split files */ |
123 | int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state, |
124 | BlockReopenQueue *queue, Error **errp); |
125 | void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state); |
126 | void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state); |
127 | void (*bdrv_join_options)(QDict *options, QDict *old_options); |
128 | |
129 | int (*bdrv_open)(BlockDriverState *bs, QDict *options, int flags, |
130 | Error **errp); |
131 | |
132 | /* Protocol drivers should implement this instead of bdrv_open */ |
133 | int (*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags, |
134 | Error **errp); |
135 | void (*bdrv_close)(BlockDriverState *bs); |
136 | int coroutine_fn (*bdrv_co_create)(BlockdevCreateOptions *opts, |
137 | Error **errp); |
138 | int coroutine_fn (*bdrv_co_create_opts)(const char *filename, |
139 | QemuOpts *opts, |
140 | Error **errp); |
141 | int (*bdrv_make_empty)(BlockDriverState *bs); |
142 | |
143 | /* |
144 | * Refreshes the bs->exact_filename field. If that is impossible, |
145 | * bs->exact_filename has to be left empty. |
146 | */ |
147 | void (*bdrv_refresh_filename)(BlockDriverState *bs); |
148 | |
149 | /* |
150 | * Gathers the open options for all children into @target. |
151 | * A simple format driver (without backing file support) might |
152 | * implement this function like this: |
153 | * |
154 | * QINCREF(bs->file->bs->full_open_options); |
155 | * qdict_put(target, "file", bs->file->bs->full_open_options); |
156 | * |
157 | * If not specified, the generic implementation will simply put |
158 | * all children's options under their respective name. |
159 | * |
160 | * @backing_overridden is true when bs->backing seems not to be |
161 | * the child that would result from opening bs->backing_file. |
162 | * Therefore, if it is true, the backing child's options should be |
163 | * gathered; otherwise, there is no need since the backing child |
164 | * is the one implied by the image header. |
165 | * |
166 | * Note that ideally this function would not be needed. Every |
167 | * block driver which implements it is probably doing something |
168 | * shady regarding its runtime option structure. |
169 | */ |
170 | void (*bdrv_gather_child_options)(BlockDriverState *bs, QDict *target, |
171 | bool backing_overridden); |
172 | |
173 | /* |
174 | * Returns an allocated string which is the directory name of this BDS: It |
175 | * will be used to make relative filenames absolute by prepending this |
176 | * function's return value to them. |
177 | */ |
178 | char *(*bdrv_dirname)(BlockDriverState *bs, Error **errp); |
179 | |
180 | /* aio */ |
181 | BlockAIOCB *(*bdrv_aio_preadv)(BlockDriverState *bs, |
182 | uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags, |
183 | BlockCompletionFunc *cb, void *opaque); |
184 | BlockAIOCB *(*bdrv_aio_pwritev)(BlockDriverState *bs, |
185 | uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags, |
186 | BlockCompletionFunc *cb, void *opaque); |
187 | BlockAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs, |
188 | BlockCompletionFunc *cb, void *opaque); |
189 | BlockAIOCB *(*bdrv_aio_pdiscard)(BlockDriverState *bs, |
190 | int64_t offset, int bytes, |
191 | BlockCompletionFunc *cb, void *opaque); |
192 | |
193 | int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs, |
194 | int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); |
195 | |
196 | /** |
197 | * @offset: position in bytes to read at |
198 | * @bytes: number of bytes to read |
199 | * @qiov: the buffers to fill with read data |
200 | * @flags: currently unused, always 0 |
201 | * |
202 | * @offset and @bytes will be a multiple of 'request_alignment', |
203 | * but the length of individual @qiov elements does not have to |
204 | * be a multiple. |
205 | * |
206 | * @bytes will always equal the total size of @qiov, and will be |
207 | * no larger than 'max_transfer'. |
208 | * |
209 | * The buffer in @qiov may point directly to guest memory. |
210 | */ |
211 | int coroutine_fn (*bdrv_co_preadv)(BlockDriverState *bs, |
212 | uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags); |
213 | int coroutine_fn (*bdrv_co_preadv_part)(BlockDriverState *bs, |
214 | uint64_t offset, uint64_t bytes, |
215 | QEMUIOVector *qiov, size_t qiov_offset, int flags); |
216 | int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs, |
217 | int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int flags); |
218 | /** |
219 | * @offset: position in bytes to write at |
220 | * @bytes: number of bytes to write |
221 | * @qiov: the buffers containing data to write |
222 | * @flags: zero or more bits allowed by 'supported_write_flags' |
223 | * |
224 | * @offset and @bytes will be a multiple of 'request_alignment', |
225 | * but the length of individual @qiov elements does not have to |
226 | * be a multiple. |
227 | * |
228 | * @bytes will always equal the total size of @qiov, and will be |
229 | * no larger than 'max_transfer'. |
230 | * |
231 | * The buffer in @qiov may point directly to guest memory. |
232 | */ |
233 | int coroutine_fn (*bdrv_co_pwritev)(BlockDriverState *bs, |
234 | uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags); |
235 | int coroutine_fn (*bdrv_co_pwritev_part)(BlockDriverState *bs, |
236 | uint64_t offset, uint64_t bytes, |
237 | QEMUIOVector *qiov, size_t qiov_offset, int flags); |
238 | |
239 | /* |
240 | * Efficiently zero a region of the disk image. Typically an image format |
241 | * would use a compact metadata representation to implement this. This |
242 | * function pointer may be NULL or return -ENOSUP and .bdrv_co_writev() |
243 | * will be called instead. |
244 | */ |
245 | int coroutine_fn (*bdrv_co_pwrite_zeroes)(BlockDriverState *bs, |
246 | int64_t offset, int bytes, BdrvRequestFlags flags); |
247 | int coroutine_fn (*bdrv_co_pdiscard)(BlockDriverState *bs, |
248 | int64_t offset, int bytes); |
249 | |
250 | /* Map [offset, offset + nbytes) range onto a child of @bs to copy from, |
251 | * and invoke bdrv_co_copy_range_from(child, ...), or invoke |
252 | * bdrv_co_copy_range_to() if @bs is the leaf child to copy data from. |
253 | * |
254 | * See the comment of bdrv_co_copy_range for the parameter and return value |
255 | * semantics. |
256 | */ |
257 | int coroutine_fn (*bdrv_co_copy_range_from)(BlockDriverState *bs, |
258 | BdrvChild *src, |
259 | uint64_t offset, |
260 | BdrvChild *dst, |
261 | uint64_t dst_offset, |
262 | uint64_t bytes, |
263 | BdrvRequestFlags read_flags, |
264 | BdrvRequestFlags write_flags); |
265 | |
266 | /* Map [offset, offset + nbytes) range onto a child of bs to copy data to, |
267 | * and invoke bdrv_co_copy_range_to(child, src, ...), or perform the copy |
268 | * operation if @bs is the leaf and @src has the same BlockDriver. Return |
269 | * -ENOTSUP if @bs is the leaf but @src has a different BlockDriver. |
270 | * |
271 | * See the comment of bdrv_co_copy_range for the parameter and return value |
272 | * semantics. |
273 | */ |
274 | int coroutine_fn (*bdrv_co_copy_range_to)(BlockDriverState *bs, |
275 | BdrvChild *src, |
276 | uint64_t src_offset, |
277 | BdrvChild *dst, |
278 | uint64_t dst_offset, |
279 | uint64_t bytes, |
280 | BdrvRequestFlags read_flags, |
281 | BdrvRequestFlags write_flags); |
282 | |
283 | /* |
284 | * Building block for bdrv_block_status[_above] and |
285 | * bdrv_is_allocated[_above]. The driver should answer only |
286 | * according to the current layer, and should only need to set |
287 | * BDRV_BLOCK_DATA, BDRV_BLOCK_ZERO, BDRV_BLOCK_OFFSET_VALID, |
288 | * and/or BDRV_BLOCK_RAW; if the current layer defers to a backing |
289 | * layer, the result should be 0 (and not BDRV_BLOCK_ZERO). See |
290 | * block.h for the overall meaning of the bits. As a hint, the |
291 | * flag want_zero is true if the caller cares more about precise |
292 | * mappings (favor accurate _OFFSET_VALID/_ZERO) or false for |
293 | * overall allocation (favor larger *pnum, perhaps by reporting |
294 | * _DATA instead of _ZERO). The block layer guarantees input |
295 | * clamped to bdrv_getlength() and aligned to request_alignment, |
296 | * as well as non-NULL pnum, map, and file; in turn, the driver |
297 | * must return an error or set pnum to an aligned non-zero value. |
298 | */ |
299 | int coroutine_fn (*bdrv_co_block_status)(BlockDriverState *bs, |
300 | bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum, |
301 | int64_t *map, BlockDriverState **file); |
302 | |
303 | /* |
304 | * Invalidate any cached meta-data. |
305 | */ |
306 | void coroutine_fn (*bdrv_co_invalidate_cache)(BlockDriverState *bs, |
307 | Error **errp); |
308 | int (*bdrv_inactivate)(BlockDriverState *bs); |
309 | |
310 | /* |
311 | * Flushes all data for all layers by calling bdrv_co_flush for underlying |
312 | * layers, if needed. This function is needed for deterministic |
313 | * synchronization of the flush finishing callback. |
314 | */ |
315 | int coroutine_fn (*bdrv_co_flush)(BlockDriverState *bs); |
316 | |
317 | /* |
318 | * Flushes all data that was already written to the OS all the way down to |
319 | * the disk (for example file-posix.c calls fsync()). |
320 | */ |
321 | int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs); |
322 | |
323 | /* |
324 | * Flushes all internal caches to the OS. The data may still sit in a |
325 | * writeback cache of the host OS, but it will survive a crash of the qemu |
326 | * process. |
327 | */ |
328 | int coroutine_fn (*bdrv_co_flush_to_os)(BlockDriverState *bs); |
329 | |
330 | /* |
331 | * Drivers setting this field must be able to work with just a plain |
332 | * filename with '<protocol_name>:' as a prefix, and no other options. |
333 | * Options may be extracted from the filename by implementing |
334 | * bdrv_parse_filename. |
335 | */ |
336 | const char *protocol_name; |
337 | int coroutine_fn (*bdrv_co_truncate)(BlockDriverState *bs, int64_t offset, |
338 | PreallocMode prealloc, Error **errp); |
339 | |
340 | int64_t (*bdrv_getlength)(BlockDriverState *bs); |
341 | bool has_variable_length; |
342 | int64_t (*bdrv_get_allocated_file_size)(BlockDriverState *bs); |
343 | BlockMeasureInfo *(*bdrv_measure)(QemuOpts *opts, BlockDriverState *in_bs, |
344 | Error **errp); |
345 | |
346 | int coroutine_fn (*bdrv_co_pwritev_compressed)(BlockDriverState *bs, |
347 | uint64_t offset, uint64_t bytes, QEMUIOVector *qiov); |
348 | int coroutine_fn (*bdrv_co_pwritev_compressed_part)(BlockDriverState *bs, |
349 | uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, |
350 | size_t qiov_offset); |
351 | |
352 | int (*bdrv_snapshot_create)(BlockDriverState *bs, |
353 | QEMUSnapshotInfo *sn_info); |
354 | int (*bdrv_snapshot_goto)(BlockDriverState *bs, |
355 | const char *snapshot_id); |
356 | int (*bdrv_snapshot_delete)(BlockDriverState *bs, |
357 | const char *snapshot_id, |
358 | const char *name, |
359 | Error **errp); |
360 | int (*bdrv_snapshot_list)(BlockDriverState *bs, |
361 | QEMUSnapshotInfo **psn_info); |
362 | int (*bdrv_snapshot_load_tmp)(BlockDriverState *bs, |
363 | const char *snapshot_id, |
364 | const char *name, |
365 | Error **errp); |
366 | int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi); |
367 | ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs, |
368 | Error **errp); |
369 | |
370 | int coroutine_fn (*bdrv_save_vmstate)(BlockDriverState *bs, |
371 | QEMUIOVector *qiov, |
372 | int64_t pos); |
373 | int coroutine_fn (*bdrv_load_vmstate)(BlockDriverState *bs, |
374 | QEMUIOVector *qiov, |
375 | int64_t pos); |
376 | |
377 | int (*bdrv_change_backing_file)(BlockDriverState *bs, |
378 | const char *backing_file, const char *backing_fmt); |
379 | |
380 | /* removable device specific */ |
381 | bool (*bdrv_is_inserted)(BlockDriverState *bs); |
382 | void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag); |
383 | void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked); |
384 | |
385 | /* to control generic scsi devices */ |
386 | BlockAIOCB *(*bdrv_aio_ioctl)(BlockDriverState *bs, |
387 | unsigned long int req, void *buf, |
388 | BlockCompletionFunc *cb, void *opaque); |
389 | int coroutine_fn (*bdrv_co_ioctl)(BlockDriverState *bs, |
390 | unsigned long int req, void *buf); |
391 | |
392 | /* List of options for creating images, terminated by name == NULL */ |
393 | QemuOptsList *create_opts; |
394 | /* |
395 | * If this driver supports reopening images this contains a |
396 | * NULL-terminated list of the runtime options that can be |
397 | * modified. If an option in this list is unspecified during |
398 | * reopen then it _must_ be reset to its default value or return |
399 | * an error. |
400 | */ |
401 | const char *const *mutable_opts; |
402 | |
403 | /* |
404 | * Returns 0 for completed check, -errno for internal errors. |
405 | * The check results are stored in result. |
406 | */ |
407 | int coroutine_fn (*bdrv_co_check)(BlockDriverState *bs, |
408 | BdrvCheckResult *result, |
409 | BdrvCheckMode fix); |
410 | |
411 | int (*bdrv_amend_options)(BlockDriverState *bs, QemuOpts *opts, |
412 | BlockDriverAmendStatusCB *status_cb, |
413 | void *cb_opaque, |
414 | Error **errp); |
415 | |
416 | void (*bdrv_debug_event)(BlockDriverState *bs, BlkdebugEvent event); |
417 | |
418 | /* TODO Better pass a option string/QDict/QemuOpts to add any rule? */ |
419 | int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event, |
420 | const char *tag); |
421 | int (*bdrv_debug_remove_breakpoint)(BlockDriverState *bs, |
422 | const char *tag); |
423 | int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag); |
424 | bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag); |
425 | |
426 | void (*bdrv_refresh_limits)(BlockDriverState *bs, Error **errp); |
427 | |
428 | /* |
429 | * Returns 1 if newly created images are guaranteed to contain only |
430 | * zeros, 0 otherwise. |
431 | * Must return 0 if .bdrv_has_zero_init_truncate() returns 0. |
432 | */ |
433 | int (*bdrv_has_zero_init)(BlockDriverState *bs); |
434 | |
435 | /* |
436 | * Returns 1 if new areas added by growing the image with |
437 | * PREALLOC_MODE_OFF contain only zeros, 0 otherwise. |
438 | */ |
439 | int (*bdrv_has_zero_init_truncate)(BlockDriverState *bs); |
440 | |
441 | /* Remove fd handlers, timers, and other event loop callbacks so the event |
442 | * loop is no longer in use. Called with no in-flight requests and in |
443 | * depth-first traversal order with parents before child nodes. |
444 | */ |
445 | void (*bdrv_detach_aio_context)(BlockDriverState *bs); |
446 | |
447 | /* Add fd handlers, timers, and other event loop callbacks so I/O requests |
448 | * can be processed again. Called with no in-flight requests and in |
449 | * depth-first traversal order with child nodes before parent nodes. |
450 | */ |
451 | void (*bdrv_attach_aio_context)(BlockDriverState *bs, |
452 | AioContext *new_context); |
453 | |
454 | /* io queue for linux-aio */ |
455 | void (*bdrv_io_plug)(BlockDriverState *bs); |
456 | void (*bdrv_io_unplug)(BlockDriverState *bs); |
457 | |
458 | /** |
459 | * Try to get @bs's logical and physical block size. |
460 | * On success, store them in @bsz and return zero. |
461 | * On failure, return negative errno. |
462 | */ |
463 | int (*bdrv_probe_blocksizes)(BlockDriverState *bs, BlockSizes *bsz); |
464 | /** |
465 | * Try to get @bs's geometry (cyls, heads, sectors) |
466 | * On success, store them in @geo and return 0. |
467 | * On failure return -errno. |
468 | * Only drivers that want to override guest geometry implement this |
469 | * callback; see hd_geometry_guess(). |
470 | */ |
471 | int (*bdrv_probe_geometry)(BlockDriverState *bs, HDGeometry *geo); |
472 | |
473 | /** |
474 | * bdrv_co_drain_begin is called if implemented in the beginning of a |
475 | * drain operation to drain and stop any internal sources of requests in |
476 | * the driver. |
477 | * bdrv_co_drain_end is called if implemented at the end of the drain. |
478 | * |
479 | * They should be used by the driver to e.g. manage scheduled I/O |
480 | * requests, or toggle an internal state. After the end of the drain new |
481 | * requests will continue normally. |
482 | */ |
483 | void coroutine_fn (*bdrv_co_drain_begin)(BlockDriverState *bs); |
484 | void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs); |
485 | |
486 | void (*bdrv_add_child)(BlockDriverState *parent, BlockDriverState *child, |
487 | Error **errp); |
488 | void (*bdrv_del_child)(BlockDriverState *parent, BdrvChild *child, |
489 | Error **errp); |
490 | |
491 | /** |
492 | * Informs the block driver that a permission change is intended. The |
493 | * driver checks whether the change is permissible and may take other |
494 | * preparations for the change (e.g. get file system locks). This operation |
495 | * is always followed either by a call to either .bdrv_set_perm or |
496 | * .bdrv_abort_perm_update. |
497 | * |
498 | * Checks whether the requested set of cumulative permissions in @perm |
499 | * can be granted for accessing @bs and whether no other users are using |
500 | * permissions other than those given in @shared (both arguments take |
501 | * BLK_PERM_* bitmasks). |
502 | * |
503 | * If both conditions are met, 0 is returned. Otherwise, -errno is returned |
504 | * and errp is set to an error describing the conflict. |
505 | */ |
506 | int (*bdrv_check_perm)(BlockDriverState *bs, uint64_t perm, |
507 | uint64_t shared, Error **errp); |
508 | |
509 | /** |
510 | * Called to inform the driver that the set of cumulative set of used |
511 | * permissions for @bs has changed to @perm, and the set of sharable |
512 | * permission to @shared. The driver can use this to propagate changes to |
513 | * its children (i.e. request permissions only if a parent actually needs |
514 | * them). |
515 | * |
516 | * This function is only invoked after bdrv_check_perm(), so block drivers |
517 | * may rely on preparations made in their .bdrv_check_perm implementation. |
518 | */ |
519 | void (*bdrv_set_perm)(BlockDriverState *bs, uint64_t perm, uint64_t shared); |
520 | |
521 | /* |
522 | * Called to inform the driver that after a previous bdrv_check_perm() |
523 | * call, the permission update is not performed and any preparations made |
524 | * for it (e.g. taken file locks) need to be undone. |
525 | * |
526 | * This function can be called even for nodes that never saw a |
527 | * bdrv_check_perm() call. It is a no-op then. |
528 | */ |
529 | void (*bdrv_abort_perm_update)(BlockDriverState *bs); |
530 | |
531 | /** |
532 | * Returns in @nperm and @nshared the permissions that the driver for @bs |
533 | * needs on its child @c, based on the cumulative permissions requested by |
534 | * the parents in @parent_perm and @parent_shared. |
535 | * |
536 | * If @c is NULL, return the permissions for attaching a new child for the |
537 | * given @role. |
538 | * |
539 | * If @reopen_queue is non-NULL, don't return the currently needed |
540 | * permissions, but those that will be needed after applying the |
541 | * @reopen_queue. |
542 | */ |
543 | void (*bdrv_child_perm)(BlockDriverState *bs, BdrvChild *c, |
544 | const BdrvChildRole *role, |
545 | BlockReopenQueue *reopen_queue, |
546 | uint64_t parent_perm, uint64_t parent_shared, |
547 | uint64_t *nperm, uint64_t *nshared); |
548 | |
549 | /** |
550 | * Bitmaps should be marked as 'IN_USE' in the image on reopening image |
551 | * as rw. This handler should realize it. It also should unset readonly |
552 | * field of BlockDirtyBitmap's in case of success. |
553 | */ |
554 | int (*bdrv_reopen_bitmaps_rw)(BlockDriverState *bs, Error **errp); |
555 | bool (*bdrv_can_store_new_dirty_bitmap)(BlockDriverState *bs, |
556 | const char *name, |
557 | uint32_t granularity, |
558 | Error **errp); |
559 | void (*bdrv_remove_persistent_dirty_bitmap)(BlockDriverState *bs, |
560 | const char *name, |
561 | Error **errp); |
562 | |
563 | /** |
564 | * Register/unregister a buffer for I/O. For example, when the driver is |
565 | * interested to know the memory areas that will later be used in iovs, so |
566 | * that it can do IOMMU mapping with VFIO etc., in order to get better |
567 | * performance. In the case of VFIO drivers, this callback is used to do |
568 | * DMA mapping for hot buffers. |
569 | */ |
570 | void (*bdrv_register_buf)(BlockDriverState *bs, void *host, size_t size); |
571 | void (*bdrv_unregister_buf)(BlockDriverState *bs, void *host); |
572 | QLIST_ENTRY(BlockDriver) list; |
573 | |
574 | /* Pointer to a NULL-terminated array of names of strong options |
575 | * that can be specified for bdrv_open(). A strong option is one |
576 | * that changes the data of a BDS. |
577 | * If this pointer is NULL, the array is considered empty. |
578 | * "filename" and "driver" are always considered strong. */ |
579 | const char *const *strong_runtime_opts; |
580 | }; |
581 | |
582 | static inline bool block_driver_can_compress(BlockDriver *drv) |
583 | { |
584 | return drv->bdrv_co_pwritev_compressed || |
585 | drv->bdrv_co_pwritev_compressed_part; |
586 | } |
587 | |
/*
 * Alignment and size limits for I/O requests.
 */
typedef struct BlockLimits {
    /*
     * Required alignment, in bytes, of the offset and length of I/O
     * requests.  Must be a power of 2 less than INT_MAX.  Defaults to 1
     * for drivers with modern byte interfaces and to 512 otherwise.
     */
    uint32_t request_alignment;

    /*
     * Largest number of bytes that can be discarded at once.  The member
     * is signed, so a set value must be < 2G.  Must be a multiple of
     * pdiscard_alignment but need not be a power of 2.  May be 0 if
     * there is no inherent 32-bit limit.
     */
    int32_t max_pdiscard;

    /*
     * Optimal alignment, in bytes, for discard requests.  A power of 2
     * is best but not mandatory.  Must be a multiple of
     * bl.request_alignment and, if max_pdiscard is set, less than
     * max_pdiscard.  May be 0 if bl.request_alignment is good enough.
     */
    uint32_t pdiscard_alignment;

    /*
     * Largest number of bytes that can be zeroized at once.  The member
     * is signed, so a set value must be < 2G.  Must be a multiple of
     * pwrite_zeroes_alignment.  May be 0 if there is no inherent 32-bit
     * limit.
     */
    int32_t max_pwrite_zeroes;

    /*
     * Optimal alignment, in bytes, for write zeroes requests.  A power
     * of 2 is best but not mandatory.  Must be a multiple of
     * bl.request_alignment and, if max_pwrite_zeroes is set, less than
     * max_pwrite_zeroes.  May be 0 if bl.request_alignment is good
     * enough.
     */
    uint32_t pwrite_zeroes_alignment;

    /*
     * Optimal transfer length in bytes.  A power of 2 is best but not
     * mandatory.  Must be a multiple of bl.request_alignment, or 0 if
     * there is no preferred size.
     */
    uint32_t opt_transfer;

    /*
     * Maximal transfer length in bytes.  Need not be a power of 2, but
     * must be a multiple of opt_transfer and bl.request_alignment, or 0
     * for no 32-bit limit.  For now, anything larger than INT_MAX is
     * clamped down.
     */
    uint32_t max_transfer;

    /* Memory alignment, in bytes, so that no bounce buffer is needed */
    size_t min_mem_alignment;

    /* Memory alignment, in bytes, for a bounce buffer */
    size_t opt_mem_alignment;

    /* Maximum number of iovec elements */
    int max_iov;
} BlockLimits;
639 | |
640 | typedef struct BdrvOpBlocker BdrvOpBlocker; |
641 | |
642 | typedef struct BdrvAioNotifier { |
643 | void (*attached_aio_context)(AioContext *new_context, void *opaque); |
644 | void (*detach_aio_context)(void *opaque); |
645 | |
646 | void *opaque; |
647 | bool deleted; |
648 | |
649 | QLIST_ENTRY(BdrvAioNotifier) list; |
650 | } BdrvAioNotifier; |
651 | |
652 | struct BdrvChildRole { |
653 | /* If true, bdrv_replace_node() doesn't change the node this BdrvChild |
654 | * points to. */ |
655 | bool stay_at_node; |
656 | |
657 | /* If true, the parent is a BlockDriverState and bdrv_next_all_states() |
658 | * will return it. This information is used for drain_all, where every node |
659 | * will be drained separately, so the drain only needs to be propagated to |
660 | * non-BDS parents. */ |
661 | bool parent_is_bds; |
662 | |
663 | void (*inherit_options)(int *child_flags, QDict *child_options, |
664 | int parent_flags, QDict *parent_options); |
665 | |
666 | void (*change_media)(BdrvChild *child, bool load); |
667 | void (*resize)(BdrvChild *child); |
668 | |
669 | /* Returns a name that is supposedly more useful for human users than the |
670 | * node name for identifying the node in question (in particular, a BB |
671 | * name), or NULL if the parent can't provide a better name. */ |
672 | const char *(*get_name)(BdrvChild *child); |
673 | |
674 | /* Returns a malloced string that describes the parent of the child for a |
675 | * human reader. This could be a node-name, BlockBackend name, qdev ID or |
676 | * QOM path of the device owning the BlockBackend, job type and ID etc. The |
677 | * caller is responsible for freeing the memory. */ |
678 | char *(*get_parent_desc)(BdrvChild *child); |
679 | |
680 | /* |
681 | * If this pair of functions is implemented, the parent doesn't issue new |
682 | * requests after returning from .drained_begin() until .drained_end() is |
683 | * called. |
684 | * |
685 | * These functions must not change the graph (and therefore also must not |
686 | * call aio_poll(), which could change the graph indirectly). |
687 | * |
688 | * If drained_end() schedules background operations, it must atomically |
689 | * increment *drained_end_counter for each such operation and atomically |
690 | * decrement it once the operation has settled. |
691 | * |
692 | * Note that this can be nested. If drained_begin() was called twice, new |
693 | * I/O is allowed only after drained_end() was called twice, too. |
694 | */ |
695 | void (*drained_begin)(BdrvChild *child); |
696 | void (*drained_end)(BdrvChild *child, int *drained_end_counter); |
697 | |
698 | /* |
699 | * Returns whether the parent has pending requests for the child. This |
700 | * callback is polled after .drained_begin() has been called until all |
701 | * activity on the child has stopped. |
702 | */ |
703 | bool (*drained_poll)(BdrvChild *child); |
704 | |
705 | /* Notifies the parent that the child has been activated/inactivated (e.g. |
706 | * when migration is completing) and it can start/stop requesting |
707 | * permissions and doing I/O on it. */ |
708 | void (*activate)(BdrvChild *child, Error **errp); |
709 | int (*inactivate)(BdrvChild *child); |
710 | |
711 | void (*attach)(BdrvChild *child); |
712 | void (*detach)(BdrvChild *child); |
713 | |
714 | /* Notifies the parent that the filename of its child has changed (e.g. |
715 | * because the direct child was removed from the backing chain), so that it |
716 | * can update its reference. */ |
717 | int (*update_filename)(BdrvChild *child, BlockDriverState *new_base, |
718 | const char *filename, Error **errp); |
719 | |
720 | bool (*can_set_aio_ctx)(BdrvChild *child, AioContext *ctx, |
721 | GSList **ignore, Error **errp); |
722 | void (*set_aio_ctx)(BdrvChild *child, AioContext *ctx, GSList **ignore); |
723 | }; |
724 | |
/*
 * Child role objects shared across the block layer. They are only declared
 * here; the definitions live outside this header.
 * NOTE(review): judging by the names these are the roles used for a node's
 * "file", format and backing children -- confirm at the definitions.
 */
extern const BdrvChildRole child_file;
extern const BdrvChildRole child_format;
extern const BdrvChildRole child_backing;
728 | |
/*
 * A link to the node @bs, together with the permissions held on that node
 * through this link and the bookkeeping needed while the link is attached.
 */
struct BdrvChild {
    /* The node this link points to */
    BlockDriverState *bs;
    /* NOTE(review): presumably a heap-allocated name -- confirm ownership */
    char *name;
    /* Role of this link; supplies the parent callbacks (role->drained_*) */
    const BdrvChildRole *role;
    /* NOTE(review): presumably parent-private data -- confirm at users */
    void *opaque;

    /**
     * Granted permissions for operating on this BdrvChild (BLK_PERM_* bitmask)
     */
    uint64_t perm;

    /**
     * Permissions that can still be granted to other users of @bs while this
     * BdrvChild is still attached to it. (BLK_PERM_* bitmask)
     */
    uint64_t shared_perm;

    /* backup of permissions during permission update procedure */
    bool has_backup_perm;
    uint64_t backup_perm;
    uint64_t backup_shared_perm;

    /*
     * This link is frozen: the child can neither be replaced nor
     * detached from the parent.
     */
    bool frozen;

    /*
     * How many times the parent of this child has been drained
     * (through role->drained_*).
     * Usually, this is equal to bs->quiesce_counter (potentially
     * reduced by bdrv_drain_all_count).  It may differ while the
     * child is entering or leaving a drained section.
     */
    int parent_quiesce_counter;

    /*
     * List linkage. BlockDriverState keeps two lists of BdrvChild
     * ("children" and "parents").
     * NOTE(review): presumably @next chains the parent's children and
     * @next_parent chains the parents of @bs -- confirm at the list users.
     */
    QLIST_ENTRY(BdrvChild) next;
    QLIST_ENTRY(BdrvChild) next_parent;
};
769 | |
/*
 * Note: the function bdrv_append() copies and swaps contents of
 * BlockDriverStates, so if you add new fields to this struct, please
 * inspect bdrv_append() to determine if the new fields need to be
 * copied as well.
 */
struct BlockDriverState {
    /* Protected by big QEMU lock or read-only after opening.  No special
     * locking needed during I/O...
     */
    int open_flags; /* flags used to open the file, re-used for re-open */
    bool read_only; /* if true, the media is read only */
    bool encrypted; /* if true, the media is encrypted */
    bool sg;        /* if true, the device is a /dev/sg* */
    bool probed;    /* if true, format was probed rather than specified */
    bool force_share; /* if true, always allow all shared permissions */
    bool implicit;  /* if true, this filter node was automatically inserted */

    BlockDriver *drv; /* NULL means no media */
    /* NOTE(review): presumably driver-private state belonging to @drv */
    void *opaque;

    AioContext *aio_context; /* event loop used for fd handlers, timers, etc */
    /* long-running tasks intended to always use the same AioContext as this
     * BDS may register themselves in this list to be notified of changes
     * regarding this BDS's context */
    QLIST_HEAD(, BdrvAioNotifier) aio_notifiers;
    bool walking_aio_notifiers; /* to make removal during iteration safe */

    char filename[PATH_MAX];
    char backing_file[PATH_MAX]; /* if non zero, the image is a diff of
                                    this file image */
    /* The backing filename indicated by the image header; if we ever
     * open this file, then this is replaced by the resulting BDS's
     * filename (i.e. after a bdrv_refresh_filename() run). */
    char auto_backing_file[PATH_MAX];
    char backing_format[16]; /* if non-zero and backing_file exists */

    QDict *full_open_options;
    char exact_filename[PATH_MAX];

    /* Child links; @backing is NULL when there is none (see backing_bs()) */
    BdrvChild *backing;
    BdrvChild *file;

    /* I/O Limits */
    BlockLimits bl;

    /* Flags honored during pwrite (so far: BDRV_REQ_FUA,
     * BDRV_REQ_WRITE_UNCHANGED).
     * If a driver does not support BDRV_REQ_WRITE_UNCHANGED, those
     * writes will be issued as normal writes without the flag set.
     * This is important to note for drivers that do not explicitly
     * request a WRITE permission for their children and instead take
     * the same permissions as their parent did (this is commonly what
     * block filters do).  Such drivers have to be aware that the
     * parent may have taken a WRITE_UNCHANGED permission only and is
     * issuing such requests.  Drivers either must make sure that
     * these requests do not result in plain WRITE accesses (usually
     * by supporting BDRV_REQ_WRITE_UNCHANGED, and then forwarding
     * every incoming write request as-is, including potentially that
     * flag), or they have to explicitly take the WRITE permission for
     * their children. */
    unsigned int supported_write_flags;
    /* Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA,
     * BDRV_REQ_MAY_UNMAP, BDRV_REQ_WRITE_UNCHANGED) */
    unsigned int supported_zero_flags;

    /* the following member gives a name to every node on the bs graph. */
    char node_name[32];
    /* element of the list of named nodes building the graph */
    QTAILQ_ENTRY(BlockDriverState) node_list;
    /* element of the list of all BlockDriverStates (all_bdrv_states) */
    QTAILQ_ENTRY(BlockDriverState) bs_list;
    /* element of the list of monitor-owned BDS */
    QTAILQ_ENTRY(BlockDriverState) monitor_list;
    /* NOTE(review): reference count of this node -- confirm at its users */
    int refcnt;

    /* operation blockers */
    QLIST_HEAD(, BdrvOpBlocker) op_blockers[BLOCK_OP_TYPE_MAX];

    /* The node that this node inherited default options from (and a reopen on
     * which can affect this node by changing these defaults). This is always a
     * parent node of this node. */
    BlockDriverState *inherits_from;
    /* BdrvChild lists; entries are linked through the QLIST_ENTRY members
     * of struct BdrvChild */
    QLIST_HEAD(, BdrvChild) children;
    QLIST_HEAD(, BdrvChild) parents;

    QDict *options;
    QDict *explicit_options;
    BlockdevDetectZeroesOptions detect_zeroes;

    /* The error object in use for blocking operations on backing_hd */
    Error *backing_blocker;

    /* Protected by AioContext lock */

    /* If we are reading a disk image, give its size in sectors.
     * Generally read-only; it is written to by load_snapshot and
     * save_snapshot, but the block layer is quiescent during those.
     */
    int64_t total_sectors;

    /* Callback before write request is processed */
    NotifierWithReturnList before_write_notifiers;

    /* threshold limit for writes, in bytes. "High water mark". */
    uint64_t write_threshold_offset;
    NotifierWithReturn write_threshold_notifier;

    /* Writing to the list requires the BQL _and_ the dirty_bitmap_mutex.
     * Reading from the list can be done with either the BQL or the
     * dirty_bitmap_mutex.  Modifying a bitmap only requires
     * dirty_bitmap_mutex.  */
    QemuMutex dirty_bitmap_mutex;
    QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps;

    /* Offset after the highest byte written to */
    Stat64 wr_highest_offset;

    /* If true, copy read backing sectors into image.  Can be >1 if more
     * than one client has requested copy-on-read.  Accessed with atomic
     * ops.
     */
    int copy_on_read;

    /* number of in-flight requests; overall and serialising.
     * Accessed with atomic ops.
     */
    unsigned int in_flight;
    unsigned int serialising_in_flight;

    /* counter for nested bdrv_io_plug.
     * Accessed with atomic ops.
     */
    unsigned io_plugged;

    /* do we need to tell the guest if we have a volatile write cache? */
    int enable_write_cache;

    /* Accessed with atomic ops.  */
    int quiesce_counter;
    int recursive_quiesce_counter;

    unsigned int write_gen;               /* Current data generation */

    /* Protected by reqs_lock.  */
    CoMutex reqs_lock;
    QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
    CoQueue flush_queue;                  /* Serializing flush queue */
    bool active_flush_req;                /* Flush request in flight? */

    /* Only read/written by whoever has set active_flush_req to true.  */
    unsigned int flushed_gen;             /* Flushed write generation */

    /* BdrvChild links to this node may never be frozen */
    bool never_freeze;
};
926 | |
/*
 * Small set of root-node properties: the open flags, the read-only state
 * and the detect-zeroes mode.
 * NOTE(review): presumably kept by a BlockBackend so these settings can be
 * remembered while no root node is attached -- confirm at the users.
 */
struct BlockBackendRootState {
    int open_flags;
    bool read_only;
    BlockdevDetectZeroesOptions detect_zeroes;
};
932 | |
/*
 * Selects how the backing chain of a mirror job's target is set up; passed
 * to mirror_start() as @backing_mode.
 */
typedef enum BlockMirrorBackingMode {
    /* Reuse the existing backing chain from the source for the target.
     * - sync=full: Set backing BDS to NULL.
     * - sync=top:  Use source's backing BDS.
     * - sync=none: Use source as the backing BDS. */
    MIRROR_SOURCE_BACKING_CHAIN,

    /* Open the target's backing chain completely anew */
    MIRROR_OPEN_BACKING_CHAIN,

    /* Do not change the target's backing BDS after job completion */
    MIRROR_LEAVE_BACKING_CHAIN,
} BlockMirrorBackingMode;
946 | |
947 | static inline BlockDriverState *backing_bs(BlockDriverState *bs) |
948 | { |
949 | return bs->backing ? bs->backing->bs : NULL; |
950 | } |
951 | |
952 | |
/*
 * Essential block drivers which must always be statically linked into qemu,
 * and which therefore can be accessed without using bdrv_find_format().
 * Only declared here; the driver objects are defined outside this header.
 */
extern BlockDriver bdrv_file;
extern BlockDriver bdrv_raw;
extern BlockDriver bdrv_qcow2;
958 | |
/*
 * Vectored read/write entry points operating on @child. All four are
 * marked coroutine_fn and are only declared here.
 *
 * @offset and @bytes describe the request, @qiov supplies the I/O vector
 * and @flags carries BdrvRequestFlags. The *_part variants additionally
 * take @qiov_offset.
 * NOTE(review): presumably @qiov_offset is the byte offset into @qiov at
 * which the request's data starts, and the return value is 0 on success or
 * a negative errno -- confirm against the implementation.
 */
int coroutine_fn bdrv_co_preadv(BdrvChild *child,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
    int64_t offset, unsigned int bytes,
    QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags);
int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
    int64_t offset, unsigned int bytes,
    QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags);
971 | |
972 | static inline int coroutine_fn bdrv_co_pread(BdrvChild *child, |
973 | int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags) |
974 | { |
975 | QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); |
976 | |
977 | return bdrv_co_preadv(child, offset, bytes, &qiov, flags); |
978 | } |
979 | |
980 | static inline int coroutine_fn bdrv_co_pwrite(BdrvChild *child, |
981 | int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags) |
982 | { |
983 | QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); |
984 | |
985 | return bdrv_co_pwritev(child, offset, bytes, &qiov, flags); |
986 | } |
987 | |
/*
 * Drain bookkeeping shared with the rest of the block layer.
 *
 * bdrv_drain_all_count is the value that BdrvChild::parent_quiesce_counter
 * may be "reduced by" relative to bs->quiesce_counter.
 * NOTE(review): presumably it counts the currently active drain-all
 * sections, and the two functions below apply/undo pending subtree drains
 * when @child is attached to @new_parent / detached from @old_parent --
 * confirm against the implementation.
 */
extern unsigned int bdrv_drain_all_count;
void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent);
void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent);
991 | |
/*
 * NOTE(review): presumably stores a temporary file name in @filename, whose
 * capacity is @size bytes, and returns 0 or a negative errno -- confirm.
 */
int get_tmp_filename(char *filename, int size);
/*
 * NOTE(review): presumably asks the registered drivers to probe the first
 * @buf_size bytes in @buf (and @filename) and returns the best match --
 * confirm against the implementation.
 */
BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
                            const char *filename);

/*
 * NOTE(review): presumably strips @prefix from @filename and stores the
 * remainder in @options -- confirm which key is used.
 */
void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix,
                                      QDict *options);
998 | |
999 | |
/**
 * bdrv_add_before_write_notifier:
 * @bs: The node whose write requests should trigger the callback.
 * @notifier: The notifier to register.
 *
 * Register a callback that is invoked before write requests are processed but
 * after any throttling or waiting for overlapping requests.
 */
void bdrv_add_before_write_notifier(BlockDriverState *bs,
                                    NotifierWithReturn *notifier);
1008 | |
/**
 * bdrv_add_aio_context_notifier:
 * @bs: The BDS whose AioContext association should be watched.
 * @attached_aio_context: Called with the new AioContext and @opaque.
 * @detach_aio_context: Called with @opaque.
 * @opaque: Opaque pointer value passed to both callbacks.
 *
 * If a long-running job intends to be always run in the same AioContext as a
 * certain BDS, it may use this function to be notified of changes regarding the
 * association of the BDS to an AioContext.
 *
 * attached_aio_context() is called after the target BDS has been attached to a
 * new AioContext; detach_aio_context() is called before the target BDS is being
 * detached from its old AioContext.
 */
void bdrv_add_aio_context_notifier(BlockDriverState *bs,
        void (*attached_aio_context)(AioContext *new_context, void *opaque),
        void (*detach_aio_context)(void *opaque), void *opaque);
1023 | |
/**
 * bdrv_remove_aio_context_notifier:
 * @bs: The BDS the notifier was registered on.
 * @aio_context_attached: The "attached" callback given at registration.
 * @aio_context_detached: The "detach" callback given at registration.
 * @opaque: The opaque pointer given at registration.
 *
 * Unsubscribe from change notifications regarding the BDS's AioContext. The
 * parameters given here have to be the same as those given to
 * bdrv_add_aio_context_notifier().
 */
void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
                                      void (*aio_context_attached)(AioContext *,
                                                                   void *),
                                      void (*aio_context_detached)(void *),
                                      void *opaque);
1036 | |
/**
 * bdrv_wakeup:
 * @bs: The BlockDriverState for which an I/O operation has been completed.
 *
 * Wake up the main thread if it is waiting on BDRV_POLL_WHILE.  During
 * synchronous I/O on a BlockDriverState that is attached to another
 * I/O thread, the main thread lets the I/O thread's event loop run,
 * waiting for the I/O operation to complete.  A bdrv_wakeup() call will
 * wake up the main thread if necessary.
 *
 * Manual calls to bdrv_wakeup() are rarely necessary, because
 * bdrv_dec_in_flight() already calls it.
 */
void bdrv_wakeup(BlockDriverState *bs);
1051 | |
#ifdef _WIN32
/*
 * Only declared for Windows builds.
 * NOTE(review): presumably returns non-zero when @filename names a Windows
 * drive rather than a regular file -- confirm against the implementation.
 */
int is_windows_drive(const char *filename);
#endif
1055 | |
/**
 * stream_start:
 * @job_id: The id of the newly-created job, or %NULL to use the
 * device name of @bs.
 * @bs: Block device to operate on.
 * @base: Block device that will become the new base, or %NULL to
 * flatten the whole backing file chain onto @bs.
 * @backing_file_str: The file name that will be written to @bs as the
 * new backing file if the job completes. Ignored if @base is %NULL.
 * @creation_flags: Flags that control the behavior of the Job lifetime.
 *                  See @BlockJobCreateFlags
 * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
 * @on_error: The action to take upon error.
 * @errp: Error object.
 *
 * Start a streaming operation on @bs.  Clusters that are unallocated
 * in @bs, but allocated in any image between @base and @bs (both
 * exclusive) will be written to @bs.  At the end of a successful
 * streaming job, the backing file of @bs will be changed to
 * @backing_file_str in the written image and to @base in the live
 * BlockDriverState.
 */
void stream_start(const char *job_id, BlockDriverState *bs,
                  BlockDriverState *base, const char *backing_file_str,
                  int creation_flags, int64_t speed,
                  BlockdevOnError on_error, Error **errp);
1082 | |
/**
 * commit_start:
 * @job_id: The id of the newly-created job, or %NULL to use the
 * device name of @bs.
 * @bs: Active block device.
 * @base: Block device that will be written into, and become the new top.
 * @top: Top block device to be committed.
 * @creation_flags: Flags that control the behavior of the Job lifetime.
 *                  See @BlockJobCreateFlags
 * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
 * @on_error: The action to take upon error.
 * @backing_file_str: String to use as the backing file in @top's overlay
 * @filter_node_name: The node name that should be assigned to the filter
 * driver that the commit job inserts into the graph above @top. NULL means
 * that a node name should be autogenerated.
 * @errp: Error object.
 *
 * Note the argument order: @base is passed before @top.
 */
void commit_start(const char *job_id, BlockDriverState *bs,
                  BlockDriverState *base, BlockDriverState *top,
                  int creation_flags, int64_t speed,
                  BlockdevOnError on_error, const char *backing_file_str,
                  const char *filter_node_name, Error **errp);
/**
 * commit_active_start:
 * @job_id: The id of the newly-created job, or %NULL to use the
 * device name of @bs.
 * @bs: Active block device to be committed.
 * @base: Block device that will be written into, and become the new top.
 * @creation_flags: Flags that control the behavior of the Job lifetime.
 *                  See @BlockJobCreateFlags
 * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
 * @on_error: The action to take upon error.
 * @filter_node_name: The node name that should be assigned to the filter
 * driver that the commit job inserts into the graph above @bs. NULL means that
 * a node name should be autogenerated.
 * @cb: Completion function for the job.
 * @opaque: Opaque pointer value passed to @cb.
 * @auto_complete: Auto complete the job.
 * @errp: Error object.
 *
 * NOTE(review): presumably returns the created job, or NULL with @errp set
 * on failure -- confirm against the implementation.
 */
BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
                              BlockDriverState *base, int creation_flags,
                              int64_t speed, BlockdevOnError on_error,
                              const char *filter_node_name,
                              BlockCompletionFunc *cb, void *opaque,
                              bool auto_complete, Error **errp);
/**
 * mirror_start:
 * @job_id: The id of the newly-created job, or %NULL to use the
 * device name of @bs.
 * @bs: Block device to operate on.
 * @target: Block device to write to.
 * @replaces: Block graph node name to replace once the mirror is done. Can
 *            only be used when full mirroring is selected.
 * @creation_flags: Flags that control the behavior of the Job lifetime.
 *                  See @BlockJobCreateFlags
 * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
 * @granularity: The chosen granularity for the dirty bitmap.
 * @buf_size: The amount of data that can be in flight at one time.
 * @mode: Whether to collapse all images in the chain to the target.
 * @backing_mode: How to establish the target's backing chain after completion.
 * @zero_target: Whether the target should be explicitly zero-initialized
 * @on_source_error: The action to take upon error reading from the source.
 * @on_target_error: The action to take upon error writing to the target.
 * @unmap: Whether to unmap target where source sectors only contain zeroes.
 * @filter_node_name: The node name that should be assigned to the filter
 * driver that the mirror job inserts into the graph above @bs. NULL means that
 * a node name should be autogenerated.
 * @copy_mode: When to trigger writes to the target.
 * @errp: Error object.
 *
 * Start a mirroring operation on @bs.  Clusters that are allocated
 * in @bs will be written to @target until the job is cancelled or
 * manually completed.  At the end of a successful mirroring job,
 * @bs will be switched to read from @target.
 */
void mirror_start(const char *job_id, BlockDriverState *bs,
                  BlockDriverState *target, const char *replaces,
                  int creation_flags, int64_t speed,
                  uint32_t granularity, int64_t buf_size,
                  MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
                  bool zero_target,
                  BlockdevOnError on_source_error,
                  BlockdevOnError on_target_error,
                  bool unmap, const char *filter_node_name,
                  MirrorCopyMode copy_mode, Error **errp);
1171 | |
/**
 * backup_job_create:
 * @job_id: The id of the newly-created job, or %NULL to use the
 * device name of @bs.
 * @bs: Block device to operate on.
 * @target: Block device to write to.
 * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
 * @sync_mode: What parts of the disk image should be copied to the destination.
 * @sync_bitmap: The dirty bitmap if sync_mode is 'bitmap' or 'incremental'
 * @bitmap_mode: The bitmap synchronization policy to use.
 * @compress: NOTE(review): presumably whether the data written to @target
 *            should be compressed -- confirm against the implementation.
 * @on_source_error: The action to take upon error reading from the source.
 * @on_target_error: The action to take upon error writing to the target.
 * @creation_flags: Flags that control the behavior of the Job lifetime.
 *                  See @BlockJobCreateFlags
 * @cb: Completion function for the job.
 * @opaque: Opaque pointer value passed to @cb.
 * @txn: Transaction that this job is part of (may be NULL).
 * @errp: Error object.
 *
 * Create a backup operation on @bs.  Clusters in @bs are written to @target
 * until the job is cancelled or manually completed.
 */
BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
                            BlockDriverState *target, int64_t speed,
                            MirrorSyncMode sync_mode,
                            BdrvDirtyBitmap *sync_bitmap,
                            BitmapSyncMode bitmap_mode,
                            bool compress,
                            BlockdevOnError on_source_error,
                            BlockdevOnError on_target_error,
                            int creation_flags,
                            BlockCompletionFunc *cb, void *opaque,
                            JobTxn *txn, Error **errp);
1204 | |
/*
 * NOTE(review): presumably the monitor (HMP) helper that creates a block
 * node from the option string @optstr, reporting through @mon -- confirm
 * against the implementation.
 */
void hmp_drive_add_node(Monitor *mon, const char *optstr);
1206 | |
/*
 * Create/release a BdrvChild link to @child_bs.
 *
 * The arguments mirror the BdrvChild fields: @child_name, @child_role,
 * @perm, @shared_perm and @opaque; @ctx is an AioContext.
 * NOTE(review): presumably this is for parents that are not themselves a
 * BlockDriverState, returns NULL with @errp set on failure, and
 * bdrv_root_unref_child() detaches the link and drops the reference it
 * held -- confirm against the implementation.
 */
BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
                                  const char *child_name,
                                  const BdrvChildRole *child_role,
                                  AioContext *ctx,
                                  uint64_t perm, uint64_t shared_perm,
                                  void *opaque, Error **errp);
void bdrv_root_unref_child(BdrvChild *child);
1214 | |
/**
 * bdrv_child_try_set_perm:
 * @c: The child link whose permissions are changed.
 * @perm: Requested permissions (BLK_PERM_* bitmask).
 * @shared: Permissions shared with other users (BLK_PERM_* bitmask).
 * @errp: Error object.
 *
 * Sets a BdrvChild's permissions.  Avoid if the parent is a BDS; use
 * bdrv_child_refresh_perms() instead and make the parent's
 * .bdrv_child_perm() implementation return the correct values.
 */
int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
                            Error **errp);
1222 | |
/**
 * bdrv_child_refresh_perms:
 * @bs: The parent node whose driver callback is consulted.
 * @c: The child link to update.
 * @errp: Error object.
 *
 * Calls bs->drv->bdrv_child_perm() and updates the child's permission
 * masks with the result.
 * Drivers should invoke this function whenever an event occurs that
 * makes their .bdrv_child_perm() implementation return different
 * values than before, but which will not result in the block layer
 * automatically refreshing the permissions.
 */
int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp);
1232 | |
/*
 * Default implementation for BlockDriver.bdrv_child_perm() that can be used by
 * block filters: Forward CONSISTENT_READ, WRITE, WRITE_UNCHANGED and RESIZE to
 * all children.
 *
 * @perm and @shared are inputs; the resulting masks are stored through
 * @nperm and @nshared.
 */
void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c,
                               const BdrvChildRole *role,
                               BlockReopenQueue *reopen_queue,
                               uint64_t perm, uint64_t shared,
                               uint64_t *nperm, uint64_t *nshared);
1241 | |
/*
 * Default implementation for BlockDriver.bdrv_child_perm() that can be used by
 * (non-raw) image formats: Like bdrv_filter_default_perms() for bs->backing,
 * but for bs->file it requires WRITE | RESIZE for read-write images, always
 * requires CONSISTENT_READ and doesn't share WRITE.
 *
 * @perm and @shared are inputs; the resulting masks are stored through
 * @nperm and @nshared.
 */
void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
                               const BdrvChildRole *role,
                               BlockReopenQueue *reopen_queue,
                               uint64_t perm, uint64_t shared,
                               uint64_t *nperm, uint64_t *nshared);
1251 | |
/*
 * Default implementation for drivers to pass bdrv_co_block_status() to
 * their file.
 *
 * NOTE(review): presumably @pnum, @map and @file are output parameters
 * filled in for the range starting at @offset -- confirm against the
 * implementation.
 */
int coroutine_fn bdrv_co_block_status_from_file(BlockDriverState *bs,
                                                bool want_zero,
                                                int64_t offset,
                                                int64_t bytes,
                                                int64_t *pnum,
                                                int64_t *map,
                                                BlockDriverState **file);
/*
 * Default implementation for drivers to pass bdrv_co_block_status() to
 * their backing file.
 *
 * NOTE(review): presumably @pnum, @map and @file are output parameters
 * filled in for the range starting at @offset -- confirm against the
 * implementation.
 */
int coroutine_fn bdrv_co_block_status_from_backing(BlockDriverState *bs,
                                                   bool want_zero,
                                                   int64_t offset,
                                                   int64_t bytes,
                                                   int64_t *pnum,
                                                   int64_t *map,
                                                   BlockDriverState **file);
/*
 * NOTE(review): presumably returns a name identifying a parent of @bs; the
 * string is const, so callers must not free it -- confirm.
 */
const char *bdrv_get_parent_name(const BlockDriverState *bs);

/*
 * Helpers relating to the device attached to a BlockBackend.
 * NOTE(review): judging by the names these forward media-change, tray,
 * eject and lock queries/notifications to that device -- confirm against
 * the implementation.
 */
void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp);
bool blk_dev_has_removable_media(BlockBackend *blk);
bool blk_dev_has_tray(BlockBackend *blk);
void blk_dev_eject_request(BlockBackend *blk, bool force);
bool blk_dev_is_tray_open(BlockBackend *blk);
bool blk_dev_is_medium_locked(BlockBackend *blk);
1281 | |
/*
 * NOTE(review): presumably marks the range @offset/@bytes dirty in the
 * dirty bitmaps of @bs (see BlockDriverState::dirty_bitmaps) -- confirm.
 */
void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes);

/*
 * Block-layer-internal dirty bitmap helpers.
 * NOTE(review): presumably bdrv_clear_dirty_bitmap() can hand the previous
 * contents back through @out so that bdrv_restore_dirty_bitmap() can
 * reinstate them from @backup, and @lock selects whether the merge helper
 * takes the bitmap lock itself -- confirm against the implementation.
 */
void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out);
void bdrv_restore_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *backup);
bool bdrv_dirty_bitmap_merge_internal(BdrvDirtyBitmap *dest,
                                      const BdrvDirtyBitmap *src,
                                      HBitmap **backup, bool lock);
1289 | |
/*
 * NOTE(review): presumably increment/decrement BlockDriverState::in_flight.
 * Per the bdrv_wakeup() documentation, bdrv_dec_in_flight() also calls
 * bdrv_wakeup().
 */
void bdrv_inc_in_flight(BlockDriverState *bs);
void bdrv_dec_in_flight(BlockDriverState *bs);

/*
 * NOTE(review): presumably closes the monitor-owned BlockDriverStates --
 * confirm against the implementation.
 */
void blockdev_close_all_bdrv_states(void);
1294 | |
/*
 * Copy-range entry points taking a source child/offset, a destination
 * child/offset, a length and separate request flags for the read and the
 * write side. Both are marked coroutine_fn and only declared here.
 * NOTE(review): presumably the _from variant is dispatched on the source
 * node and the _to variant on the destination node, returning 0 or a
 * negative errno -- confirm against the implementation.
 */
int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, uint64_t src_offset,
                                         BdrvChild *dst, uint64_t dst_offset,
                                         uint64_t bytes,
                                         BdrvRequestFlags read_flags,
                                         BdrvRequestFlags write_flags);
int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset,
                                       BdrvChild *dst, uint64_t dst_offset,
                                       uint64_t bytes,
                                       BdrvRequestFlags read_flags,
                                       BdrvRequestFlags write_flags);
1305 | |
/*
 * NOTE(review): presumably updates bs->total_sectors, with @hint being the
 * caller's idea of the current size in sectors, and returns 0 or a negative
 * errno -- confirm against the implementation.
 */
int refresh_total_sectors(BlockDriverState *bs, int64_t hint);
1307 | |
1308 | #endif /* BLOCK_INT_H */ |
1309 | |