1 | /* BlockDriver implementation for "raw" format driver |
2 | * |
3 | * Copyright (C) 2010-2016 Red Hat, Inc. |
4 | * Copyright (C) 2010, Blue Swirl <blauwirbel@gmail.com> |
5 | * Copyright (C) 2009, Anthony Liguori <aliguori@us.ibm.com> |
6 | * |
7 | * Author: |
8 | * Laszlo Ersek <lersek@redhat.com> |
9 | * |
10 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
11 | * of this software and associated documentation files (the "Software"), to |
12 | * deal in the Software without restriction, including without limitation the |
13 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or |
14 | * sell copies of the Software, and to permit persons to whom the Software is |
15 | * furnished to do so, subject to the following conditions: |
16 | * |
17 | * The above copyright notice and this permission notice shall be included in |
18 | * all copies or substantial portions of the Software. |
19 | * |
20 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
21 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
23 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
24 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
25 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
26 | * IN THE SOFTWARE. |
27 | */ |
28 | |
29 | #include "qemu/osdep.h" |
30 | #include "block/block_int.h" |
31 | #include "qapi/error.h" |
32 | #include "qemu/module.h" |
33 | #include "qemu/option.h" |
34 | |
/*
 * Per-node state of the raw format driver.
 */
typedef struct BDRVRawState {
    /* Value of the "offset" option; added to every request offset */
    uint64_t offset;
    /* Size of the exposed area in bytes */
    uint64_t size;
    /* True when the "size" option was given explicitly */
    bool has_size;
} BDRVRawState;
40 | |
/*
 * NULL-terminated list of option names, exported through the driver's
 * .mutable_opts field (presumably the options that may change on reopen --
 * confirm against the block layer).
 */
static const char *const mutable_opts[] = { "offset", "size", NULL };
42 | |
43 | static QemuOptsList raw_runtime_opts = { |
44 | .name = "raw" , |
45 | .head = QTAILQ_HEAD_INITIALIZER(raw_runtime_opts.head), |
46 | .desc = { |
47 | { |
48 | .name = "offset" , |
49 | .type = QEMU_OPT_SIZE, |
50 | .help = "offset in the disk where the image starts" , |
51 | }, |
52 | { |
53 | .name = "size" , |
54 | .type = QEMU_OPT_SIZE, |
55 | .help = "virtual disk size" , |
56 | }, |
57 | { /* end of list */ } |
58 | }, |
59 | }; |
60 | |
61 | static QemuOptsList raw_create_opts = { |
62 | .name = "raw-create-opts" , |
63 | .head = QTAILQ_HEAD_INITIALIZER(raw_create_opts.head), |
64 | .desc = { |
65 | { |
66 | .name = BLOCK_OPT_SIZE, |
67 | .type = QEMU_OPT_SIZE, |
68 | .help = "Virtual disk size" |
69 | }, |
70 | { /* end of list */ } |
71 | } |
72 | }; |
73 | |
74 | static int raw_read_options(QDict *options, BlockDriverState *bs, |
75 | BDRVRawState *s, Error **errp) |
76 | { |
77 | Error *local_err = NULL; |
78 | QemuOpts *opts = NULL; |
79 | int64_t real_size = 0; |
80 | int ret; |
81 | |
82 | real_size = bdrv_getlength(bs->file->bs); |
83 | if (real_size < 0) { |
84 | error_setg_errno(errp, -real_size, "Could not get image size" ); |
85 | return real_size; |
86 | } |
87 | |
88 | opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort); |
89 | qemu_opts_absorb_qdict(opts, options, &local_err); |
90 | if (local_err) { |
91 | error_propagate(errp, local_err); |
92 | ret = -EINVAL; |
93 | goto end; |
94 | } |
95 | |
96 | s->offset = qemu_opt_get_size(opts, "offset" , 0); |
97 | if (s->offset > real_size) { |
98 | error_setg(errp, "Offset (%" PRIu64 ") cannot be greater than " |
99 | "size of the containing file (%" PRId64 ")" , |
100 | s->offset, real_size); |
101 | ret = -EINVAL; |
102 | goto end; |
103 | } |
104 | |
105 | if (qemu_opt_find(opts, "size" ) != NULL) { |
106 | s->size = qemu_opt_get_size(opts, "size" , 0); |
107 | s->has_size = true; |
108 | } else { |
109 | s->has_size = false; |
110 | s->size = real_size - s->offset; |
111 | } |
112 | |
113 | /* Check size and offset */ |
114 | if ((real_size - s->offset) < s->size) { |
115 | error_setg(errp, "The sum of offset (%" PRIu64 ") and size " |
116 | "(%" PRIu64 ") has to be smaller or equal to the " |
117 | " actual size of the containing file (%" PRId64 ")" , |
118 | s->offset, s->size, real_size); |
119 | ret = -EINVAL; |
120 | goto end; |
121 | } |
122 | |
123 | /* Make sure size is multiple of BDRV_SECTOR_SIZE to prevent rounding |
124 | * up and leaking out of the specified area. */ |
125 | if (s->has_size && !QEMU_IS_ALIGNED(s->size, BDRV_SECTOR_SIZE)) { |
126 | error_setg(errp, "Specified size is not multiple of %llu" , |
127 | BDRV_SECTOR_SIZE); |
128 | ret = -EINVAL; |
129 | goto end; |
130 | } |
131 | |
132 | ret = 0; |
133 | |
134 | end: |
135 | |
136 | qemu_opts_del(opts); |
137 | |
138 | return ret; |
139 | } |
140 | |
141 | static int raw_reopen_prepare(BDRVReopenState *reopen_state, |
142 | BlockReopenQueue *queue, Error **errp) |
143 | { |
144 | assert(reopen_state != NULL); |
145 | assert(reopen_state->bs != NULL); |
146 | |
147 | reopen_state->opaque = g_new0(BDRVRawState, 1); |
148 | |
149 | return raw_read_options( |
150 | reopen_state->options, |
151 | reopen_state->bs, |
152 | reopen_state->opaque, |
153 | errp); |
154 | } |
155 | |
156 | static void raw_reopen_commit(BDRVReopenState *state) |
157 | { |
158 | BDRVRawState *new_s = state->opaque; |
159 | BDRVRawState *s = state->bs->opaque; |
160 | |
161 | memcpy(s, new_s, sizeof(BDRVRawState)); |
162 | |
163 | g_free(state->opaque); |
164 | state->opaque = NULL; |
165 | } |
166 | |
167 | static void raw_reopen_abort(BDRVReopenState *state) |
168 | { |
169 | g_free(state->opaque); |
170 | state->opaque = NULL; |
171 | } |
172 | |
173 | /* Check and adjust the offset, against 'offset' and 'size' options. */ |
174 | static inline int raw_adjust_offset(BlockDriverState *bs, uint64_t *offset, |
175 | uint64_t bytes, bool is_write) |
176 | { |
177 | BDRVRawState *s = bs->opaque; |
178 | |
179 | if (s->has_size && (*offset > s->size || bytes > (s->size - *offset))) { |
180 | /* There's not enough space for the write, or the read request is |
181 | * out-of-range. Don't read/write anything to prevent leaking out of |
182 | * the size specified in options. */ |
183 | return is_write ? -ENOSPC : -EINVAL; |
184 | } |
185 | |
186 | if (*offset > INT64_MAX - s->offset) { |
187 | return -EINVAL; |
188 | } |
189 | *offset += s->offset; |
190 | |
191 | return 0; |
192 | } |
193 | |
194 | static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset, |
195 | uint64_t bytes, QEMUIOVector *qiov, |
196 | int flags) |
197 | { |
198 | int ret; |
199 | |
200 | ret = raw_adjust_offset(bs, &offset, bytes, false); |
201 | if (ret) { |
202 | return ret; |
203 | } |
204 | |
205 | BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); |
206 | return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); |
207 | } |
208 | |
209 | static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset, |
210 | uint64_t bytes, QEMUIOVector *qiov, |
211 | int flags) |
212 | { |
213 | void *buf = NULL; |
214 | BlockDriver *drv; |
215 | QEMUIOVector local_qiov; |
216 | int ret; |
217 | |
218 | if (bs->probed && offset < BLOCK_PROBE_BUF_SIZE && bytes) { |
219 | /* Handling partial writes would be a pain - so we just |
220 | * require that guests have 512-byte request alignment if |
221 | * probing occurred */ |
222 | QEMU_BUILD_BUG_ON(BLOCK_PROBE_BUF_SIZE != 512); |
223 | QEMU_BUILD_BUG_ON(BDRV_SECTOR_SIZE != 512); |
224 | assert(offset == 0 && bytes >= BLOCK_PROBE_BUF_SIZE); |
225 | |
226 | buf = qemu_try_blockalign(bs->file->bs, 512); |
227 | if (!buf) { |
228 | ret = -ENOMEM; |
229 | goto fail; |
230 | } |
231 | |
232 | ret = qemu_iovec_to_buf(qiov, 0, buf, 512); |
233 | if (ret != 512) { |
234 | ret = -EINVAL; |
235 | goto fail; |
236 | } |
237 | |
238 | drv = bdrv_probe_all(buf, 512, NULL); |
239 | if (drv != bs->drv) { |
240 | ret = -EPERM; |
241 | goto fail; |
242 | } |
243 | |
244 | /* Use the checked buffer, a malicious guest might be overwriting its |
245 | * original buffer in the background. */ |
246 | qemu_iovec_init(&local_qiov, qiov->niov + 1); |
247 | qemu_iovec_add(&local_qiov, buf, 512); |
248 | qemu_iovec_concat(&local_qiov, qiov, 512, qiov->size - 512); |
249 | qiov = &local_qiov; |
250 | } |
251 | |
252 | ret = raw_adjust_offset(bs, &offset, bytes, true); |
253 | if (ret) { |
254 | goto fail; |
255 | } |
256 | |
257 | BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); |
258 | ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags); |
259 | |
260 | fail: |
261 | if (qiov == &local_qiov) { |
262 | qemu_iovec_destroy(&local_qiov); |
263 | } |
264 | qemu_vfree(buf); |
265 | return ret; |
266 | } |
267 | |
268 | static int coroutine_fn raw_co_block_status(BlockDriverState *bs, |
269 | bool want_zero, int64_t offset, |
270 | int64_t bytes, int64_t *pnum, |
271 | int64_t *map, |
272 | BlockDriverState **file) |
273 | { |
274 | BDRVRawState *s = bs->opaque; |
275 | *pnum = bytes; |
276 | *file = bs->file->bs; |
277 | *map = offset + s->offset; |
278 | return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID; |
279 | } |
280 | |
281 | static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs, |
282 | int64_t offset, int bytes, |
283 | BdrvRequestFlags flags) |
284 | { |
285 | int ret; |
286 | |
287 | ret = raw_adjust_offset(bs, (uint64_t *)&offset, bytes, true); |
288 | if (ret) { |
289 | return ret; |
290 | } |
291 | return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags); |
292 | } |
293 | |
294 | static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs, |
295 | int64_t offset, int bytes) |
296 | { |
297 | int ret; |
298 | |
299 | ret = raw_adjust_offset(bs, (uint64_t *)&offset, bytes, true); |
300 | if (ret) { |
301 | return ret; |
302 | } |
303 | return bdrv_co_pdiscard(bs->file, offset, bytes); |
304 | } |
305 | |
306 | static int64_t raw_getlength(BlockDriverState *bs) |
307 | { |
308 | int64_t len; |
309 | BDRVRawState *s = bs->opaque; |
310 | |
311 | /* Update size. It should not change unless the file was externally |
312 | * modified. */ |
313 | len = bdrv_getlength(bs->file->bs); |
314 | if (len < 0) { |
315 | return len; |
316 | } |
317 | |
318 | if (len < s->offset) { |
319 | s->size = 0; |
320 | } else { |
321 | if (s->has_size) { |
322 | /* Try to honour the size */ |
323 | s->size = MIN(s->size, len - s->offset); |
324 | } else { |
325 | s->size = len - s->offset; |
326 | } |
327 | } |
328 | |
329 | return s->size; |
330 | } |
331 | |
332 | static BlockMeasureInfo *raw_measure(QemuOpts *opts, BlockDriverState *in_bs, |
333 | Error **errp) |
334 | { |
335 | BlockMeasureInfo *info; |
336 | int64_t required; |
337 | |
338 | if (in_bs) { |
339 | required = bdrv_getlength(in_bs); |
340 | if (required < 0) { |
341 | error_setg_errno(errp, -required, "Unable to get image size" ); |
342 | return NULL; |
343 | } |
344 | } else { |
345 | required = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), |
346 | BDRV_SECTOR_SIZE); |
347 | } |
348 | |
349 | info = g_new(BlockMeasureInfo, 1); |
350 | info->required = required; |
351 | |
352 | /* Unallocated sectors count towards the file size in raw images */ |
353 | info->fully_allocated = info->required; |
354 | return info; |
355 | } |
356 | |
357 | static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) |
358 | { |
359 | return bdrv_get_info(bs->file->bs, bdi); |
360 | } |
361 | |
362 | static void raw_refresh_limits(BlockDriverState *bs, Error **errp) |
363 | { |
364 | if (bs->probed) { |
365 | /* To make it easier to protect the first sector, any probed |
366 | * image is restricted to read-modify-write on sub-sector |
367 | * operations. */ |
368 | bs->bl.request_alignment = BDRV_SECTOR_SIZE; |
369 | } |
370 | } |
371 | |
372 | static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, |
373 | PreallocMode prealloc, Error **errp) |
374 | { |
375 | BDRVRawState *s = bs->opaque; |
376 | |
377 | if (s->has_size) { |
378 | error_setg(errp, "Cannot resize fixed-size raw disks" ); |
379 | return -ENOTSUP; |
380 | } |
381 | |
382 | if (INT64_MAX - offset < s->offset) { |
383 | error_setg(errp, "Disk size too large for the chosen offset" ); |
384 | return -EINVAL; |
385 | } |
386 | |
387 | s->size = offset; |
388 | offset += s->offset; |
389 | return bdrv_co_truncate(bs->file, offset, prealloc, errp); |
390 | } |
391 | |
392 | static void raw_eject(BlockDriverState *bs, bool eject_flag) |
393 | { |
394 | bdrv_eject(bs->file->bs, eject_flag); |
395 | } |
396 | |
397 | static void raw_lock_medium(BlockDriverState *bs, bool locked) |
398 | { |
399 | bdrv_lock_medium(bs->file->bs, locked); |
400 | } |
401 | |
402 | static int raw_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) |
403 | { |
404 | BDRVRawState *s = bs->opaque; |
405 | if (s->offset || s->has_size) { |
406 | return -ENOTSUP; |
407 | } |
408 | return bdrv_co_ioctl(bs->file->bs, req, buf); |
409 | } |
410 | |
411 | static int raw_has_zero_init(BlockDriverState *bs) |
412 | { |
413 | return bdrv_has_zero_init(bs->file->bs); |
414 | } |
415 | |
416 | static int raw_has_zero_init_truncate(BlockDriverState *bs) |
417 | { |
418 | return bdrv_has_zero_init_truncate(bs->file->bs); |
419 | } |
420 | |
421 | static int coroutine_fn raw_co_create_opts(const char *filename, QemuOpts *opts, |
422 | Error **errp) |
423 | { |
424 | return bdrv_create_file(filename, opts, errp); |
425 | } |
426 | |
427 | static int raw_open(BlockDriverState *bs, QDict *options, int flags, |
428 | Error **errp) |
429 | { |
430 | BDRVRawState *s = bs->opaque; |
431 | int ret; |
432 | |
433 | bs->file = bdrv_open_child(NULL, options, "file" , bs, &child_file, |
434 | false, errp); |
435 | if (!bs->file) { |
436 | return -EINVAL; |
437 | } |
438 | |
439 | bs->sg = bs->file->bs->sg; |
440 | bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED | |
441 | (BDRV_REQ_FUA & bs->file->bs->supported_write_flags); |
442 | bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | |
443 | ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) & |
444 | bs->file->bs->supported_zero_flags); |
445 | |
446 | if (bs->probed && !bdrv_is_read_only(bs)) { |
447 | bdrv_refresh_filename(bs->file->bs); |
448 | fprintf(stderr, |
449 | "WARNING: Image format was not specified for '%s' and probing " |
450 | "guessed raw.\n" |
451 | " Automatically detecting the format is dangerous for " |
452 | "raw images, write operations on block 0 will be restricted.\n" |
453 | " Specify the 'raw' format explicitly to remove the " |
454 | "restrictions.\n" , |
455 | bs->file->bs->filename); |
456 | } |
457 | |
458 | ret = raw_read_options(options, bs, s, errp); |
459 | if (ret < 0) { |
460 | return ret; |
461 | } |
462 | |
463 | if (bs->sg && (s->offset || s->has_size)) { |
464 | error_setg(errp, "Cannot use offset/size with SCSI generic devices" ); |
465 | return -EINVAL; |
466 | } |
467 | |
468 | return 0; |
469 | } |
470 | |
/*
 * Probe handler.  The buffer and filename are ignored: the score is always
 * the smallest possible positive one, so that raw is used if and only if no
 * other block driver works.
 */
static int raw_probe(const uint8_t *buf, int buf_size, const char *filename)
{
    enum { RAW_PROBE_SCORE = 1 };

    return RAW_PROBE_SCORE;
}
478 | |
479 | static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) |
480 | { |
481 | BDRVRawState *s = bs->opaque; |
482 | int ret; |
483 | |
484 | ret = bdrv_probe_blocksizes(bs->file->bs, bsz); |
485 | if (ret < 0) { |
486 | return ret; |
487 | } |
488 | |
489 | if (!QEMU_IS_ALIGNED(s->offset, MAX(bsz->log, bsz->phys))) { |
490 | return -ENOTSUP; |
491 | } |
492 | |
493 | return 0; |
494 | } |
495 | |
496 | static int raw_probe_geometry(BlockDriverState *bs, HDGeometry *geo) |
497 | { |
498 | BDRVRawState *s = bs->opaque; |
499 | if (s->offset || s->has_size) { |
500 | return -ENOTSUP; |
501 | } |
502 | return bdrv_probe_geometry(bs->file->bs, geo); |
503 | } |
504 | |
505 | static int coroutine_fn raw_co_copy_range_from(BlockDriverState *bs, |
506 | BdrvChild *src, |
507 | uint64_t src_offset, |
508 | BdrvChild *dst, |
509 | uint64_t dst_offset, |
510 | uint64_t bytes, |
511 | BdrvRequestFlags read_flags, |
512 | BdrvRequestFlags write_flags) |
513 | { |
514 | int ret; |
515 | |
516 | ret = raw_adjust_offset(bs, &src_offset, bytes, false); |
517 | if (ret) { |
518 | return ret; |
519 | } |
520 | return bdrv_co_copy_range_from(bs->file, src_offset, dst, dst_offset, |
521 | bytes, read_flags, write_flags); |
522 | } |
523 | |
524 | static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs, |
525 | BdrvChild *src, |
526 | uint64_t src_offset, |
527 | BdrvChild *dst, |
528 | uint64_t dst_offset, |
529 | uint64_t bytes, |
530 | BdrvRequestFlags read_flags, |
531 | BdrvRequestFlags write_flags) |
532 | { |
533 | int ret; |
534 | |
535 | ret = raw_adjust_offset(bs, &dst_offset, bytes, true); |
536 | if (ret) { |
537 | return ret; |
538 | } |
539 | return bdrv_co_copy_range_to(src, src_offset, bs->file, dst_offset, bytes, |
540 | read_flags, write_flags); |
541 | } |
542 | |
/*
 * NULL-terminated list of option names, exported through the driver's
 * .strong_runtime_opts field.
 */
static const char *const raw_strong_runtime_opts[] = {
    "offset",
    "size",

    NULL
};
549 | |
550 | BlockDriver bdrv_raw = { |
551 | .format_name = "raw" , |
552 | .instance_size = sizeof(BDRVRawState), |
553 | .bdrv_probe = &raw_probe, |
554 | .bdrv_reopen_prepare = &raw_reopen_prepare, |
555 | .bdrv_reopen_commit = &raw_reopen_commit, |
556 | .bdrv_reopen_abort = &raw_reopen_abort, |
557 | .bdrv_open = &raw_open, |
558 | .bdrv_child_perm = bdrv_filter_default_perms, |
559 | .bdrv_co_create_opts = &raw_co_create_opts, |
560 | .bdrv_co_preadv = &raw_co_preadv, |
561 | .bdrv_co_pwritev = &raw_co_pwritev, |
562 | .bdrv_co_pwrite_zeroes = &raw_co_pwrite_zeroes, |
563 | .bdrv_co_pdiscard = &raw_co_pdiscard, |
564 | .bdrv_co_block_status = &raw_co_block_status, |
565 | .bdrv_co_copy_range_from = &raw_co_copy_range_from, |
566 | .bdrv_co_copy_range_to = &raw_co_copy_range_to, |
567 | .bdrv_co_truncate = &raw_co_truncate, |
568 | .bdrv_getlength = &raw_getlength, |
569 | .has_variable_length = true, |
570 | .bdrv_measure = &raw_measure, |
571 | .bdrv_get_info = &raw_get_info, |
572 | .bdrv_refresh_limits = &raw_refresh_limits, |
573 | .bdrv_probe_blocksizes = &raw_probe_blocksizes, |
574 | .bdrv_probe_geometry = &raw_probe_geometry, |
575 | .bdrv_eject = &raw_eject, |
576 | .bdrv_lock_medium = &raw_lock_medium, |
577 | .bdrv_co_ioctl = &raw_co_ioctl, |
578 | .create_opts = &raw_create_opts, |
579 | .bdrv_has_zero_init = &raw_has_zero_init, |
580 | .bdrv_has_zero_init_truncate = &raw_has_zero_init_truncate, |
581 | .strong_runtime_opts = raw_strong_runtime_opts, |
582 | .mutable_opts = mutable_opts, |
583 | }; |
584 | |
/* Register the raw format driver (bdrv_raw) via bdrv_register(). */
static void bdrv_raw_init(void)
{
    bdrv_register(&bdrv_raw);
}

/*
 * Hand bdrv_raw_init() to the block_init() hook (presumably run once during
 * start-up -- confirm against qemu/module.h).
 */
block_init(bdrv_raw_init);
591 | |