/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2011 IBM Corp.
 * Copyright (c) 2012 Red Hat, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "block/block.h"
#include "block/blockjob_int.h"
#include "block/block_int.h"
#include "block/trace.h"
#include "sysemu/block-backend.h"
#include "qapi/error.h"
#include "qapi/qapi-events-block-core.h"
#include "qapi/qmp/qerror.h"
#include "qemu/coroutine.h"
#include "qemu/main-loop.h"
#include "qemu/timer.h"

/*
 * The block job API is composed of two categories of functions.
 *
 * The first includes functions used by the monitor. The monitor is
 * peculiar in that it accesses the block job list with block_job_get, and
 * therefore needs consistency across block_job_get and the actual operation
 * (e.g. block_job_set_speed). The consistency is achieved with
 * aio_context_acquire/release. These functions are declared in blockjob.h.
 *
 * The second includes functions used by the block job drivers and sometimes
 * by the core block layer. These do not need explicit locking, because the
 * whole coroutine runs under the AioContext lock; they are declared in
 * blockjob_int.h.
 */

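/*
 * For example, a monitor command that changes a job's speed follows this
 * pattern (a sketch; the real QMP handlers live in blockdev.c, where
 * find_block_job() performs the lookup and acquires the context):
 *
 *     BlockJob *job = block_job_get(device);
 *     AioContext *aio_context = blk_get_aio_context(job->blk);
 *
 *     aio_context_acquire(aio_context);
 *     block_job_set_speed(job, speed, errp);
 *     aio_context_release(aio_context);
 */

/*
 * Returns true if @job is one of the job types that embed a BlockJob
 * (backup, commit, mirror, stream); other job types, such as
 * JOB_TYPE_CREATE, are plain Jobs without a BlockJob wrapper.
 */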
static bool is_block_job(Job *job)
{
    return job_type(job) == JOB_TYPE_BACKUP ||
           job_type(job) == JOB_TYPE_COMMIT ||
           job_type(job) == JOB_TYPE_MIRROR ||
           job_type(job) == JOB_TYPE_STREAM;
}

BlockJob *block_job_next(BlockJob *bjob)
{
    Job *job = bjob ? &bjob->job : NULL;

    do {
        job = job_next(job);
    } while (job && !is_block_job(job));

    return job ? container_of(job, BlockJob, job) : NULL;
}

BlockJob *block_job_get(const char *id)
{
    Job *job = job_get(id);

    if (job && is_block_job(job)) {
        return container_of(job, BlockJob, job);
    } else {
        return NULL;
    }
}

void block_job_free(Job *job)
{
    BlockJob *bjob = container_of(job, BlockJob, job);

    block_job_remove_all_bdrv(bjob);
    blk_unref(bjob->blk);
    error_free(bjob->blocker);
}

void block_job_drain(Job *job)
{
    BlockJob *bjob = container_of(job, BlockJob, job);
    const JobDriver *drv = job->driver;
    BlockJobDriver *bjdrv = container_of(drv, BlockJobDriver, job_driver);

    blk_drain(bjob->blk);
    if (bjdrv->drain) {
        bjdrv->drain(bjob);
    }
}

static char *child_job_get_parent_desc(BdrvChild *c)
{
    BlockJob *job = c->opaque;
    return g_strdup_printf("%s job '%s'", job_type_str(&job->job), job->job.id);
}

static void child_job_drained_begin(BdrvChild *c)
{
    BlockJob *job = c->opaque;
    job_pause(&job->job);
}

static bool child_job_drained_poll(BdrvChild *c)
{
    BlockJob *bjob = c->opaque;
    Job *job = &bjob->job;
    const BlockJobDriver *drv = block_job_driver(bjob);

    /* An inactive or completed job doesn't have any pending requests. Jobs
     * with !job->busy are either already paused or have a pause point after
     * being reentered, so no job driver code will run before they pause. */
    if (!job->busy || job_is_completed(job)) {
        return false;
    }

    /* Otherwise, assume that it isn't fully stopped yet, but allow the job to
     * override this assumption. */
    if (drv->drained_poll) {
        return drv->drained_poll(bjob);
    } else {
        return true;
    }
}

static void child_job_drained_end(BdrvChild *c, int *drained_end_counter)
{
    BlockJob *job = c->opaque;
    job_resume(&job->job);
}

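/*
 * A block job may have several nodes attached (tracked in job->nodes), and
 * all of them must live in the same AioContext. Moving one attached node to
 * a new context is therefore only possible if every sibling node can be
 * moved along with it.
 */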
static bool child_job_can_set_aio_ctx(BdrvChild *c, AioContext *ctx,
                                      GSList **ignore, Error **errp)
{
    BlockJob *job = c->opaque;
    GSList *l;

    for (l = job->nodes; l; l = l->next) {
        BdrvChild *sibling = l->data;
        if (!bdrv_child_can_set_aio_context(sibling, ctx, ignore, errp)) {
            return false;
        }
    }
    return true;
}

static void child_job_set_aio_ctx(BdrvChild *c, AioContext *ctx,
                                  GSList **ignore)
{
    BlockJob *job = c->opaque;
    GSList *l;

    for (l = job->nodes; l; l = l->next) {
        BdrvChild *sibling = l->data;
        if (g_slist_find(*ignore, sibling)) {
            continue;
        }
        *ignore = g_slist_prepend(*ignore, sibling);
        bdrv_set_aio_context_ignore(sibling->bs, ctx, ignore);
    }

    job->job.aio_context = ctx;
}

static const BdrvChildRole child_job = {
    .get_parent_desc    = child_job_get_parent_desc,
    .drained_begin      = child_job_drained_begin,
    .drained_poll       = child_job_drained_poll,
    .drained_end        = child_job_drained_end,
    .can_set_aio_ctx    = child_job_can_set_aio_ctx,
    .set_aio_ctx        = child_job_set_aio_ctx,
    .stay_at_node       = true,
};

void block_job_remove_all_bdrv(BlockJob *job)
{
    GSList *l;
    for (l = job->nodes; l; l = l->next) {
        BdrvChild *c = l->data;
        bdrv_op_unblock_all(c->bs, job->blocker);
        bdrv_root_unref_child(c);
    }
    g_slist_free(job->nodes);
    job->nodes = NULL;
}

bool block_job_has_bdrv(BlockJob *job, BlockDriverState *bs)
{
    GSList *el;

    for (el = job->nodes; el; el = el->next) {
        BdrvChild *c = el->data;
        if (c->bs == bs) {
            return true;
        }
    }

    return false;
}

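/*
 * Attaches @bs to @job as child @name with the given permissions and blocks
 * all external operations on it via job->blocker. The job's AioContext lock
 * is temporarily dropped around the attach, since bdrv_root_attach_child()
 * may need to take it itself when moving @bs into the job's context.
 */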
int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
                       uint64_t perm, uint64_t shared_perm, Error **errp)
{
    BdrvChild *c;

    bdrv_ref(bs);
    if (job->job.aio_context != qemu_get_aio_context()) {
        aio_context_release(job->job.aio_context);
    }
    c = bdrv_root_attach_child(bs, name, &child_job, job->job.aio_context,
                               perm, shared_perm, job, errp);
    if (job->job.aio_context != qemu_get_aio_context()) {
        aio_context_acquire(job->job.aio_context);
    }
    if (c == NULL) {
        return -EPERM;
    }

    job->nodes = g_slist_prepend(job->nodes, c);
    bdrv_op_block_all(bs, job->blocker);

    return 0;
}

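/*
 * Called whenever the job transitions to the idle state: kicks any
 * AIO_WAIT_WHILE() callers (e.g. a drained section or job_finish_sync())
 * so that they re-evaluate their waiting condition.
 */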
static void block_job_on_idle(Notifier *n, void *opaque)
{
    aio_wait_kick();
}

bool block_job_is_internal(BlockJob *job)
{
    return (job->job.id == NULL);
}

const BlockJobDriver *block_job_driver(BlockJob *job)
{
    return container_of(job->job.driver, BlockJobDriver, job_driver);
}

/* Assumes the job_mutex is held */
static bool job_timer_pending(Job *job)
{
    return timer_pending(&job->sleep_timer);
}

void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
{
    int64_t old_speed = job->speed;

    if (job_apply_verb(&job->job, JOB_VERB_SET_SPEED, errp)) {
        return;
    }
    if (speed < 0) {
        error_setg(errp, QERR_INVALID_PARAMETER, "speed");
        return;
    }

    ratelimit_set_speed(&job->limit, speed, BLOCK_JOB_SLICE_TIME);

    job->speed = speed;
    if (speed && speed <= old_speed) {
        return;
    }

    /* kick only if a timer is pending */
    job_enter_cond(&job->job, job_timer_pending);
}

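/*
 * Returns how long (in ns) the calling job coroutine should sleep in order
 * to stay within the configured speed limit after processing @n more units
 * of work. Typical use in a job's main loop looks like this (a sketch
 * modelled on the stream and mirror jobs):
 *
 *     int64_t delay_ns = block_job_ratelimit_get_delay(job, n);
 *     if (delay_ns > 0) {
 *         job_sleep_ns(&job->job, delay_ns);
 *     }
 */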
int64_t block_job_ratelimit_get_delay(BlockJob *job, uint64_t n)
{
    if (!job->speed) {
        return 0;
    }

    return ratelimit_calculate_delay(&job->limit, n);
}

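/* Fills in a BlockJobInfo for query-block-jobs. Internal jobs have no ID,
 * are invisible to the user and therefore cannot be queried. */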
BlockJobInfo *block_job_query(BlockJob *job, Error **errp)
{
    BlockJobInfo *info;

    if (block_job_is_internal(job)) {
        error_setg(errp, "Cannot query QEMU internal jobs");
        return NULL;
    }
    info = g_new0(BlockJobInfo, 1);
    info->type = g_strdup(job_type_str(&job->job));
    info->device = g_strdup(job->job.id);
    info->busy = atomic_read(&job->job.busy);
    info->paused = job->job.pause_count > 0;
    info->offset = job->job.progress_current;
    info->len = job->job.progress_total;
    info->speed = job->speed;
    info->io_status = job->iostatus;
    info->ready = job_is_ready(&job->job);
    info->status = job->job.status;
    info->auto_finalize = job->job.auto_finalize;
    info->auto_dismiss = job->job.auto_dismiss;
    info->has_error = job->job.ret != 0;
    info->error = job->job.ret ? g_strdup(strerror(-job->job.ret)) : NULL;
    return info;
}

static void block_job_iostatus_set_err(BlockJob *job, int error)
{
    if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
        job->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
                                          BLOCK_DEVICE_IO_STATUS_FAILED;
    }
}

static void block_job_event_cancelled(Notifier *n, void *opaque)
{
    BlockJob *job = opaque;

    if (block_job_is_internal(job)) {
        return;
    }

    qapi_event_send_block_job_cancelled(job_type(&job->job),
                                        job->job.id,
                                        job->job.progress_total,
                                        job->job.progress_current,
                                        job->speed);
}

static void block_job_event_completed(Notifier *n, void *opaque)
{
    BlockJob *job = opaque;
    const char *msg = NULL;

    if (block_job_is_internal(job)) {
        return;
    }

    if (job->job.ret < 0) {
        msg = strerror(-job->job.ret);
    }

    qapi_event_send_block_job_completed(job_type(&job->job),
                                        job->job.id,
                                        job->job.progress_total,
                                        job->job.progress_current,
                                        job->speed,
                                        !!msg,
                                        msg);
}

static void block_job_event_pending(Notifier *n, void *opaque)
{
    BlockJob *job = opaque;

    if (block_job_is_internal(job)) {
        return;
    }

    qapi_event_send_block_job_pending(job_type(&job->job),
                                      job->job.id);
}

static void block_job_event_ready(Notifier *n, void *opaque)
{
    BlockJob *job = opaque;

    if (block_job_is_internal(job)) {
        return;
    }

    qapi_event_send_block_job_ready(job_type(&job->job),
                                    job->job.id,
                                    job->job.progress_total,
                                    job->job.progress_current,
                                    job->speed);
}


/*
 * API for block job drivers and the block layer. These functions are
 * declared in blockjob_int.h.
 */
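
/*
 * A driver typically calls block_job_create() from its _start() function
 * and then launches the job coroutine. A minimal sketch, where MyBlockJob
 * and my_job_driver are hypothetical driver-defined names:
 *
 *     MyBlockJob *s = block_job_create(job_id, &my_job_driver, NULL, bs,
 *                                      0, BLK_PERM_ALL, speed, JOB_DEFAULT,
 *                                      NULL, NULL, errp);
 *     if (!s) {
 *         return;
 *     }
 *     job_start(&s->common.job);
 */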

void *block_job_create(const char *job_id, const BlockJobDriver *driver,
                       JobTxn *txn, BlockDriverState *bs, uint64_t perm,
                       uint64_t shared_perm, int64_t speed, int flags,
                       BlockCompletionFunc *cb, void *opaque, Error **errp)
{
    BlockBackend *blk;
    BlockJob *job;
    int ret;

    if (job_id == NULL && !(flags & JOB_INTERNAL)) {
        job_id = bdrv_get_device_name(bs);
    }

    blk = blk_new(bdrv_get_aio_context(bs), perm, shared_perm);
    ret = blk_insert_bs(blk, bs, errp);
    if (ret < 0) {
        blk_unref(blk);
        return NULL;
    }

    job = job_create(job_id, &driver->job_driver, txn, blk_get_aio_context(blk),
                     flags, cb, opaque, errp);
    if (job == NULL) {
        blk_unref(blk);
        return NULL;
    }

    assert(is_block_job(&job->job));
    assert(job->job.driver->free == &block_job_free);
    assert(job->job.driver->user_resume == &block_job_user_resume);
    assert(job->job.driver->drain == &block_job_drain);

    job->blk = blk;

    job->finalize_cancelled_notifier.notify = block_job_event_cancelled;
    job->finalize_completed_notifier.notify = block_job_event_completed;
    job->pending_notifier.notify = block_job_event_pending;
    job->ready_notifier.notify = block_job_event_ready;
    job->idle_notifier.notify = block_job_on_idle;

    notifier_list_add(&job->job.on_finalize_cancelled,
                      &job->finalize_cancelled_notifier);
    notifier_list_add(&job->job.on_finalize_completed,
                      &job->finalize_completed_notifier);
    notifier_list_add(&job->job.on_pending, &job->pending_notifier);
    notifier_list_add(&job->job.on_ready, &job->ready_notifier);
    notifier_list_add(&job->job.on_idle, &job->idle_notifier);

    error_setg(&job->blocker, "block device is in use by block job: %s",
               job_type_str(&job->job));
    block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort);

    bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);

    /* Disable request queuing in the BlockBackend to avoid deadlocks on drain:
     * The job reports that it's busy until it reaches a pause point. */
    blk_set_disable_request_queuing(blk, true);
    blk_set_allow_aio_context_change(blk, true);

    /* Only set speed when necessary to avoid NotSupported error */
    if (speed != 0) {
        Error *local_err = NULL;

        block_job_set_speed(job, speed, &local_err);
        if (local_err) {
            job_early_fail(&job->job);
            error_propagate(errp, local_err);
            return NULL;
        }
    }

    return job;
}

void block_job_iostatus_reset(BlockJob *job)
{
    if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
        return;
    }
    assert(job->job.user_paused && job->job.pause_count > 0);
    job->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}

void block_job_user_resume(Job *job)
{
    BlockJob *bjob = container_of(job, BlockJob, job);
    block_job_iostatus_reset(bjob);
}

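/*
 * Maps an I/O error to the action requested by the job's on-error policy,
 * emits a QMP BLOCK_JOB_ERROR event for user-visible jobs and, if the
 * resulting action is STOP, pauses the job as a user pause and records the
 * error in its iostatus.
 */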
BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err,
                                        int is_read, int error)
{
    BlockErrorAction action;

    switch (on_err) {
    case BLOCKDEV_ON_ERROR_ENOSPC:
    case BLOCKDEV_ON_ERROR_AUTO:
        action = (error == ENOSPC) ?
                 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
        break;
    case BLOCKDEV_ON_ERROR_STOP:
        action = BLOCK_ERROR_ACTION_STOP;
        break;
    case BLOCKDEV_ON_ERROR_REPORT:
        action = BLOCK_ERROR_ACTION_REPORT;
        break;
    case BLOCKDEV_ON_ERROR_IGNORE:
        action = BLOCK_ERROR_ACTION_IGNORE;
        break;
    default:
        abort();
    }
    if (!block_job_is_internal(job)) {
        qapi_event_send_block_job_error(job->job.id,
                                        is_read ? IO_OPERATION_TYPE_READ :
                                        IO_OPERATION_TYPE_WRITE,
                                        action);
    }
    if (action == BLOCK_ERROR_ACTION_STOP) {
        if (!job->job.user_paused) {
            job_pause(&job->job);
            /* Make the pause user visible, so that it can be resumed from
             * QMP. */
            job->job.user_paused = true;
        }
        block_job_iostatus_set_err(job, error);
    }
    return action;
}