From bd6032470631d8d5de6db84ecb55398b70d9d2f3 Mon Sep 17 00:00:00 2001 From: Gregory Farnum Date: Wed, 11 Jan 2012 11:53:52 -0800 Subject: [PATCH 01/22] rbd: wire up snapshot removal and rollback functionality Signed-off-by: Greg Farnum Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/rbd.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/block/rbd.c b/block/rbd.c index db5abf240..46a857901 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -789,6 +789,26 @@ static int qemu_rbd_snap_create(BlockDriverState *bs, return 0; } +static int qemu_rbd_snap_remove(BlockDriverState *bs, + const char *snapshot_name) +{ + BDRVRBDState *s = bs->opaque; + int r; + + r = rbd_snap_remove(s->image, snapshot_name); + return r; +} + +static int qemu_rbd_snap_rollback(BlockDriverState *bs, + const char *snapshot_name) +{ + BDRVRBDState *s = bs->opaque; + int r; + + r = rbd_snap_rollback(s->image, snapshot_name); + return r; +} + static int qemu_rbd_snap_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) { @@ -862,7 +882,9 @@ static BlockDriver bdrv_rbd = { .bdrv_co_flush_to_disk = qemu_rbd_co_flush, .bdrv_snapshot_create = qemu_rbd_snap_create, + .bdrv_snapshot_delete = qemu_rbd_snap_remove, .bdrv_snapshot_list = qemu_rbd_snap_list, + .bdrv_snapshot_goto = qemu_rbd_snap_rollback, }; static void bdrv_rbd_init(void) From 031380d8770d2df6c386e4aeabd412007d3ebd54 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 16 Jan 2012 09:28:06 +0000 Subject: [PATCH 02/22] block: replace unchecked strdup/malloc/calloc with glib Most of the codebase as been converted to use glib memory allocation functions. There are still a few instances of malloc/calloc in the block layer and qemu-io. Replace them, especially since they do not check the strdup/malloc/calloc return value. Reported-by: Dr David Alan Gilbert Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/blkdebug.c | 4 ++-- block/blkverify.c | 4 ++-- qemu-io.c | 48 +++++++++++++++++++++++------------------------ 3 files changed, 28 insertions(+), 28 deletions(-) diff --git a/block/blkdebug.c b/block/blkdebug.c index 9b885359e..a251802ad 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -292,10 +292,10 @@ static int blkdebug_open(BlockDriverState *bs, const char *filename, int flags) return -EINVAL; } - config = strdup(filename); + config = g_strdup(filename); config[c - filename] = '\0'; ret = read_config(s, config); - free(config); + g_free(config); if (ret < 0) { return ret; } diff --git a/block/blkverify.c b/block/blkverify.c index 4ca8584b8..9d5f1ec5b 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -87,10 +87,10 @@ static int blkverify_open(BlockDriverState *bs, const char *filename, int flags) return -EINVAL; } - raw = strdup(filename); + raw = g_strdup(filename); raw[c - filename] = '\0'; ret = bdrv_file_open(&bs->file, raw, flags); - free(raw); + g_free(raw); if (ret < 0) { return ret; } diff --git a/qemu-io.c b/qemu-io.c index ffa62fb46..7c446b61b 100644 --- a/qemu-io.c +++ b/qemu-io.c @@ -130,7 +130,7 @@ static void print_report(const char *op, struct timeval *t, int64_t offset, static void * create_iovec(QEMUIOVector *qiov, char **argv, int nr_iov, int pattern) { - size_t *sizes = calloc(nr_iov, sizeof(size_t)); + size_t *sizes = g_new0(size_t, nr_iov); size_t count = 0; void *buf = NULL; void *p; @@ -172,7 +172,7 @@ create_iovec(QEMUIOVector *qiov, char **argv, int nr_iov, int pattern) } fail: - free(sizes); + g_free(sizes); return buf; } @@ -471,14 +471,14 @@ static int read_f(int argc, char **argv) } if (Pflag) { - void *cmp_buf = malloc(pattern_count); + void *cmp_buf = g_malloc(pattern_count); memset(cmp_buf, pattern, pattern_count); if (memcmp(buf + pattern_offset, cmp_buf, pattern_count)) { printf("Pattern verification failed at offset %" PRId64 ", %d bytes\n", offset + pattern_offset, pattern_count); } - free(cmp_buf); + g_free(cmp_buf); } if (qflag) { @@ -601,13 +601,13 @@ static int readv_f(int argc, char **argv) } if (Pflag) { - void *cmp_buf = malloc(qiov.size); + void *cmp_buf = g_malloc(qiov.size); memset(cmp_buf, pattern, qiov.size); if (memcmp(buf, cmp_buf, qiov.size)) { printf("Pattern verification failed at offset %" PRId64 ", %zd bytes\n", offset, qiov.size); } - free(cmp_buf); + g_free(cmp_buf); } if (qflag) { @@ -1063,7 +1063,7 @@ static void aio_write_done(void *opaque, int ret) ctx->qiov.size, 1, ctx->Cflag); out: qemu_io_free(ctx->buf); - free(ctx); + g_free(ctx); } static void aio_read_done(void *opaque, int ret) @@ -1079,14 +1079,14 @@ static void aio_read_done(void *opaque, int ret) } if (ctx->Pflag) { - void *cmp_buf = malloc(ctx->qiov.size); + void *cmp_buf = g_malloc(ctx->qiov.size); memset(cmp_buf, ctx->pattern, ctx->qiov.size); if (memcmp(ctx->buf, cmp_buf, ctx->qiov.size)) { printf("Pattern verification failed at offset %" PRId64 ", %zd bytes\n", ctx->offset, ctx->qiov.size); } - free(cmp_buf); + g_free(cmp_buf); } if (ctx->qflag) { @@ -1103,7 +1103,7 @@ static void aio_read_done(void *opaque, int ret) ctx->qiov.size, 1, ctx->Cflag); out: qemu_io_free(ctx->buf); - free(ctx); + g_free(ctx); } static void aio_read_help(void) @@ -1141,7 +1141,7 @@ static const cmdinfo_t aio_read_cmd = { static int aio_read_f(int argc, char **argv) { int nr_iov, c; - struct aio_ctx *ctx = calloc(1, sizeof(struct aio_ctx)); + struct aio_ctx *ctx = g_new0(struct aio_ctx, 1); while ((c = getopt(argc, argv, "CP:qv")) != EOF) { switch (c) { @@ -1152,7 +1152,7 @@ static int aio_read_f(int argc, char **argv) ctx->Pflag = 1; ctx->pattern = parse_pattern(optarg); if (ctx->pattern < 0) { - free(ctx); + g_free(ctx); return 0; } break; @@ -1163,20 +1163,20 @@ static int aio_read_f(int argc, char **argv) ctx->vflag = 1; break; default: - free(ctx); + g_free(ctx); return command_usage(&aio_read_cmd); } } if (optind > argc - 2) { - free(ctx); + g_free(ctx); return command_usage(&aio_read_cmd); } ctx->offset = cvtnum(argv[optind]); if (ctx->offset < 0) { printf("non-numeric length argument -- %s\n", argv[optind]); - free(ctx); + g_free(ctx); return 0; } optind++; @@ -1184,14 +1184,14 @@ static int aio_read_f(int argc, char **argv) if (ctx->offset & 0x1ff) { printf("offset %" PRId64 " is not sector aligned\n", ctx->offset); - free(ctx); + g_free(ctx); return 0; } nr_iov = argc - optind; ctx->buf = create_iovec(&ctx->qiov, &argv[optind], nr_iov, 0xab); if (ctx->buf == NULL) { - free(ctx); + g_free(ctx); return 0; } @@ -1237,7 +1237,7 @@ static int aio_write_f(int argc, char **argv) { int nr_iov, c; int pattern = 0xcd; - struct aio_ctx *ctx = calloc(1, sizeof(struct aio_ctx)); + struct aio_ctx *ctx = g_new0(struct aio_ctx, 1); while ((c = getopt(argc, argv, "CqP:")) != EOF) { switch (c) { @@ -1250,25 +1250,25 @@ static int aio_write_f(int argc, char **argv) case 'P': pattern = parse_pattern(optarg); if (pattern < 0) { - free(ctx); + g_free(ctx); return 0; } break; default: - free(ctx); + g_free(ctx); return command_usage(&aio_write_cmd); } } if (optind > argc - 2) { - free(ctx); + g_free(ctx); return command_usage(&aio_write_cmd); } ctx->offset = cvtnum(argv[optind]); if (ctx->offset < 0) { printf("non-numeric length argument -- %s\n", argv[optind]); - free(ctx); + g_free(ctx); return 0; } optind++; @@ -1276,14 +1276,14 @@ static int aio_write_f(int argc, char **argv) if (ctx->offset & 0x1ff) { printf("offset %" PRId64 " is not sector aligned\n", ctx->offset); - free(ctx); + g_free(ctx); return 0; } nr_iov = argc - optind; ctx->buf = create_iovec(&ctx->qiov, &argv[optind], nr_iov, pattern); if (ctx->buf == NULL) { - free(ctx); + g_free(ctx); return 0; } From 7e6246670add62116729bd93811e41eb60f66b77 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 18 Jan 2012 14:40:40 +0000 Subject: [PATCH 03/22] coroutine: add co_sleep_ns() coroutine sleep function Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- Makefile.objs | 1 + qemu-coroutine-sleep.c | 38 ++++++++++++++++++++++++++++++++++++++ qemu-coroutine.h | 9 +++++++++ 3 files changed, 48 insertions(+) create mode 100644 qemu-coroutine-sleep.c diff --git a/Makefile.objs b/Makefile.objs index 9ca606356..48d14777a 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -13,6 +13,7 @@ oslib-obj-$(CONFIG_POSIX) += oslib-posix.o qemu-thread-posix.o ####################################################################### # coroutines coroutine-obj-y = qemu-coroutine.o qemu-coroutine-lock.o qemu-coroutine-io.o +coroutine-obj-y += qemu-coroutine-sleep.o ifeq ($(CONFIG_UCONTEXT_COROUTINE),y) coroutine-obj-$(CONFIG_POSIX) += coroutine-ucontext.o else diff --git a/qemu-coroutine-sleep.c b/qemu-coroutine-sleep.c new file mode 100644 index 000000000..fd6527444 --- /dev/null +++ b/qemu-coroutine-sleep.c @@ -0,0 +1,38 @@ +/* + * QEMU coroutine sleep + * + * Copyright IBM, Corp. 2011 + * + * Authors: + * Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include "qemu-coroutine.h" +#include "qemu-timer.h" + +typedef struct CoSleepCB { + QEMUTimer *ts; + Coroutine *co; +} CoSleepCB; + +static void co_sleep_cb(void *opaque) +{ + CoSleepCB *sleep_cb = opaque; + + qemu_free_timer(sleep_cb->ts); + qemu_coroutine_enter(sleep_cb->co, NULL); +} + +void coroutine_fn co_sleep_ns(QEMUClock *clock, int64_t ns) +{ + CoSleepCB sleep_cb = { + .co = qemu_coroutine_self(), + }; + sleep_cb.ts = qemu_new_timer(clock, SCALE_NS, co_sleep_cb, &sleep_cb); + qemu_mod_timer(sleep_cb.ts, qemu_get_clock_ns(clock) + ns); + qemu_coroutine_yield(); +} diff --git a/qemu-coroutine.h b/qemu-coroutine.h index 8a55fe125..34c15d411 100644 --- a/qemu-coroutine.h +++ b/qemu-coroutine.h @@ -17,6 +17,7 @@ #include #include "qemu-queue.h" +#include "qemu-timer.h" /** * Coroutines are a mechanism for stack switching and can be used for @@ -199,4 +200,12 @@ void qemu_co_rwlock_wrlock(CoRwlock *lock); */ void qemu_co_rwlock_unlock(CoRwlock *lock); +/** + * Yield the coroutine for a given duration + * + * Note this function uses timers and hence only works when a main loop is in + * use. See main-loop.h and do not use from qemu-tool programs. + */ +void coroutine_fn co_sleep_ns(QEMUClock *clock, int64_t ns); + #endif /* QEMU_COROUTINE_H */ From 2d3735d3bf61d5c8e154a197a11535cc65044334 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 18 Jan 2012 14:40:41 +0000 Subject: [PATCH 04/22] block: check bdrv_in_use() before blockdev operations Long-running block operations like block migration and image streaming must have continual access to their block device. It is not safe to perform operations like hotplug, eject, change, resize, commit, or external snapshot while a long-running operation is in progress. This patch adds the missing bdrv_in_use() checks so that block migration and image streaming never have the rug pulled out from underneath them. Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block.c | 4 ++++ blockdev.c | 16 +++++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/block.c b/block.c index 3f072f627..43f648437 100644 --- a/block.c +++ b/block.c @@ -1020,6 +1020,10 @@ int bdrv_commit(BlockDriverState *bs) return -EACCES; } + if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) { + return -EBUSY; + } + backing_drv = bs->backing_hd->drv; ro = bs->backing_hd->read_only; strncpy(filename, bs->backing_hd->filename, sizeof(filename)); diff --git a/blockdev.c b/blockdev.c index 1f83c888e..0499ee6ae 100644 --- a/blockdev.c +++ b/blockdev.c @@ -592,12 +592,18 @@ void do_commit(Monitor *mon, const QDict *qdict) if (!strcmp(device, "all")) { bdrv_commit_all(); } else { + int ret; + bs = bdrv_find(device); if (!bs) { qerror_report(QERR_DEVICE_NOT_FOUND, device); return; } - bdrv_commit(bs); + ret = bdrv_commit(bs); + if (ret == -EBUSY) { + qerror_report(QERR_DEVICE_IN_USE, device); + return; + } } } @@ -616,6 +622,10 @@ void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, error_set(errp, QERR_DEVICE_NOT_FOUND, device); return; } + if (bdrv_in_use(bs)) { + error_set(errp, QERR_DEVICE_IN_USE, device); + return; + } pstrcpy(old_filename, sizeof(old_filename), bs->filename); @@ -667,6 +677,10 @@ void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, static void eject_device(BlockDriverState *bs, int force, Error **errp) { + if (bdrv_in_use(bs)) { + error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs)); + return; + } if (!bdrv_dev_has_removable_media(bs)) { error_set(errp, QERR_DEVICE_NOT_REMOVABLE, bdrv_get_device_name(bs)); return; From 470c05047a09cda3de16bb3f98a130d9537357a4 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 18 Jan 2012 14:40:42 +0000 Subject: [PATCH 05/22] block: make copy-on-read a per-request flag Previously copy-on-read could only be enabled for all requests to a block device. This means requests coming from the guest as well as QEMU's internal requests would perform copy-on-read when enabled. For image streaming we want to support finer-grained behavior than just populating the image file from its backing image. Image streaming supports partial streaming where a common backing image is preserved. In this case guest requests should not perform copy-on-read because they would indiscriminately copy data which should be left in a backing image from the backing chain. Introduce a per-request flag for copy-on-read so that a block device can process both regular and copy-on-read requests. Overlapping reads and writes still need to be serialized for correctness when copy-on-read is happening, so add an in-flight reference count to track this. Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block.c | 49 ++++++++++++++++++++++++++++++++++++++----------- block.h | 2 ++ block_int.h | 3 +++ trace-events | 3 ++- 4 files changed, 45 insertions(+), 12 deletions(-) diff --git a/block.c b/block.c index 43f648437..edfab49e3 100644 --- a/block.c +++ b/block.c @@ -48,6 +48,10 @@ #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ +typedef enum { + BDRV_REQ_COPY_ON_READ = 0x1, +} BdrvRequestFlags; + static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load); static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, @@ -62,7 +66,8 @@ static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *iov); static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, + BdrvRequestFlags flags); static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs, @@ -1292,7 +1297,7 @@ static void coroutine_fn bdrv_rw_co_entry(void *opaque) if (!rwco->is_write) { rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num, - rwco->nb_sectors, rwco->qiov); + rwco->nb_sectors, rwco->qiov, 0); } else { rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num, rwco->nb_sectors, rwco->qiov); @@ -1500,7 +1505,7 @@ int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset, return 0; } -static int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs, +static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { /* Perform I/O through a temporary buffer so that users who scribble over @@ -1523,8 +1528,8 @@ static int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs, round_to_clusters(bs, sector_num, nb_sectors, &cluster_sector_num, &cluster_nb_sectors); - trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, - cluster_sector_num, cluster_nb_sectors); + trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, + cluster_sector_num, cluster_nb_sectors); iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE; iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len); @@ -1559,7 +1564,8 @@ err: * Handle a read request in coroutine context */ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, + BdrvRequestFlags flags) { BlockDriver *drv = bs->drv; BdrvTrackedRequest req; @@ -1578,12 +1584,19 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, } if (bs->copy_on_read) { + flags |= BDRV_REQ_COPY_ON_READ; + } + if (flags & BDRV_REQ_COPY_ON_READ) { + bs->copy_on_read_in_flight++; + } + + if (bs->copy_on_read_in_flight) { wait_for_overlapping_requests(bs, sector_num, nb_sectors); } tracked_request_begin(&req, bs, sector_num, nb_sectors, false); - if (bs->copy_on_read) { + if (flags & BDRV_REQ_COPY_ON_READ) { int pnum; ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum); @@ -1592,7 +1605,7 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, } if (!ret || pnum != nb_sectors) { - ret = bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, qiov); + ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov); goto out; } } @@ -1601,6 +1614,11 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, out: tracked_request_end(&req); + + if (flags & BDRV_REQ_COPY_ON_READ) { + bs->copy_on_read_in_flight--; + } + return ret; } @@ -1609,7 +1627,16 @@ int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num, { trace_bdrv_co_readv(bs, sector_num, nb_sectors); - return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov); + return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0); +} + +int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) +{ + trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors); + + return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, + BDRV_REQ_COPY_ON_READ); } /* @@ -1637,7 +1664,7 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, bdrv_io_limits_intercept(bs, true, nb_sectors); } - if (bs->copy_on_read) { + if (bs->copy_on_read_in_flight) { wait_for_overlapping_requests(bs, sector_num, nb_sectors); } @@ -3144,7 +3171,7 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque) if (!acb->is_write) { acb->req.error = bdrv_co_do_readv(bs, acb->req.sector, - acb->req.nb_sectors, acb->req.qiov); + acb->req.nb_sectors, acb->req.qiov, 0); } else { acb->req.error = bdrv_co_do_writev(bs, acb->req.sector, acb->req.nb_sectors, acb->req.qiov); diff --git a/block.h b/block.h index 3bd439860..a3b0b8085 100644 --- a/block.h +++ b/block.h @@ -142,6 +142,8 @@ int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset, const void *buf, int count); int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); +int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num, diff --git a/block_int.h b/block_int.h index 311bd2a6f..07d67ed28 100644 --- a/block_int.h +++ b/block_int.h @@ -218,6 +218,9 @@ struct BlockDriverState { BlockDriverState *backing_hd; BlockDriverState *file; + /* number of in-flight copy-on-read requests */ + unsigned int copy_on_read_in_flight; + /* async read/write emulation */ void *sync_aiocb; diff --git a/trace-events b/trace-events index d2b0c6181..035d08fe7 100644 --- a/trace-events +++ b/trace-events @@ -65,9 +65,10 @@ bdrv_aio_readv(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs % bdrv_aio_writev(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p" bdrv_lock_medium(void *bs, bool locked) "bs %p locked %d" bdrv_co_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d" +bdrv_co_copy_on_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d" bdrv_co_writev(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d" bdrv_co_io_em(void *bs, int64_t sector_num, int nb_sectors, int is_write, void *acb) "bs %p sector_num %"PRId64" nb_sectors %d is_write %d acb %p" -bdrv_co_copy_on_readv(void *bs, int64_t sector_num, int nb_sectors, int64_t cluster_sector_num, int cluster_nb_sectors) "bs %p sector_num %"PRId64" nb_sectors %d cluster_sector_num %"PRId64" cluster_nb_sectors %d" +bdrv_co_do_copy_on_readv(void *bs, int64_t sector_num, int nb_sectors, int64_t cluster_sector_num, int cluster_nb_sectors) "bs %p sector_num %"PRId64" nb_sectors %d cluster_sector_num %"PRId64" cluster_nb_sectors %d" # hw/virtio-blk.c virtio_blk_req_complete(void *req, int status) "req %p status %d" From eeec61f291399115ba757421fd631e3414726f6f Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 18 Jan 2012 14:40:43 +0000 Subject: [PATCH 06/22] block: add BlockJob interface for long-running operations Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ block_int.h | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+) diff --git a/block.c b/block.c index edfab49e3..2baac952f 100644 --- a/block.c +++ b/block.c @@ -3858,3 +3858,51 @@ out: return ret; } + +void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque) +{ + BlockJob *job; + + if (bs->job || bdrv_in_use(bs)) { + return NULL; + } + bdrv_set_in_use(bs, 1); + + job = g_malloc0(job_type->instance_size); + job->job_type = job_type; + job->bs = bs; + job->cb = cb; + job->opaque = opaque; + bs->job = job; + return job; +} + +void block_job_complete(BlockJob *job, int ret) +{ + BlockDriverState *bs = job->bs; + + assert(bs->job == job); + job->cb(job->opaque, ret); + bs->job = NULL; + g_free(job); + bdrv_set_in_use(bs, 0); +} + +int block_job_set_speed(BlockJob *job, int64_t value) +{ + if (!job->job_type->set_speed) { + return -ENOTSUP; + } + return job->job_type->set_speed(job, value); +} + +void block_job_cancel(BlockJob *job) +{ + job->cancelled = true; +} + +bool block_job_is_cancelled(BlockJob *job) +{ + return job->cancelled; +} diff --git a/block_int.h b/block_int.h index 07d67ed28..3566acb89 100644 --- a/block_int.h +++ b/block_int.h @@ -69,6 +69,36 @@ typedef struct BlockIOBaseValue { uint64_t ios[2]; } BlockIOBaseValue; +typedef void BlockJobCancelFunc(void *opaque); +typedef struct BlockJob BlockJob; +typedef struct BlockJobType { + /** Derived BlockJob struct size */ + size_t instance_size; + + /** String describing the operation, part of query-block-jobs QMP API */ + const char *job_type; + + /** Optional callback for job types that support setting a speed limit */ + int (*set_speed)(BlockJob *job, int64_t value); +} BlockJobType; + +/** + * Long-running operation on a BlockDriverState + */ +struct BlockJob { + const BlockJobType *job_type; + BlockDriverState *bs; + bool cancelled; + + /* These fields are published by the query-block-jobs QMP API */ + int64_t offset; + int64_t len; + int64_t speed; + + BlockDriverCompletionFunc *cb; + void *opaque; +}; + struct BlockDriver { const char *format_name; int instance_size; @@ -264,6 +294,9 @@ struct BlockDriverState { void *private; QLIST_HEAD(, BdrvTrackedRequest) tracked_requests; + + /* long-running background operation */ + BlockJob *job; }; struct BlockDriverAIOCB { @@ -287,4 +320,11 @@ void bdrv_set_io_limits(BlockDriverState *bs, int is_windows_drive(const char *filename); #endif +void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque); +void block_job_complete(BlockJob *job, int ret); +int block_job_set_speed(BlockJob *job, int64_t value); +void block_job_cancel(BlockJob *job); +bool block_job_is_cancelled(BlockJob *job); + #endif /* BLOCK_INT_H */ From 4f1043b4ffdd6f130cab7c1d8d59d833a475adac Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 18 Jan 2012 14:40:44 +0000 Subject: [PATCH 07/22] block: add image streaming block job Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- Makefile.objs | 1 + block/stream.c | 133 +++++++++++++++++++++++++++++++++++++++++++++++++ block_int.h | 3 ++ trace-events | 4 ++ 4 files changed, 141 insertions(+) create mode 100644 block/stream.c diff --git a/Makefile.objs b/Makefile.objs index 48d14777a..06a147b0b 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -35,6 +35,7 @@ block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o block-nested-y += qed-check.o block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o +block-nested-y += stream.o block-nested-$(CONFIG_WIN32) += raw-win32.o block-nested-$(CONFIG_POSIX) += raw-posix.o block-nested-$(CONFIG_LIBISCSI) += iscsi.o diff --git a/block/stream.c b/block/stream.c new file mode 100644 index 000000000..3a9909480 --- /dev/null +++ b/block/stream.c @@ -0,0 +1,133 @@ +/* + * Image streaming + * + * Copyright IBM, Corp. 2011 + * + * Authors: + * Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include "trace.h" +#include "block_int.h" + +enum { + /* + * Size of data buffer for populating the image file. This should be large + * enough to process multiple clusters in a single call, so that populating + * contiguous regions of the image is efficient. + */ + STREAM_BUFFER_SIZE = 512 * 1024, /* in bytes */ +}; + +typedef struct StreamBlockJob { + BlockJob common; + BlockDriverState *base; +} StreamBlockJob; + +static int coroutine_fn stream_populate(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + void *buf) +{ + struct iovec iov = { + .iov_base = buf, + .iov_len = nb_sectors * BDRV_SECTOR_SIZE, + }; + QEMUIOVector qiov; + + qemu_iovec_init_external(&qiov, &iov, 1); + + /* Copy-on-read the unallocated clusters */ + return bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, &qiov); +} + +static void coroutine_fn stream_run(void *opaque) +{ + StreamBlockJob *s = opaque; + BlockDriverState *bs = s->common.bs; + int64_t sector_num, end; + int ret = 0; + int n; + void *buf; + + s->common.len = bdrv_getlength(bs); + if (s->common.len < 0) { + block_job_complete(&s->common, s->common.len); + return; + } + + end = s->common.len >> BDRV_SECTOR_BITS; + buf = qemu_blockalign(bs, STREAM_BUFFER_SIZE); + + /* Turn on copy-on-read for the whole block device so that guest read + * requests help us make progress. Only do this when copying the entire + * backing chain since the copy-on-read operation does not take base into + * account. + */ + if (!base) { + bdrv_enable_copy_on_read(bs); + } + + for (sector_num = 0; sector_num < end; sector_num += n) { + if (block_job_is_cancelled(&s->common)) { + break; + } + + /* TODO rate-limit */ + /* Note that even when no rate limit is applied we need to yield with + * no pending I/O here so that qemu_aio_flush() is able to return. + */ + co_sleep_ns(rt_clock, 0); + + ret = bdrv_co_is_allocated(bs, sector_num, + STREAM_BUFFER_SIZE / BDRV_SECTOR_SIZE, &n); + trace_stream_one_iteration(s, sector_num, n, ret); + if (ret == 0) { + ret = stream_populate(bs, sector_num, n, buf); + } + if (ret < 0) { + break; + } + + /* Publish progress */ + s->common.offset += n * BDRV_SECTOR_SIZE; + } + + if (!base) { + bdrv_disable_copy_on_read(bs); + } + + if (sector_num == end && ret == 0) { + ret = bdrv_change_backing_file(bs, NULL, NULL); + } + + qemu_vfree(buf); + block_job_complete(&s->common, ret); +} + +static BlockJobType stream_job_type = { + .instance_size = sizeof(StreamBlockJob), + .job_type = "stream", +}; + +int stream_start(BlockDriverState *bs, BlockDriverState *base, + BlockDriverCompletionFunc *cb, void *opaque) +{ + StreamBlockJob *s; + Coroutine *co; + + s = block_job_create(&stream_job_type, bs, cb, opaque); + if (!s) { + return -EBUSY; /* bs must already be in use */ + } + + s->base = base; + + co = qemu_coroutine_create(stream_run); + trace_stream_start(bs, base, s, co, opaque); + qemu_coroutine_enter(co, s); + return 0; +} diff --git a/block_int.h b/block_int.h index 3566acb89..4f638705e 100644 --- a/block_int.h +++ b/block_int.h @@ -327,4 +327,7 @@ int block_job_set_speed(BlockJob *job, int64_t value); void block_job_cancel(BlockJob *job); bool block_job_is_cancelled(BlockJob *job); +int stream_start(BlockDriverState *bs, BlockDriverState *base, + BlockDriverCompletionFunc *cb, void *opaque); + #endif /* BLOCK_INT_H */ diff --git a/trace-events b/trace-events index 035d08fe7..f6a3cd1aa 100644 --- a/trace-events +++ b/trace-events @@ -70,6 +70,10 @@ bdrv_co_writev(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %" bdrv_co_io_em(void *bs, int64_t sector_num, int nb_sectors, int is_write, void *acb) "bs %p sector_num %"PRId64" nb_sectors %d is_write %d acb %p" bdrv_co_do_copy_on_readv(void *bs, int64_t sector_num, int nb_sectors, int64_t cluster_sector_num, int cluster_nb_sectors) "bs %p sector_num %"PRId64" nb_sectors %d cluster_sector_num %"PRId64" cluster_nb_sectors %d" +# block/stream.c +stream_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d" +stream_start(void *bs, void *base, void *s, void *co, void *opaque) "bs %p base %p s %p co %p opaque %p" + # hw/virtio-blk.c virtio_blk_req_complete(void *req, int status) "req %p status %d" virtio_blk_rw_complete(void *req, int ret) "req %p ret %d" From 5094a6c016f6e7a4fc800816d716e10ce2331396 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 18 Jan 2012 14:40:45 +0000 Subject: [PATCH 08/22] block: rate-limit streaming operations This patch implements rate-limiting for image streaming. If we've exceeded the bandwidth quota for a 100 ms time slice we sleep the coroutine until the next slice begins. Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/stream.c | 65 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 59 insertions(+), 6 deletions(-) diff --git a/block/stream.c b/block/stream.c index 3a9909480..b54b0b73c 100644 --- a/block/stream.c +++ b/block/stream.c @@ -23,8 +23,39 @@ enum { STREAM_BUFFER_SIZE = 512 * 1024, /* in bytes */ }; +#define SLICE_TIME 100000000ULL /* ns */ + +typedef struct { + int64_t next_slice_time; + uint64_t slice_quota; + uint64_t dispatched; +} RateLimit; + +static int64_t ratelimit_calculate_delay(RateLimit *limit, uint64_t n) +{ + int64_t delay_ns = 0; + int64_t now = qemu_get_clock_ns(rt_clock); + + if (limit->next_slice_time < now) { + limit->next_slice_time = now + SLICE_TIME; + limit->dispatched = 0; + } + if (limit->dispatched + n > limit->slice_quota) { + delay_ns = limit->next_slice_time - now; + } else { + limit->dispatched += n; + } + return delay_ns; +} + +static void ratelimit_set_speed(RateLimit *limit, uint64_t speed) +{ + limit->slice_quota = speed / (1000000000ULL / SLICE_TIME); +} + typedef struct StreamBlockJob { BlockJob common; + RateLimit limit; BlockDriverState *base; } StreamBlockJob; @@ -72,20 +103,24 @@ static void coroutine_fn stream_run(void *opaque) } for (sector_num = 0; sector_num < end; sector_num += n) { +retry: if (block_job_is_cancelled(&s->common)) { break; } - /* TODO rate-limit */ - /* Note that even when no rate limit is applied we need to yield with - * no pending I/O here so that qemu_aio_flush() is able to return. - */ - co_sleep_ns(rt_clock, 0); - ret = bdrv_co_is_allocated(bs, sector_num, STREAM_BUFFER_SIZE / BDRV_SECTOR_SIZE, &n); trace_stream_one_iteration(s, sector_num, n, ret); if (ret == 0) { + if (s->common.speed) { + uint64_t delay_ns = ratelimit_calculate_delay(&s->limit, n); + if (delay_ns > 0) { + co_sleep_ns(rt_clock, delay_ns); + + /* Recheck cancellation and that sectors are unallocated */ + goto retry; + } + } ret = stream_populate(bs, sector_num, n, buf); } if (ret < 0) { @@ -94,6 +129,11 @@ static void coroutine_fn stream_run(void *opaque) /* Publish progress */ s->common.offset += n * BDRV_SECTOR_SIZE; + + /* Note that even when no rate limit is applied we need to yield + * with no pending I/O here so that qemu_aio_flush() returns. + */ + co_sleep_ns(rt_clock, 0); } if (!base) { @@ -108,9 +148,22 @@ static void coroutine_fn stream_run(void *opaque) block_job_complete(&s->common, ret); } +static int stream_set_speed(BlockJob *job, int64_t value) +{ + StreamBlockJob *s = container_of(job, StreamBlockJob, common); + + if (value < 0) { + return -EINVAL; + } + job->speed = value; + ratelimit_set_speed(&s->limit, value / BDRV_SECTOR_SIZE); + return 0; +} + static BlockJobType stream_job_type = { .instance_size = sizeof(StreamBlockJob), .job_type = "stream", + .set_speed = stream_set_speed, }; int stream_start(BlockDriverState *bs, BlockDriverState *base, From 12bd451fe0be83474910bb63b5874458141d4230 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 18 Jan 2012 14:40:46 +0000 Subject: [PATCH 09/22] qmp: add block_stream command Add the block_stream command, which starts copy backing file contents into the image file. Also add the BLOCK_JOB_COMPLETED QMP event which is emitted when image streaming completes. Later patches add control over the background copy speed, cancelation, and querying running streaming operations. Signed-off-by: Stefan Hajnoczi Acked-by: Luiz Capitulino Signed-off-by: Kevin Wolf --- QMP/qmp-events.txt | 29 ++++++++++++++++++++ blockdev.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++ hmp-commands.hx | 13 +++++++++ hmp.c | 11 ++++++++ hmp.h | 1 + monitor.c | 3 +++ monitor.h | 1 + qapi-schema.json | 31 +++++++++++++++++++++ qerror.c | 4 +++ qerror.h | 3 +++ qmp-commands.hx | 6 +++++ trace-events | 4 +++ 12 files changed, 173 insertions(+) diff --git a/QMP/qmp-events.txt b/QMP/qmp-events.txt index af586ec85..0cd2275c3 100644 --- a/QMP/qmp-events.txt +++ b/QMP/qmp-events.txt @@ -264,3 +264,32 @@ Example: Note: If action is "reset", "shutdown", or "pause" the WATCHDOG event is followed respectively by the RESET, SHUTDOWN, or STOP events. + + +BLOCK_JOB_COMPLETED +------------------- + +Emitted when a block job has completed. + +Data: + +- "type": Job type ("stream" for image streaming, json-string) +- "device": Device name (json-string) +- "len": Maximum progress value (json-int) +- "offset": Current progress value (json-int) + On success this is equal to len. + On failure this is less than len. +- "speed": Rate limit, bytes per second (json-int) +- "error": Error message (json-string, optional) + Only present on failure. This field contains a human-readable + error message. There are no semantics other than that streaming + has failed and clients should not try to interpret the error + string. + +Example: + +{ "event": "BLOCK_JOB_COMPLETED", + "data": { "type": "stream", "device": "virtio-disk0", + "len": 10737418240, "offset": 10737418240, + "speed": 0 }, + "timestamp": { "seconds": 1267061043, "microseconds": 959568 } } diff --git a/blockdev.c b/blockdev.c index 0499ee6ae..06f179ef8 100644 --- a/blockdev.c +++ b/blockdev.c @@ -13,9 +13,11 @@ #include "qerror.h" #include "qemu-option.h" #include "qemu-config.h" +#include "qemu-objects.h" #include "sysemu.h" #include "block_int.h" #include "qmp-commands.h" +#include "trace.h" static QTAILQ_HEAD(drivelist, DriveInfo) drives = QTAILQ_HEAD_INITIALIZER(drives); @@ -897,3 +899,68 @@ void qmp_block_resize(const char *device, int64_t size, Error **errp) break; } } + +static QObject *qobject_from_block_job(BlockJob *job) +{ + return qobject_from_jsonf("{ 'type': %s," + "'device': %s," + "'len': %" PRId64 "," + "'offset': %" PRId64 "," + "'speed': %" PRId64 " }", + job->job_type->job_type, + bdrv_get_device_name(job->bs), + job->len, + job->offset, + job->speed); +} + +static void block_stream_cb(void *opaque, int ret) +{ + BlockDriverState *bs = opaque; + QObject *obj; + + trace_block_stream_cb(bs, bs->job, ret); + + assert(bs->job); + obj = qobject_from_block_job(bs->job); + if (ret < 0) { + QDict *dict = qobject_to_qdict(obj); + qdict_put(dict, "error", qstring_from_str(strerror(-ret))); + } + + monitor_protocol_event(QEVENT_BLOCK_JOB_COMPLETED, obj); + qobject_decref(obj); +} + +void qmp_block_stream(const char *device, bool has_base, + const char *base, Error **errp) +{ + BlockDriverState *bs; + int ret; + + bs = bdrv_find(device); + if (!bs) { + error_set(errp, QERR_DEVICE_NOT_FOUND, device); + return; + } + + /* Base device not supported */ + if (base) { + error_set(errp, QERR_NOT_SUPPORTED); + return; + } + + ret = stream_start(bs, NULL, block_stream_cb, bs); + if (ret < 0) { + switch (ret) { + case -EBUSY: + error_set(errp, QERR_DEVICE_IN_USE, device); + return; + default: + error_set(errp, QERR_NOT_SUPPORTED); + return; + } + } + + trace_qmp_block_stream(bs, bs->job); +} diff --git a/hmp-commands.hx b/hmp-commands.hx index e6506fc9d..5f0959475 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -69,6 +69,19 @@ but should be used with extreme caution. Note that this command only resizes image files, it can not resize block devices like LVM volumes. ETEXI + { + .name = "block_stream", + .args_type = "device:B,base:s?", + .params = "device [base]", + .help = "copy data from a backing file into a block device", + .mhandler.cmd = hmp_block_stream, + }, + +STEXI +@item block_stream +@findex block_stream +Copy data from a backing file into a block device. +ETEXI { .name = "eject", diff --git a/hmp.c b/hmp.c index 4664dbe8e..cde8d7a5c 100644 --- a/hmp.c +++ b/hmp.c @@ -783,3 +783,14 @@ void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict) qdict_get_int(qdict, "iops_wr"), &err); hmp_handle_error(mon, &err); } + +void hmp_block_stream(Monitor *mon, const QDict *qdict) +{ + Error *error = NULL; + const char *device = qdict_get_str(qdict, "device"); + const char *base = qdict_get_try_str(qdict, "base"); + + qmp_block_stream(device, base != NULL, base, &error); + + hmp_handle_error(mon, &error); +} diff --git a/hmp.h b/hmp.h index aab0b1f50..9234f28db 100644 --- a/hmp.h +++ b/hmp.h @@ -54,5 +54,6 @@ void hmp_expire_password(Monitor *mon, const QDict *qdict); void hmp_eject(Monitor *mon, const QDict *qdict); void hmp_change(Monitor *mon, const QDict *qdict); void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict); +void hmp_block_stream(Monitor *mon, const QDict *qdict); #endif diff --git a/monitor.c b/monitor.c index 187083c45..3d8cbfbee 100644 --- a/monitor.c +++ b/monitor.c @@ -479,6 +479,9 @@ void monitor_protocol_event(MonitorEvent event, QObject *data) case QEVENT_SPICE_DISCONNECTED: event_name = "SPICE_DISCONNECTED"; break; + case QEVENT_BLOCK_JOB_COMPLETED: + event_name = "BLOCK_JOB_COMPLETED"; + break; default: abort(); break; diff --git a/monitor.h b/monitor.h index 887c472a9..34d00d107 100644 --- a/monitor.h +++ b/monitor.h @@ -36,6 +36,7 @@ typedef enum MonitorEvent { QEVENT_SPICE_CONNECTED, QEVENT_SPICE_INITIALIZED, QEVENT_SPICE_DISCONNECTED, + QEVENT_BLOCK_JOB_COMPLETED, QEVENT_MAX, } MonitorEvent; diff --git a/qapi-schema.json b/qapi-schema.json index 735eb352b..9a1d9a936 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1434,3 +1434,34 @@ { 'command': 'block_set_io_throttle', 'data': { 'device': 'str', 'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int', 'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int' } } + +# @block_stream: +# +# Copy data from a backing file into a block device. +# +# The block streaming operation is performed in the background until the entire +# backing file has been copied. This command returns immediately once streaming +# has started. The status of ongoing block streaming operations can be checked +# with query-block-jobs. The operation can be stopped before it has completed +# using the block_job_cancel command. +# +# If a base file is specified then sectors are not copied from that base file and +# its backing chain. When streaming completes the image file will have the base +# file as its backing file. This can be used to stream a subset of the backing +# file chain instead of flattening the entire image. +# +# On successful completion the image file is updated to drop the backing file +# and the BLOCK_JOB_COMPLETED event is emitted. +# +# @device: the device name +# +# @base: #optional the common backing file name +# +# Returns: Nothing on success +# If streaming is already active on this device, DeviceInUse +# If @device does not exist, DeviceNotFound +# If image streaming is not supported by this device, NotSupported +# +# Since: 1.1 +## +{ 'command': 'block_stream', 'data': { 'device': 'str', '*base': 'str' } } diff --git a/qerror.c b/qerror.c index 3d9538394..2c60e1063 100644 --- a/qerror.c +++ b/qerror.c @@ -196,6 +196,10 @@ static const QErrorStringTable qerror_table[] = { .error_fmt = QERR_NO_BUS_FOR_DEVICE, .desc = "No '%(bus)' bus found for device '%(device)'", }, + { + .error_fmt = QERR_NOT_SUPPORTED, + .desc = "Not supported", + }, { .error_fmt = QERR_OPEN_FILE_FAILED, .desc = "Could not open '%(filename)'", diff --git a/qerror.h b/qerror.h index 89160dd78..b530bc880 100644 --- a/qerror.h +++ b/qerror.h @@ -168,6 +168,9 @@ QError *qobject_to_qerror(const QObject *obj); #define QERR_NO_BUS_FOR_DEVICE \ "{ 'class': 'NoBusForDevice', 'data': { 'device': %s, 'bus': %s } }" +#define QERR_NOT_SUPPORTED \ + "{ 'class': 'NotSupported', 'data': {} }" + #define QERR_OPEN_FILE_FAILED \ "{ 'class': 'OpenFileFailed', 'data': { 'filename': %s } }" diff --git a/qmp-commands.hx b/qmp-commands.hx index 799e65598..0dfc80e26 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -648,6 +648,12 @@ Example: EQMP + { + .name = "block_stream", + .args_type = "device:B,base:s?", + .mhandler.cmd_new = qmp_marshal_input_block_stream, + }, + { .name = "blockdev-snapshot-sync", .args_type = "device:B,snapshot-file:s,format:s?", diff --git a/trace-events b/trace-events index f6a3cd1aa..5edba21a8 100644 --- a/trace-events +++ b/trace-events @@ -74,6 +74,10 @@ bdrv_co_do_copy_on_readv(void *bs, int64_t sector_num, int nb_sectors, int64_t c stream_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d" stream_start(void *bs, void *base, void *s, void *co, void *opaque) "bs %p base %p s %p co %p opaque %p" +# blockdev.c +block_stream_cb(void *bs, void *job, int ret) "bs %p job %p ret %d" +qmp_block_stream(void *bs, void *job) "bs %p job %p" + # hw/virtio-blk.c virtio_blk_req_complete(void *req, int status) "req %p status %d" virtio_blk_rw_complete(void *req, int ret) "req %p ret %d" From 2d47c6e9aa2475807913bd46dfca55980cca9fb4 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 18 Jan 2012 14:40:47 +0000 Subject: [PATCH 10/22] qmp: add block_job_set_speed command Add block_job_set_speed, which sets the maximum speed for a background block operation. Signed-off-by: Stefan Hajnoczi Acked-by: Luiz Capitulino Signed-off-by: Kevin Wolf --- blockdev.c | 25 +++++++++++++++++++++++++ hmp-commands.hx | 14 ++++++++++++++ hmp.c | 11 +++++++++++ hmp.h | 1 + qapi-schema.json | 22 ++++++++++++++++++++++ qmp-commands.hx | 6 ++++++ 6 files changed, 79 insertions(+) diff --git a/blockdev.c b/blockdev.c index 06f179ef8..4c8fcdd31 100644 --- a/blockdev.c +++ b/blockdev.c @@ -964,3 +964,28 @@ void qmp_block_stream(const char *device, bool has_base, trace_qmp_block_stream(bs, bs->job); } + +static BlockJob *find_block_job(const char *device) +{ + BlockDriverState *bs; + + bs = bdrv_find(device); + if (!bs || !bs->job) { + return NULL; + } + return bs->job; +} + +void qmp_block_job_set_speed(const char *device, int64_t value, Error **errp) +{ + BlockJob *job = find_block_job(device); + + if (!job) { + error_set(errp, QERR_DEVICE_NOT_ACTIVE, device); + return; + } + + if (block_job_set_speed(job, value) < 0) { + error_set(errp, QERR_NOT_SUPPORTED); + } +} diff --git a/hmp-commands.hx b/hmp-commands.hx index 5f0959475..813705ee3 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -81,6 +81,20 @@ STEXI @item block_stream @findex block_stream Copy data from a backing file into a block device. +ETEXI + + { + .name = "block_job_set_speed", + .args_type = "device:B,value:o", + .params = "device value", + .help = "set maximum speed for a background block operation", + .mhandler.cmd = hmp_block_job_set_speed, + }, + +STEXI +@item block_job_set_stream +@findex block_job_set_stream +Set maximum speed for a background block operation. ETEXI { diff --git a/hmp.c b/hmp.c index cde8d7a5c..bf3d9df88 100644 --- a/hmp.c +++ b/hmp.c @@ -794,3 +794,14 @@ void hmp_block_stream(Monitor *mon, const QDict *qdict) hmp_handle_error(mon, &error); } + +void hmp_block_job_set_speed(Monitor *mon, const QDict *qdict) +{ + Error *error = NULL; + const char *device = qdict_get_str(qdict, "device"); + int64_t value = qdict_get_int(qdict, "value"); + + qmp_block_job_set_speed(device, value, &error); + + hmp_handle_error(mon, &error); +} diff --git a/hmp.h b/hmp.h index 9234f28db..edd902077 100644 --- a/hmp.h +++ b/hmp.h @@ -55,5 +55,6 @@ void hmp_eject(Monitor *mon, const QDict *qdict); void hmp_change(Monitor *mon, const QDict *qdict); void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict); void hmp_block_stream(Monitor *mon, const QDict *qdict); +void hmp_block_job_set_speed(Monitor *mon, const QDict *qdict); #endif diff --git a/qapi-schema.json b/qapi-schema.json index 9a1d9a936..d9f66c67f 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1465,3 +1465,25 @@ # Since: 1.1 ## { 'command': 'block_stream', 'data': { 'device': 'str', '*base': 'str' } } + +## +# @block_job_set_speed: +# +# Set maximum speed for a background block operation. +# +# This command can only be issued when there is an active block job. +# +# Throttling can be disabled by setting the speed to 0. +# +# @device: the device name +# +# @value: the maximum speed, in bytes per second +# +# Returns: Nothing on success +# If the job type does not support throttling, NotSupported +# If streaming is not active on this device, DeviceNotActive +# +# Since: 1.1 +## +{ 'command': 'block_job_set_speed', + 'data': { 'device': 'str', 'value': 'int' } } diff --git a/qmp-commands.hx b/qmp-commands.hx index 0dfc80e26..e4615ca79 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -654,6 +654,12 @@ EQMP .mhandler.cmd_new = qmp_marshal_input_block_stream, }, + { + .name = "block_job_set_speed", + .args_type = "device:B,value:o", + .mhandler.cmd_new = qmp_marshal_input_block_job_set_speed, + }, + { .name = "blockdev-snapshot-sync", .args_type = "device:B,snapshot-file:s,format:s?", From 370521a1d6f5537ea7271c119f3fbb7b0fa57063 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 18 Jan 2012 14:40:48 +0000 Subject: [PATCH 11/22] qmp: add block_job_cancel command Add block_job_cancel, which stops an active block streaming operation. When the operation has been cancelled the new BLOCK_JOB_CANCELLED event is emitted. Signed-off-by: Stefan Hajnoczi Acked-by: Luiz Capitulino Signed-off-by: Kevin Wolf --- QMP/qmp-events.txt | 24 ++++++++++++++++++++++++ blockdev.c | 19 ++++++++++++++++++- hmp-commands.hx | 14 ++++++++++++++ hmp.c | 10 ++++++++++ hmp.h | 1 + monitor.c | 3 +++ monitor.h | 1 + qapi-schema.json | 29 +++++++++++++++++++++++++++++ qmp-commands.hx | 6 ++++++ trace-events | 1 + 10 files changed, 107 insertions(+), 1 deletion(-) diff --git a/QMP/qmp-events.txt b/QMP/qmp-events.txt index 0cd2275c3..06cb40483 100644 --- a/QMP/qmp-events.txt +++ b/QMP/qmp-events.txt @@ -293,3 +293,27 @@ Example: "len": 10737418240, "offset": 10737418240, "speed": 0 }, "timestamp": { "seconds": 1267061043, "microseconds": 959568 } } + + +BLOCK_JOB_CANCELLED +------------------- + +Emitted when a block job has been cancelled. + +Data: + +- "type": Job type ("stream" for image streaming, json-string) +- "device": Device name (json-string) +- "len": Maximum progress value (json-int) +- "offset": Current progress value (json-int) + On success this is equal to len. + On failure this is less than len. +- "speed": Rate limit, bytes per second (json-int) + +Example: + +{ "event": "BLOCK_JOB_CANCELLED", + "data": { "type": "stream", "device": "virtio-disk0", + "len": 10737418240, "offset": 134217728, + "speed": 0 }, + "timestamp": { "seconds": 1267061043, "microseconds": 959568 } } diff --git a/blockdev.c b/blockdev.c index 4c8fcdd31..d3d671805 100644 --- a/blockdev.c +++ b/blockdev.c @@ -928,7 +928,11 @@ static void block_stream_cb(void *opaque, int ret) qdict_put(dict, "error", qstring_from_str(strerror(-ret))); } - monitor_protocol_event(QEVENT_BLOCK_JOB_COMPLETED, obj); + if (block_job_is_cancelled(bs->job)) { + monitor_protocol_event(QEVENT_BLOCK_JOB_CANCELLED, obj); + } else { + monitor_protocol_event(QEVENT_BLOCK_JOB_COMPLETED, obj); + } qobject_decref(obj); } @@ -989,3 +993,16 @@ void qmp_block_job_set_speed(const char *device, int64_t value, Error **errp) error_set(errp, QERR_NOT_SUPPORTED); } } + +void qmp_block_job_cancel(const char *device, Error **errp) +{ + BlockJob *job = find_block_job(device); + + if (!job) { + error_set(errp, QERR_DEVICE_NOT_ACTIVE, device); + return; + } + + trace_qmp_block_job_cancel(job); + block_job_cancel(job); +} diff --git a/hmp-commands.hx b/hmp-commands.hx index 813705ee3..573b82334 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -95,6 +95,20 @@ STEXI @item block_job_set_stream @findex block_job_set_stream Set maximum speed for a background block operation. +ETEXI + + { + .name = "block_job_cancel", + .args_type = "device:B", + .params = "device", + .help = "stop an active block streaming operation", + .mhandler.cmd = hmp_block_job_cancel, + }, + +STEXI +@item block_job_cancel +@findex block_job_cancel +Stop an active block streaming operation. ETEXI { diff --git a/hmp.c b/hmp.c index bf3d9df88..867c33878 100644 --- a/hmp.c +++ b/hmp.c @@ -805,3 +805,13 @@ void hmp_block_job_set_speed(Monitor *mon, const QDict *qdict) hmp_handle_error(mon, &error); } + +void hmp_block_job_cancel(Monitor *mon, const QDict *qdict) +{ + Error *error = NULL; + const char *device = qdict_get_str(qdict, "device"); + + qmp_block_job_cancel(device, &error); + + hmp_handle_error(mon, &error); +} diff --git a/hmp.h b/hmp.h index edd902077..eb4ca8285 100644 --- a/hmp.h +++ b/hmp.h @@ -56,5 +56,6 @@ void hmp_change(Monitor *mon, const QDict *qdict); void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict); void hmp_block_stream(Monitor *mon, const QDict *qdict); void hmp_block_job_set_speed(Monitor *mon, const QDict *qdict); +void hmp_block_job_cancel(Monitor *mon, const QDict *qdict); #endif diff --git a/monitor.c b/monitor.c index 3d8cbfbee..9424c31ed 100644 --- a/monitor.c +++ b/monitor.c @@ -482,6 +482,9 @@ void monitor_protocol_event(MonitorEvent event, QObject *data) case QEVENT_BLOCK_JOB_COMPLETED: event_name = "BLOCK_JOB_COMPLETED"; break; + case QEVENT_BLOCK_JOB_CANCELLED: + event_name = "BLOCK_JOB_CANCELLED"; + break; default: abort(); break; diff --git a/monitor.h b/monitor.h index 34d00d107..b72ea0705 100644 --- a/monitor.h +++ b/monitor.h @@ -37,6 +37,7 @@ typedef enum MonitorEvent { QEVENT_SPICE_INITIALIZED, QEVENT_SPICE_DISCONNECTED, QEVENT_BLOCK_JOB_COMPLETED, + QEVENT_BLOCK_JOB_CANCELLED, QEVENT_MAX, } MonitorEvent; diff --git a/qapi-schema.json b/qapi-schema.json index d9f66c67f..02c4e2257 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1487,3 +1487,32 @@ ## { 'command': 'block_job_set_speed', 'data': { 'device': 'str', 'value': 'int' } } + +## +# @block_job_cancel: +# +# Stop an active block streaming operation. +# +# This command returns immediately after marking the active block streaming +# operation for cancellation. It is an error to call this command if no +# operation is in progress. +# +# The operation will cancel as soon as possible and then emit the +# BLOCK_JOB_CANCELLED event. Before that happens the job is still visible when +# enumerated using query-block-jobs. +# +# The image file retains its backing file unless the streaming operation happens +# to complete just as it is being cancelled. +# +# A new block streaming operation can be started at a later time to finish +# copying all data from the backing file. +# +# @device: the device name +# +# Returns: Nothing on success +# If streaming is not active on this device, DeviceNotActive +# If cancellation already in progress, DeviceInUse +# +# Since: 1.1 +## +{ 'command': 'block_job_cancel', 'data': { 'device': 'str' } } diff --git a/qmp-commands.hx b/qmp-commands.hx index e4615ca79..0795c5d48 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -660,6 +660,12 @@ EQMP .mhandler.cmd_new = qmp_marshal_input_block_job_set_speed, }, + { + .name = "block_job_cancel", + .args_type = "device:B", + .mhandler.cmd_new = qmp_marshal_input_block_job_cancel, + }, + { .name = "blockdev-snapshot-sync", .args_type = "device:B,snapshot-file:s,format:s?", diff --git a/trace-events b/trace-events index 5edba21a8..ad77e0a61 100644 --- a/trace-events +++ b/trace-events @@ -75,6 +75,7 @@ stream_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocat stream_start(void *bs, void *base, void *s, void *co, void *opaque) "bs %p base %p s %p co %p opaque %p" # blockdev.c +qmp_block_job_cancel(void *job) "job %p" block_stream_cb(void *bs, void *job, int ret) "bs %p job %p ret %d" qmp_block_stream(void *bs, void *job) "bs %p job %p" From fb5458cd10a199e55e622a906b24f8085d922c0f Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 18 Jan 2012 14:40:49 +0000 Subject: [PATCH 12/22] qmp: add query-block-jobs Add query-block-jobs, which shows the progress of ongoing block device operations. Signed-off-by: Stefan Hajnoczi Acked-by: Luiz Capitulino Signed-off-by: Kevin Wolf --- blockdev.c | 33 +++++++++++++++++++++++++++++++++ hmp.c | 36 ++++++++++++++++++++++++++++++++++++ hmp.h | 1 + monitor.c | 7 +++++++ qapi-schema.json | 32 ++++++++++++++++++++++++++++++++ qmp-commands.hx | 6 ++++++ 6 files changed, 115 insertions(+) diff --git a/blockdev.c b/blockdev.c index d3d671805..d026cd4fd 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1006,3 +1006,36 @@ void qmp_block_job_cancel(const char *device, Error **errp) trace_qmp_block_job_cancel(job); block_job_cancel(job); } + +static void do_qmp_query_block_jobs_one(void *opaque, BlockDriverState *bs) +{ + BlockJobInfoList **prev = opaque; + BlockJob *job = bs->job; + + if (job) { + BlockJobInfoList *elem; + BlockJobInfo *info = g_new(BlockJobInfo, 1); + *info = (BlockJobInfo){ + .type = g_strdup(job->job_type->job_type), + .device = g_strdup(bdrv_get_device_name(bs)), + .len = job->len, + .offset = job->offset, + .speed = job->speed, + }; + + elem = g_new0(BlockJobInfoList, 1); + elem->value = info; + + (*prev)->next = elem; + *prev = elem; + } +} + +BlockJobInfoList *qmp_query_block_jobs(Error **errp) +{ + /* Dummy is a fake list element for holding the head pointer */ + BlockJobInfoList dummy = {}; + BlockJobInfoList *prev = &dummy; + bdrv_iterate(do_qmp_query_block_jobs_one, &prev); + return dummy.next; +} diff --git a/hmp.c b/hmp.c index 867c33878..8ff8c9434 100644 --- a/hmp.c +++ b/hmp.c @@ -509,6 +509,42 @@ void hmp_info_pci(Monitor *mon) qapi_free_PciInfoList(info_list); } +void hmp_info_block_jobs(Monitor *mon) +{ + BlockJobInfoList *list; + Error *err = NULL; + + list = qmp_query_block_jobs(&err); + assert(!err); + + if (!list) { + monitor_printf(mon, "No active jobs\n"); + return; + } + + while (list) { + if (strcmp(list->value->type, "stream") == 0) { + monitor_printf(mon, "Streaming device %s: Completed %" PRId64 + " of %" PRId64 " bytes, speed limit %" PRId64 + " bytes/s\n", + list->value->device, + list->value->offset, + list->value->len, + list->value->speed); + } else { + monitor_printf(mon, "Type %s, device %s: Completed %" PRId64 + " of %" PRId64 " bytes, speed limit %" PRId64 + " bytes/s\n", + list->value->type, + list->value->device, + list->value->offset, + list->value->len, + list->value->speed); + } + list = list->next; + } +} + void hmp_quit(Monitor *mon, const QDict *qdict) { monitor_suspend(mon); diff --git a/hmp.h b/hmp.h index eb4ca8285..18eecbdcb 100644 --- a/hmp.h +++ b/hmp.h @@ -32,6 +32,7 @@ void hmp_info_vnc(Monitor *mon); void hmp_info_spice(Monitor *mon); void hmp_info_balloon(Monitor *mon); void hmp_info_pci(Monitor *mon); +void hmp_info_block_jobs(Monitor *mon); void hmp_quit(Monitor *mon, const QDict *qdict); void hmp_stop(Monitor *mon, const QDict *qdict); void hmp_system_reset(Monitor *mon, const QDict *qdict); diff --git a/monitor.c b/monitor.c index 9424c31ed..aadbdcbf3 100644 --- a/monitor.c +++ b/monitor.c @@ -2317,6 +2317,13 @@ static mon_cmd_t info_cmds[] = { .help = "show block device statistics", .mhandler.info = hmp_info_blockstats, }, + { + .name = "block-jobs", + .args_type = "", + .params = "", + .help = "show progress of ongoing block device operations", + .mhandler.info = hmp_info_block_jobs, + }, { .name = "registers", .args_type = "", diff --git a/qapi-schema.json b/qapi-schema.json index 02c4e2257..3f72c2cfd 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -844,6 +844,38 @@ ## { 'command': 'query-pci', 'returns': ['PciInfo'] } +## +# @BlockJobInfo: +# +# Information about a long-running block device operation. +# +# @type: the job type ('stream' for image streaming) +# +# @device: the block device name +# +# @len: the maximum progress value +# +# @offset: the current progress value +# +# @speed: the rate limit, bytes per second +# +# Since: 1.1 +## +{ 'type': 'BlockJobInfo', + 'data': {'type': 'str', 'device': 'str', 'len': 'int', + 'offset': 'int', 'speed': 'int'} } + +## +# @query-block-jobs: +# +# Return information about long-running block device operations. +# +# Returns: a list of @BlockJobInfo for each active block job +# +# Since: 1.1 +## +{ 'command': 'query-block-jobs', 'returns': ['BlockJobInfo'] } + ## # @quit: # diff --git a/qmp-commands.hx b/qmp-commands.hx index 0795c5d48..bd6b6410a 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -2013,6 +2013,12 @@ EQMP .mhandler.cmd_new = qmp_marshal_input_query_balloon, }, + { + .name = "query-block-jobs", + .args_type = "", + .mhandler.cmd_new = qmp_marshal_input_query_block_jobs, + }, + { .name = "qom-list", .args_type = "path:s", From aa398a5c3a4c0fc29baf02aee5283a7fa0f202a3 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 18 Jan 2012 14:40:50 +0000 Subject: [PATCH 13/22] blockdev: make image streaming safe across hotplug Unplugging a storage interface like virtio-blk causes the host block device to be deleted too. Long-running operations like block migration must take a DriveInfo reference to prevent the BlockDriverState from being freed. For image streaming we can do the same thing. Note that it is not possible to acquire/release the drive reference in block.c where the block job functions live because drive_get_ref()/drive_put_ref() are blockdev.c functions. Calling them from block.c would be a layering violation - tools like qemu-img don't even link against blockdev.c. Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- blockdev.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/blockdev.c b/blockdev.c index d026cd4fd..2fa01516f 100644 --- a/blockdev.c +++ b/blockdev.c @@ -202,6 +202,37 @@ void drive_get_ref(DriveInfo *dinfo) dinfo->refcount++; } +typedef struct { + QEMUBH *bh; + DriveInfo *dinfo; +} DrivePutRefBH; + +static void drive_put_ref_bh(void *opaque) +{ + DrivePutRefBH *s = opaque; + + drive_put_ref(s->dinfo); + qemu_bh_delete(s->bh); + g_free(s); +} + +/* + * Release a drive reference in a BH + * + * It is not possible to use drive_put_ref() from a callback function when the + * callers still need the drive. In such cases we schedule a BH to release the + * reference. + */ +static void drive_put_ref_bh_schedule(DriveInfo *dinfo) +{ + DrivePutRefBH *s; + + s = g_new(DrivePutRefBH, 1); + s->bh = qemu_bh_new(drive_put_ref_bh, s); + s->dinfo = dinfo; + qemu_bh_schedule(s->bh); +} + static int parse_block_error_action(const char *buf, int is_read) { if (!strcmp(buf, "ignore")) { @@ -934,6 +965,8 @@ static void block_stream_cb(void *opaque, int ret) monitor_protocol_event(QEVENT_BLOCK_JOB_COMPLETED, obj); } qobject_decref(obj); + + drive_put_ref_bh_schedule(drive_get_by_blockdev(bs)); } void qmp_block_stream(const char *device, bool has_base, @@ -966,6 +999,11 @@ void qmp_block_stream(const char *device, bool has_base, } } + /* Grab a reference so hotplug does not delete the BlockDriverState from + * underneath us. + */ + drive_get_ref(drive_get_by_blockdev(bs)); + trace_qmp_block_stream(bs, bs->job); } From e8a6bb9caa5379b0fcaca4e0a12aa6d2913961f3 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 18 Jan 2012 14:40:51 +0000 Subject: [PATCH 14/22] block: add bdrv_find_backing_image Add bdrv_find_backing_image: given a BlockDriverState pointer, and an id, traverse the backing image chain to locate the id. Signed-off-by: Marcelo Tosatti Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block.c | 18 ++++++++++++++++++ block.h | 2 ++ 2 files changed, 20 insertions(+) diff --git a/block.c b/block.c index 2baac952f..3621d11de 100644 --- a/block.c +++ b/block.c @@ -2595,6 +2595,24 @@ int bdrv_snapshot_load_tmp(BlockDriverState *bs, return -ENOTSUP; } +BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, + const char *backing_file) +{ + if (!bs->drv) { + return NULL; + } + + if (bs->backing_hd) { + if (strcmp(bs->backing_file, backing_file) == 0) { + return bs->backing_hd; + } else { + return bdrv_find_backing_image(bs->backing_hd, backing_file); + } + } + + return NULL; +} + #define NB_SUFFIXES 4 char *get_human_readable_size(char *buf, int buf_size, int64_t size) diff --git a/block.h b/block.h index a3b0b8085..cae289b2f 100644 --- a/block.h +++ b/block.h @@ -148,6 +148,8 @@ int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum); +BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, + const char *backing_file); int bdrv_truncate(BlockDriverState *bs, int64_t offset); int64_t bdrv_getlength(BlockDriverState *bs); int64_t bdrv_get_allocated_file_size(BlockDriverState *bs); From 019b8cbf76fc3126d300c401acca3102c69e7876 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 18 Jan 2012 14:40:52 +0000 Subject: [PATCH 15/22] add QERR_BASE_NOT_FOUND This qerror will be raised when a given streaming base (backing file) cannot be found. Signed-off-by: Marcelo Tosatti Signed-off-by: Stefan Hajnoczi Acked-by: Luiz Capitulino Signed-off-by: Kevin Wolf --- qapi-schema.json | 1 + qerror.c | 4 ++++ qerror.h | 3 +++ 3 files changed, 8 insertions(+) diff --git a/qapi-schema.json b/qapi-schema.json index 3f72c2cfd..80debe679 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1493,6 +1493,7 @@ # If streaming is already active on this device, DeviceInUse # If @device does not exist, DeviceNotFound # If image streaming is not supported by this device, NotSupported +# If @base does not exist, BaseNotFound # # Since: 1.1 ## diff --git a/qerror.c b/qerror.c index 2c60e1063..637eca793 100644 --- a/qerror.c +++ b/qerror.c @@ -51,6 +51,10 @@ static const QErrorStringTable qerror_table[] = { .error_fmt = QERR_BAD_BUS_FOR_DEVICE, .desc = "Device '%(device)' can't go on a %(bad_bus_type) bus", }, + { + .error_fmt = QERR_BASE_NOT_FOUND, + .desc = "Base '%(base)' not found", + }, { .error_fmt = QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED, .desc = "Block format '%(format)' used by device '%(name)' does not support feature '%(feature)'", diff --git a/qerror.h b/qerror.h index b530bc880..8c36ddb7e 100644 --- a/qerror.h +++ b/qerror.h @@ -57,6 +57,9 @@ QError *qobject_to_qerror(const QObject *obj); #define QERR_BAD_BUS_FOR_DEVICE \ "{ 'class': 'BadBusForDevice', 'data': { 'device': %s, 'bad_bus_type': %s } }" +#define QERR_BASE_NOT_FOUND \ + "{ 'class': 'BaseNotFound', 'data': { 'base': %s } }" + #define QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED \ "{ 'class': 'BlockFormatFeatureNotSupported', 'data': { 'format': %s, 'name': %s, 'feature': %s } }" From c8c3080f4a6fbbf3c9c5d6efd1b49e7ca6479d1e Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 18 Jan 2012 14:40:53 +0000 Subject: [PATCH 16/22] block: add support for partial streaming Add support for streaming data from an intermediate section of the image chain (see patch and documentation for details). Signed-off-by: Marcelo Tosatti Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/stream.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++--- block_int.h | 3 +- blockdev.c | 11 +++--- 3 files changed, 96 insertions(+), 9 deletions(-) diff --git a/block/stream.c b/block/stream.c index b54b0b73c..d1b3986a8 100644 --- a/block/stream.c +++ b/block/stream.c @@ -57,6 +57,7 @@ typedef struct StreamBlockJob { BlockJob common; RateLimit limit; BlockDriverState *base; + char backing_file_id[1024]; } StreamBlockJob; static int coroutine_fn stream_populate(BlockDriverState *bs, @@ -75,10 +76,76 @@ static int coroutine_fn stream_populate(BlockDriverState *bs, return bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, &qiov); } +/* + * Given an image chain: [BASE] -> [INTER1] -> [INTER2] -> [TOP] + * + * Return true if the given sector is allocated in top. + * Return false if the given sector is allocated in intermediate images. + * Return true otherwise. + * + * 'pnum' is set to the number of sectors (including and immediately following + * the specified sector) that are known to be in the same + * allocated/unallocated state. + * + */ +static int coroutine_fn is_allocated_base(BlockDriverState *top, + BlockDriverState *base, + int64_t sector_num, + int nb_sectors, int *pnum) +{ + BlockDriverState *intermediate; + int ret, n; + + ret = bdrv_co_is_allocated(top, sector_num, nb_sectors, &n); + if (ret) { + *pnum = n; + return ret; + } + + /* + * Is the unallocated chunk [sector_num, n] also + * unallocated between base and top? + */ + intermediate = top->backing_hd; + + while (intermediate) { + int pnum_inter; + + /* reached base */ + if (intermediate == base) { + *pnum = n; + return 1; + } + ret = bdrv_co_is_allocated(intermediate, sector_num, nb_sectors, + &pnum_inter); + if (ret < 0) { + return ret; + } else if (ret) { + *pnum = pnum_inter; + return 0; + } + + /* + * [sector_num, nb_sectors] is unallocated on top but intermediate + * might have + * + * [sector_num+x, nr_sectors] allocated. + */ + if (n > pnum_inter) { + n = pnum_inter; + } + + intermediate = intermediate->backing_hd; + } + + return 1; +} + static void coroutine_fn stream_run(void *opaque) { StreamBlockJob *s = opaque; BlockDriverState *bs = s->common.bs; + BlockDriverState *base = s->base; int64_t sector_num, end; int ret = 0; int n; @@ -108,8 +175,15 @@ retry: break; } - ret = bdrv_co_is_allocated(bs, sector_num, - STREAM_BUFFER_SIZE / BDRV_SECTOR_SIZE, &n); + + if (base) { + ret = is_allocated_base(bs, base, sector_num, + STREAM_BUFFER_SIZE / BDRV_SECTOR_SIZE, &n); + } else { + ret = bdrv_co_is_allocated(bs, sector_num, + STREAM_BUFFER_SIZE / BDRV_SECTOR_SIZE, + &n); + } trace_stream_one_iteration(s, sector_num, n, ret); if (ret == 0) { if (s->common.speed) { @@ -126,6 +200,7 @@ retry: if (ret < 0) { break; } + ret = 0; /* Publish progress */ s->common.offset += n * BDRV_SECTOR_SIZE; @@ -141,7 +216,11 @@ retry: } if (sector_num == end && ret == 0) { - ret = bdrv_change_backing_file(bs, NULL, NULL); + const char *base_id = NULL; + if (base) { + base_id = s->backing_file_id; + } + ret = bdrv_change_backing_file(bs, base_id, NULL); } qemu_vfree(buf); @@ -167,7 +246,8 @@ static BlockJobType stream_job_type = { }; int stream_start(BlockDriverState *bs, BlockDriverState *base, - BlockDriverCompletionFunc *cb, void *opaque) + const char *base_id, BlockDriverCompletionFunc *cb, + void *opaque) { StreamBlockJob *s; Coroutine *co; @@ -178,6 +258,9 @@ int stream_start(BlockDriverState *bs, BlockDriverState *base, } s->base = base; + if (base_id) { + pstrcpy(s->backing_file_id, sizeof(s->backing_file_id), base_id); + } co = qemu_coroutine_create(stream_run); trace_stream_start(bs, base, s, co, opaque); diff --git a/block_int.h b/block_int.h index 4f638705e..7be2988ca 100644 --- a/block_int.h +++ b/block_int.h @@ -328,6 +328,7 @@ void block_job_cancel(BlockJob *job); bool block_job_is_cancelled(BlockJob *job); int stream_start(BlockDriverState *bs, BlockDriverState *base, - BlockDriverCompletionFunc *cb, void *opaque); + const char *base_id, BlockDriverCompletionFunc *cb, + void *opaque); #endif /* BLOCK_INT_H */ diff --git a/blockdev.c b/blockdev.c index 2fa01516f..7e4c54842 100644 --- a/blockdev.c +++ b/blockdev.c @@ -973,6 +973,7 @@ void qmp_block_stream(const char *device, bool has_base, const char *base, Error **errp) { BlockDriverState *bs; + BlockDriverState *base_bs = NULL; int ret; bs = bdrv_find(device); @@ -981,13 +982,15 @@ void qmp_block_stream(const char *device, bool has_base, return; } - /* Base device not supported */ if (base) { - error_set(errp, QERR_NOT_SUPPORTED); - return; + base_bs = bdrv_find_backing_image(bs, base); + if (base_bs == NULL) { + error_set(errp, QERR_BASE_NOT_FOUND, base); + return; + } } - ret = stream_start(bs, NULL, block_stream_cb, bs); + ret = stream_start(bs, base_bs, base, block_stream_cb, bs); if (ret < 0) { switch (ret) { case -EBUSY: From 094f1ba10ab341b8d1306e863698a07b2afa6b09 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 18 Jan 2012 14:40:54 +0000 Subject: [PATCH 17/22] docs: describe live block operations Signed-off-by: Marcelo Tosatti Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- docs/live-block-ops.txt | 58 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 docs/live-block-ops.txt diff --git a/docs/live-block-ops.txt b/docs/live-block-ops.txt new file mode 100644 index 000000000..a25708740 --- /dev/null +++ b/docs/live-block-ops.txt @@ -0,0 +1,58 @@ +LIVE BLOCK OPERATIONS +===================== + +High level description of live block operations. Note these are not +supported for use with the raw format at the moment. + +Snapshot live merge +=================== + +Given a snapshot chain, described in this document in the following +format: + +[A] -> [B] -> [C] -> [D] + +Where the rightmost object ([D] in the example) described is the current +image which the guest OS has write access to. To the left of it is its base +image, and so on accordingly until the leftmost image, which has no +base. + +The snapshot live merge operation transforms such a chain into a +smaller one with fewer elements, such as this transformation relative +to the first example: + +[A] -> [D] + +Currently only forward merge with target being the active image is +supported, that is, data copy is performed in the right direction with +destination being the rightmost image. + +The operation is implemented in QEMU through image streaming facilities. + +The basic idea is to execute 'block_stream virtio0' while the guest is +running. Progress can be monitored using 'info block-jobs'. When the +streaming operation completes it raises a QMP event. 'block_stream' +copies data from the backing file(s) into the active image. When finished, +it adjusts the backing file pointer. + +The 'base' parameter specifies an image which data need not be streamed from. +This image will be used as the backing file for the active image when the +operation is finished. + +In the example above, the command would be: + +(qemu) block_stream virtio0 A + + +Live block copy +=============== + +To copy an in use image to another destination in the filesystem, one +should create a live snapshot in the desired destination, then stream +into that image. Example: + +(qemu) snapshot_blkdev ide0-hd0 /new-path/disk.img qcow2 + +(qemu) block_stream ide0-hd0 + + From 81b6b9faef111f4493addc2ed19903feace332bf Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 22 Dec 2011 13:17:02 +0000 Subject: [PATCH 18/22] virtio-blk: add virtio_blk_handle_read trace event There already exists a virtio_blk_handle_write trace event as well as completion events. Add the virtio_blk_handle_read event so it's easy to trace virtio-blk requests for both read and write operations. Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- hw/virtio-blk.c | 2 ++ trace-events | 1 + 2 files changed, 3 insertions(+) diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c index 5b416c36e..a5a439668 100644 --- a/hw/virtio-blk.c +++ b/hw/virtio-blk.c @@ -346,6 +346,8 @@ static void virtio_blk_handle_read(VirtIOBlockReq *req) bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_READ); + trace_virtio_blk_handle_read(req, sector, req->qiov.size / 512); + if (sector & req->dev->sector_mask) { virtio_blk_rw_complete(req, -EIO); return; diff --git a/trace-events b/trace-events index ad77e0a61..75f6e17ab 100644 --- a/trace-events +++ b/trace-events @@ -83,6 +83,7 @@ qmp_block_stream(void *bs, void *job) "bs %p job %p" virtio_blk_req_complete(void *req, int status) "req %p status %d" virtio_blk_rw_complete(void *req, int ret) "req %p ret %d" virtio_blk_handle_write(void *req, uint64_t sector, size_t nsectors) "req %p sector %"PRIu64" nsectors %zu" +virtio_blk_handle_read(void *req, uint64_t sector, size_t nsectors) "req %p sector %"PRIu64" nsectors %zu" # posix-aio-compat.c paio_submit(void *acb, void *opaque, int64_t sector_num, int nb_sectors, int type) "acb %p opaque %p sector_num %"PRId64" nb_sectors %d type %d" From 641543b76b82a8b361482b727e08de0c8ec093b0 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sat, 21 Jan 2012 13:54:24 +0100 Subject: [PATCH 19/22] block/vdi: Zero unused parts when allocating a new block (fix #919242) The new block was filled with zero when it was allocated by g_malloc0, but when it was reused later and only partially used, data from the previously allocated block were still present and written to the new block. This caused the problems reported by bug #919242 (https://bugs.launchpad.net/qemu/+bug/919242). Now the unused parts of the new block which are before and after the data are always filled with zero, so it is no longer necessary to zero the whole block with g_malloc0. I also updated the copyright comment. Signed-off-by: Stefan Weil Signed-off-by: Kevin Wolf --- block/vdi.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/block/vdi.c b/block/vdi.c index 31cdfabde..6a0011fbc 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -1,7 +1,7 @@ /* * Block driver for the Virtual Disk Image (VDI) format * - * Copyright (c) 2009 Stefan Weil + * Copyright (c) 2009, 2012 Stefan Weil * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -756,15 +756,19 @@ static void vdi_aio_write_cb(void *opaque, int ret) (uint64_t)bmap_entry * s->block_sectors; block = acb->block_buffer; if (block == NULL) { - block = g_malloc0(s->block_size); + block = g_malloc(s->block_size); acb->block_buffer = block; acb->bmap_first = block_index; assert(!acb->header_modified); acb->header_modified = 1; } acb->bmap_last = block_index; + /* Copy data to be written to new block and zero unused parts. */ + memset(block, 0, sector_in_block * SECTOR_SIZE); memcpy(block + sector_in_block * SECTOR_SIZE, acb->buf, n_sectors * SECTOR_SIZE); + memset(block + (sector_in_block + n_sectors) * SECTOR_SIZE, 0, + (s->block_sectors - n_sectors - sector_in_block) * SECTOR_SIZE); acb->hd_iov.iov_base = (void *)block; acb->hd_iov.iov_len = s->block_size; qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1); From 84b0ec020f0189b2c73437919df21c87be74b91f Mon Sep 17 00:00:00 2001 From: Li Zhi Hui Date: Thu, 15 Dec 2011 18:14:00 +0800 Subject: [PATCH 20/22] qcow: Return real error code in qcow_open Signed-off-by: Li Zhi Hui Signed-off-by: Kevin Wolf --- block/qcow.c | 56 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 37 insertions(+), 19 deletions(-) diff --git a/block/qcow.c b/block/qcow.c index b16955d76..e0d0b8870 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -95,11 +95,13 @@ static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename) static int qcow_open(BlockDriverState *bs, int flags) { BDRVQcowState *s = bs->opaque; - int len, i, shift; + int len, i, shift, ret; QCowHeader header; - if (bdrv_pread(bs->file, 0, &header, sizeof(header)) != sizeof(header)) + ret = bdrv_pread(bs->file, 0, &header, sizeof(header)); + if (ret < 0) { goto fail; + } be32_to_cpus(&header.magic); be32_to_cpus(&header.version); be64_to_cpus(&header.backing_file_offset); @@ -109,15 +111,31 @@ static int qcow_open(BlockDriverState *bs, int flags) be32_to_cpus(&header.crypt_method); be64_to_cpus(&header.l1_table_offset); - if (header.magic != QCOW_MAGIC || header.version != QCOW_VERSION) + if (header.magic != QCOW_MAGIC) { + ret = -EINVAL; goto fail; - if (header.size <= 1 || header.cluster_bits < 9) + } + if (header.version != QCOW_VERSION) { + char version[64]; + snprintf(version, sizeof(version), "QCOW version %d", header.version); + qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE, + bs->device_name, "qcow", version); + ret = -ENOTSUP; goto fail; - if (header.crypt_method > QCOW_CRYPT_AES) + } + + if (header.size <= 1 || header.cluster_bits < 9) { + ret = -EINVAL; goto fail; + } + if (header.crypt_method > QCOW_CRYPT_AES) { + ret = -EINVAL; + goto fail; + } s->crypt_method_header = header.crypt_method; - if (s->crypt_method_header) + if (s->crypt_method_header) { bs->encrypted = 1; + } s->cluster_bits = header.cluster_bits; s->cluster_size = 1 << s->cluster_bits; s->cluster_sectors = 1 << (s->cluster_bits - 9); @@ -132,33 +150,33 @@ static int qcow_open(BlockDriverState *bs, int flags) s->l1_table_offset = header.l1_table_offset; s->l1_table = g_malloc(s->l1_size * sizeof(uint64_t)); - if (!s->l1_table) - goto fail; - if (bdrv_pread(bs->file, s->l1_table_offset, s->l1_table, s->l1_size * sizeof(uint64_t)) != - s->l1_size * sizeof(uint64_t)) + + ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table, + s->l1_size * sizeof(uint64_t)); + if (ret < 0) { goto fail; + } + for(i = 0;i < s->l1_size; i++) { be64_to_cpus(&s->l1_table[i]); } /* alloc L2 cache */ s->l2_cache = g_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t)); - if (!s->l2_cache) - goto fail; s->cluster_cache = g_malloc(s->cluster_size); - if (!s->cluster_cache) - goto fail; s->cluster_data = g_malloc(s->cluster_size); - if (!s->cluster_data) - goto fail; s->cluster_cache_offset = -1; /* read the backing file name */ if (header.backing_file_offset != 0) { len = header.backing_file_size; - if (len > 1023) + if (len > 1023) { len = 1023; - if (bdrv_pread(bs->file, header.backing_file_offset, bs->backing_file, len) != len) + } + ret = bdrv_pread(bs->file, header.backing_file_offset, + bs->backing_file, len); + if (ret < 0) { goto fail; + } bs->backing_file[len] = '\0'; } @@ -176,7 +194,7 @@ static int qcow_open(BlockDriverState *bs, int flags) g_free(s->l2_cache); g_free(s->cluster_cache); g_free(s->cluster_data); - return -1; + return ret; } static int qcow_set_key(BlockDriverState *bs, const char *key) From 2b16c9ffb2432f2d0f63647df6735a67b0333a51 Mon Sep 17 00:00:00 2001 From: Li Zhi Hui Date: Mon, 21 Nov 2011 15:40:39 +0800 Subject: [PATCH 21/22] qcow: Use bdrv functions to replace file operation Since common file operation functions lack of error detection and use much more I/O syscalls, so change them to bdrv series functions and reduce I/O request. Signed-off-by: Li Zhi Hui Signed-off-by: Kevin Wolf --- block/qcow.c | 54 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 20 deletions(-) diff --git a/block/qcow.c b/block/qcow.c index e0d0b8870..b1cfe1f69 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -644,13 +644,14 @@ static void qcow_close(BlockDriverState *bs) static int qcow_create(const char *filename, QEMUOptionParameter *options) { - int fd, header_size, backing_filename_len, l1_size, i, shift; + int header_size, backing_filename_len, l1_size, shift, i; QCowHeader header; - uint64_t tmp; + uint8_t *tmp; int64_t total_size = 0; const char *backing_file = NULL; int flags = 0; int ret; + BlockDriverState *qcow_bs; /* Read out options */ while (options && options->name) { @@ -664,9 +665,21 @@ static int qcow_create(const char *filename, QEMUOptionParameter *options) options++; } - fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644); - if (fd < 0) - return -errno; + ret = bdrv_create_file(filename, options); + if (ret < 0) { + return ret; + } + + ret = bdrv_file_open(&qcow_bs, filename, BDRV_O_RDWR); + if (ret < 0) { + return ret; + } + + ret = bdrv_truncate(qcow_bs, 0); + if (ret < 0) { + goto exit; + } + memset(&header, 0, sizeof(header)); header.magic = cpu_to_be32(QCOW_MAGIC); header.version = cpu_to_be32(QCOW_VERSION); @@ -702,33 +715,34 @@ static int qcow_create(const char *filename, QEMUOptionParameter *options) } /* write all the data */ - ret = qemu_write_full(fd, &header, sizeof(header)); + ret = bdrv_pwrite(qcow_bs, 0, &header, sizeof(header)); if (ret != sizeof(header)) { - ret = -errno; goto exit; } if (backing_file) { - ret = qemu_write_full(fd, backing_file, backing_filename_len); + ret = bdrv_pwrite(qcow_bs, sizeof(header), + backing_file, backing_filename_len); if (ret != backing_filename_len) { - ret = -errno; - goto exit; - } - - } - lseek(fd, header_size, SEEK_SET); - tmp = 0; - for(i = 0;i < l1_size; i++) { - ret = qemu_write_full(fd, &tmp, sizeof(tmp)); - if (ret != sizeof(tmp)) { - ret = -errno; goto exit; } } + tmp = g_malloc0(BDRV_SECTOR_SIZE); + for (i = 0; i < ((sizeof(uint64_t)*l1_size + BDRV_SECTOR_SIZE - 1)/ + BDRV_SECTOR_SIZE); i++) { + ret = bdrv_pwrite(qcow_bs, header_size + + BDRV_SECTOR_SIZE*i, tmp, BDRV_SECTOR_SIZE); + if (ret != BDRV_SECTOR_SIZE) { + g_free(tmp); + goto exit; + } + } + + g_free(tmp); ret = 0; exit: - close(fd); + bdrv_delete(qcow_bs); return ret; } From e2f0c49ffae8d3a00272c3cbc68850cc5aafbffa Mon Sep 17 00:00:00 2001 From: Thomas Higdon Date: Tue, 24 Jan 2012 12:19:44 -0500 Subject: [PATCH 22/22] scsi: Guard against buflen exceeding req->cmd.xfer in scsi_disk_emulate_command Limit the return value (corresponding to the length of the buffer to be DMAed back to the intiator) to the value in req->cmd.xfer, which is the amount of data that the initiator expects. Eliminate now-duplicate code that does this guarding in the functions for individual commands. Without this, the SCRIPTS code in the emulated LSI device eventually raises a DMA interrupt for a data overrun when an INQUIRY command whose buflen exceeds req->cmd.xfer is processed. It's the responsibility of the client to provide a request buffer and allocation length that are large enough for the result of the command. Signed-off-by: Thomas Higdon Reviewed-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- hw/scsi-disk.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c index 5d8bf5358..11cfe73df 100644 --- a/hw/scsi-disk.c +++ b/hw/scsi-disk.c @@ -391,9 +391,6 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf) } l = strlen(s->serial); - if (l > req->cmd.xfer) { - l = req->cmd.xfer; - } if (l > 20) { l = 20; } @@ -1002,9 +999,6 @@ static int scsi_disk_emulate_mode_sense(SCSIDiskReq *r, uint8_t *outbuf) outbuf[0] = ((buflen - 2) >> 8) & 0xff; outbuf[1] = (buflen - 2) & 0xff; } - if (buflen > r->req.cmd.xfer) { - buflen = r->req.cmd.xfer; - } return buflen; } @@ -1038,9 +1032,6 @@ static int scsi_disk_emulate_read_toc(SCSIRequest *req, uint8_t *outbuf) default: return -1; } - if (toclen > req->cmd.xfer) { - toclen = req->cmd.xfer; - } return toclen; } @@ -1251,6 +1242,7 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r) scsi_check_condition(r, SENSE_CODE(INVALID_OPCODE)); return -1; } + buflen = MIN(buflen, req->cmd.xfer); return buflen; not_ready: