diff --git a/block.c b/block.c index 3621d11de..ae297bb84 100644 --- a/block.c +++ b/block.c @@ -50,6 +50,7 @@ typedef enum { BDRV_REQ_COPY_ON_READ = 0x1, + BDRV_REQ_ZERO_WRITE = 0x2, } BdrvRequestFlags; static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load); @@ -69,7 +70,8 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, BdrvRequestFlags flags); static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, + BdrvRequestFlags flags); static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov, @@ -1300,7 +1302,7 @@ static void coroutine_fn bdrv_rw_co_entry(void *opaque) rwco->nb_sectors, rwco->qiov, 0); } else { rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num, - rwco->nb_sectors, rwco->qiov); + rwco->nb_sectors, rwco->qiov, 0); } } @@ -1515,6 +1517,7 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, */ void *bounce_buffer; + BlockDriver *drv = bs->drv; struct iovec iov; QEMUIOVector bounce_qiov; int64_t cluster_sector_num; @@ -1535,14 +1538,21 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len); qemu_iovec_init_external(&bounce_qiov, &iov, 1); - ret = bs->drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors, - &bounce_qiov); + ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors, + &bounce_qiov); if (ret < 0) { goto err; } - ret = bs->drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors, + if (drv->bdrv_co_write_zeroes && + buffer_is_zero(bounce_buffer, iov.iov_len)) { + ret = drv->bdrv_co_write_zeroes(bs, cluster_sector_num, + cluster_nb_sectors); + } else { + ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors, &bounce_qiov); + } + if (ret < 0) { /* It might be okay to ignore write errors for guest requests. If this * is a deliberate copy-on-read then we don't want to ignore the error. @@ -1639,11 +1649,37 @@ int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs, BDRV_REQ_COPY_ON_READ); } +static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, + int64_t sector_num, int nb_sectors) +{ + BlockDriver *drv = bs->drv; + QEMUIOVector qiov; + struct iovec iov; + int ret; + + /* First try the efficient write zeroes operation */ + if (drv->bdrv_co_write_zeroes) { + return drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors); + } + + /* Fall back to bounce buffer if write zeroes is unsupported */ + iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE; + iov.iov_base = qemu_blockalign(bs, iov.iov_len); + memset(iov.iov_base, 0, iov.iov_len); + qemu_iovec_init_external(&qiov, &iov, 1); + + ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, &qiov); + + qemu_vfree(iov.iov_base); + return ret; +} + /* * Handle a write request in coroutine context */ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, + BdrvRequestFlags flags) { BlockDriver *drv = bs->drv; BdrvTrackedRequest req; @@ -1670,7 +1706,11 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, tracked_request_begin(&req, bs, sector_num, nb_sectors, true); - ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov); + if (flags & BDRV_REQ_ZERO_WRITE) { + ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors); + } else { + ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov); + } if (bs->dirty_bitmap) { set_dirty_bitmap(bs, sector_num, nb_sectors, 1); @@ -1690,7 +1730,16 @@ int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num, { trace_bdrv_co_writev(bs, sector_num, nb_sectors); - return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov); + return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0); +} + +int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, + int64_t sector_num, int nb_sectors) +{ + trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors); + + return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL, + BDRV_REQ_ZERO_WRITE); } /** @@ -3192,7 +3241,7 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque) acb->req.nb_sectors, acb->req.qiov, 0); } else { acb->req.error = bdrv_co_do_writev(bs, acb->req.sector, - acb->req.nb_sectors, acb->req.qiov); + acb->req.nb_sectors, acb->req.qiov, 0); } acb->bh = qemu_bh_new(bdrv_co_em_bh, acb); diff --git a/block.h b/block.h index cae289b2f..60ea7308c 100644 --- a/block.h +++ b/block.h @@ -146,6 +146,14 @@ int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); +/* + * Efficiently zero a region of the disk image. Note that this is a regular + * I/O request like read or write and should have a reasonable size. This + * function is not suitable for zeroing the entire image in a single request + * because it may allocate memory for the entire region. + */ +int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, int64_t sector_num, + int nb_sectors); int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum); BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, diff --git a/block/iscsi.c b/block/iscsi.c index 938c56807..bd3ca11b2 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -455,6 +455,109 @@ iscsi_connect_cb(struct iscsi_context *iscsi, int status, void *command_data, } } +static int parse_chap(struct iscsi_context *iscsi, const char *target) +{ + QemuOptsList *list; + QemuOpts *opts; + const char *user = NULL; + const char *password = NULL; + + list = qemu_find_opts("iscsi"); + if (!list) { + return 0; + } + + opts = qemu_opts_find(list, target); + if (opts == NULL) { + opts = QTAILQ_FIRST(&list->head); + if (!opts) { + return 0; + } + } + + user = qemu_opt_get(opts, "user"); + if (!user) { + return 0; + } + + password = qemu_opt_get(opts, "password"); + if (!password) { + error_report("CHAP username specified but no password was given"); + return -1; + } + + if (iscsi_set_initiator_username_pwd(iscsi, user, password)) { + error_report("Failed to set initiator username and password"); + return -1; + } + + return 0; +} + +static void parse_header_digest(struct iscsi_context *iscsi, const char *target) +{ + QemuOptsList *list; + QemuOpts *opts; + const char *digest = NULL; + + list = qemu_find_opts("iscsi"); + if (!list) { + return; + } + + opts = qemu_opts_find(list, target); + if (opts == NULL) { + opts = QTAILQ_FIRST(&list->head); + if (!opts) { + return; + } + } + + digest = qemu_opt_get(opts, "header-digest"); + if (!digest) { + return; + } + + if (!strcmp(digest, "CRC32C")) { + iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C); + } else if (!strcmp(digest, "NONE")) { + iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE); + } else if (!strcmp(digest, "CRC32C-NONE")) { + iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C_NONE); + } else if (!strcmp(digest, "NONE-CRC32C")) { + iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C); + } else { + error_report("Invalid header-digest setting : %s", digest); + } +} + +static char *parse_initiator_name(const char *target) +{ + QemuOptsList *list; + QemuOpts *opts; + const char *name = NULL; + + list = qemu_find_opts("iscsi"); + if (!list) { + return g_strdup("iqn.2008-11.org.linux-kvm"); + } + + opts = qemu_opts_find(list, target); + if (opts == NULL) { + opts = QTAILQ_FIRST(&list->head); + if (!opts) { + return g_strdup("iqn.2008-11.org.linux-kvm"); + } + } + + name = qemu_opt_get(opts, "initiator-name"); + if (!name) { + return g_strdup("iqn.2008-11.org.linux-kvm"); + } + + return g_strdup(name); +} + /* * We support iscsi url's on the form * iscsi://[%@][:]// @@ -465,6 +568,7 @@ static int iscsi_open(BlockDriverState *bs, const char *filename, int flags) struct iscsi_context *iscsi = NULL; struct iscsi_url *iscsi_url = NULL; struct IscsiTask task; + char *initiator_name = NULL; int ret; if ((BDRV_SECTOR_SIZE % 512) != 0) { @@ -474,16 +578,6 @@ static int iscsi_open(BlockDriverState *bs, const char *filename, int flags) return -EINVAL; } - memset(iscsilun, 0, sizeof(IscsiLun)); - - /* Should really append the KVM name after the ':' here */ - iscsi = iscsi_create_context("iqn.2008-11.org.linux-kvm:"); - if (iscsi == NULL) { - error_report("iSCSI: Failed to create iSCSI context."); - ret = -ENOMEM; - goto failed; - } - iscsi_url = iscsi_parse_full_url(iscsi, filename); if (iscsi_url == NULL) { error_report("Failed to parse URL : %s %s", filename, @@ -492,6 +586,17 @@ static int iscsi_open(BlockDriverState *bs, const char *filename, int flags) goto failed; } + memset(iscsilun, 0, sizeof(IscsiLun)); + + initiator_name = parse_initiator_name(iscsi_url->target); + + iscsi = iscsi_create_context(initiator_name); + if (iscsi == NULL) { + error_report("iSCSI: Failed to create iSCSI context."); + ret = -ENOMEM; + goto failed; + } + if (iscsi_set_targetname(iscsi, iscsi_url->target)) { error_report("iSCSI: Failed to set target name."); ret = -EINVAL; @@ -507,6 +612,14 @@ static int iscsi_open(BlockDriverState *bs, const char *filename, int flags) goto failed; } } + + /* check if we got CHAP username/password via the options */ + if (parse_chap(iscsi, iscsi_url->target) != 0) { + error_report("iSCSI: Failed to set CHAP user/password"); + ret = -EINVAL; + goto failed; + } + if (iscsi_set_session_type(iscsi, ISCSI_SESSION_NORMAL) != 0) { error_report("iSCSI: Failed to set session type to normal."); ret = -EINVAL; @@ -515,6 +628,9 @@ static int iscsi_open(BlockDriverState *bs, const char *filename, int flags) iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C); + /* check if we got HEADER_DIGEST via the options */ + parse_header_digest(iscsi, iscsi_url->target); + task.iscsilun = iscsilun; task.status = 0; task.complete = 0; @@ -548,6 +664,9 @@ static int iscsi_open(BlockDriverState *bs, const char *filename, int flags) return 0; failed: + if (initiator_name != NULL) { + g_free(initiator_name); + } if (iscsi_url != NULL) { iscsi_destroy_url(iscsi_url); } diff --git a/block/qcow2.c b/block/qcow2.c index aa32e8d01..3692b4523 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -77,8 +77,10 @@ static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename) static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, uint64_t end_offset) { + BDRVQcowState *s = bs->opaque; QCowExtension ext; uint64_t offset; + int ret; #ifdef DEBUG_EXT printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset); @@ -129,8 +131,22 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, break; default: - /* unknown magic -- just skip it */ - offset = ((offset + ext.len + 7) & ~7); + /* unknown magic - save it in case we need to rewrite the header */ + { + Qcow2UnknownHeaderExtension *uext; + + uext = g_malloc0(sizeof(*uext) + ext.len); + uext->magic = ext.magic; + uext->len = ext.len; + QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next); + + ret = bdrv_pread(bs->file, offset , uext->data, uext->len); + if (ret < 0) { + return ret; + } + + offset = ((offset + ext.len + 7) & ~7); + } break; } } @@ -138,6 +154,16 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, return 0; } +static void cleanup_unknown_header_ext(BlockDriverState *bs) +{ + BDRVQcowState *s = bs->opaque; + Qcow2UnknownHeaderExtension *uext, *next; + + QLIST_FOREACH_SAFE(uext, &s->unknown_header_ext, next, next) { + QLIST_REMOVE(uext, next); + g_free(uext); + } +} static int qcow2_open(BlockDriverState *bs, int flags) { @@ -291,6 +317,7 @@ static int qcow2_open(BlockDriverState *bs, int flags) return ret; fail: + cleanup_unknown_header_ext(bs); qcow2_free_snapshots(bs); qcow2_refcount_close(bs); g_free(s->l1_table); @@ -632,6 +659,7 @@ static void qcow2_close(BlockDriverState *bs) qcow2_cache_destroy(bs, s->l2_table_cache); qcow2_cache_destroy(bs, s->refcount_block_cache); + cleanup_unknown_header_ext(bs); g_free(s->cluster_cache); qemu_vfree(s->cluster_data); qcow2_refcount_close(bs); @@ -669,103 +697,149 @@ static void qcow2_invalidate_cache(BlockDriverState *bs) } } -/* - * Updates the variable length parts of the qcow2 header, i.e. the backing file - * name and all extensions. qcow2 was not designed to allow such changes, so if - * we run out of space (we can only use the first cluster) this function may - * fail. - * - * Returns 0 on success, -errno in error cases. - */ -static int qcow2_update_ext_header(BlockDriverState *bs, - const char *backing_file, const char *backing_fmt) +static size_t header_ext_add(char *buf, uint32_t magic, const void *s, + size_t len, size_t buflen) { - size_t backing_file_len = 0; - size_t backing_fmt_len = 0; - BDRVQcowState *s = bs->opaque; - QCowExtension ext_backing_fmt = {0, 0}; - int ret; + QCowExtension *ext_backing_fmt = (QCowExtension*) buf; + size_t ext_len = sizeof(QCowExtension) + ((len + 7) & ~7); - /* Backing file format doesn't make sense without a backing file */ - if (backing_fmt && !backing_file) { - return -EINVAL; - } - - /* Prepare the backing file format extension if needed */ - if (backing_fmt) { - ext_backing_fmt.len = cpu_to_be32(strlen(backing_fmt)); - ext_backing_fmt.magic = cpu_to_be32(QCOW2_EXT_MAGIC_BACKING_FORMAT); - backing_fmt_len = ((sizeof(ext_backing_fmt) - + strlen(backing_fmt) + 7) & ~7); - } - - /* Check if we can fit the new header into the first cluster */ - if (backing_file) { - backing_file_len = strlen(backing_file); - } - - size_t header_size = sizeof(QCowHeader) + backing_file_len - + backing_fmt_len; - - if (header_size > s->cluster_size) { + if (buflen < ext_len) { return -ENOSPC; } - /* Rewrite backing file name and qcow2 extensions */ - size_t ext_size = header_size - sizeof(QCowHeader); - uint8_t buf[ext_size]; - size_t offset = 0; - size_t backing_file_offset = 0; + *ext_backing_fmt = (QCowExtension) { + .magic = cpu_to_be32(magic), + .len = cpu_to_be32(len), + }; + memcpy(buf + sizeof(QCowExtension), s, len); - if (backing_file) { - if (backing_fmt) { - int padding = backing_fmt_len - - (sizeof(ext_backing_fmt) + strlen(backing_fmt)); + return ext_len; +} - memcpy(buf + offset, &ext_backing_fmt, sizeof(ext_backing_fmt)); - offset += sizeof(ext_backing_fmt); +/* + * Updates the qcow2 header, including the variable length parts of it, i.e. + * the backing file name and all extensions. qcow2 was not designed to allow + * such changes, so if we run out of space (we can only use the first cluster) + * this function may fail. + * + * Returns 0 on success, -errno in error cases. + */ +int qcow2_update_header(BlockDriverState *bs) +{ + BDRVQcowState *s = bs->opaque; + QCowHeader *header; + char *buf; + size_t buflen = s->cluster_size; + int ret; + uint64_t total_size; + uint32_t refcount_table_clusters; + Qcow2UnknownHeaderExtension *uext; - memcpy(buf + offset, backing_fmt, strlen(backing_fmt)); - offset += strlen(backing_fmt); + buf = qemu_blockalign(bs, buflen); + memset(buf, 0, s->cluster_size); - memset(buf + offset, 0, padding); - offset += padding; + /* Header structure */ + header = (QCowHeader*) buf; + + if (buflen < sizeof(*header)) { + ret = -ENOSPC; + goto fail; + } + + total_size = bs->total_sectors * BDRV_SECTOR_SIZE; + refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3); + + *header = (QCowHeader) { + .magic = cpu_to_be32(QCOW_MAGIC), + .version = cpu_to_be32(QCOW_VERSION), + .backing_file_offset = 0, + .backing_file_size = 0, + .cluster_bits = cpu_to_be32(s->cluster_bits), + .size = cpu_to_be64(total_size), + .crypt_method = cpu_to_be32(s->crypt_method_header), + .l1_size = cpu_to_be32(s->l1_size), + .l1_table_offset = cpu_to_be64(s->l1_table_offset), + .refcount_table_offset = cpu_to_be64(s->refcount_table_offset), + .refcount_table_clusters = cpu_to_be32(refcount_table_clusters), + .nb_snapshots = cpu_to_be32(s->nb_snapshots), + .snapshots_offset = cpu_to_be64(s->snapshots_offset), + }; + + buf += sizeof(*header); + buflen -= sizeof(*header); + + /* Backing file format header extension */ + if (*bs->backing_format) { + ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT, + bs->backing_format, strlen(bs->backing_format), + buflen); + if (ret < 0) { + goto fail; } - memcpy(buf + offset, backing_file, backing_file_len); - backing_file_offset = sizeof(QCowHeader) + offset; + buf += ret; + buflen -= ret; } - ret = bdrv_pwrite_sync(bs->file, sizeof(QCowHeader), buf, ext_size); + /* Keep unknown header extensions */ + QLIST_FOREACH(uext, &s->unknown_header_ext, next) { + ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen); + if (ret < 0) { + goto fail; + } + + buf += ret; + buflen -= ret; + } + + /* End of header extensions */ + ret = header_ext_add(buf, QCOW2_EXT_MAGIC_END, NULL, 0, buflen); if (ret < 0) { goto fail; } - /* Update header fields */ - uint64_t be_backing_file_offset = cpu_to_be64(backing_file_offset); - uint32_t be_backing_file_size = cpu_to_be32(backing_file_len); + buf += ret; + buflen -= ret; - ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, backing_file_offset), - &be_backing_file_offset, sizeof(uint64_t)); - if (ret < 0) { - goto fail; + /* Backing file name */ + if (*bs->backing_file) { + size_t backing_file_len = strlen(bs->backing_file); + + if (buflen < backing_file_len) { + ret = -ENOSPC; + goto fail; + } + + strncpy(buf, bs->backing_file, buflen); + + header->backing_file_offset = cpu_to_be64(buf - ((char*) header)); + header->backing_file_size = cpu_to_be32(backing_file_len); } - ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, backing_file_size), - &be_backing_file_size, sizeof(uint32_t)); + /* Write the new header */ + ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size); if (ret < 0) { goto fail; } ret = 0; fail: + qemu_vfree(header); return ret; } static int qcow2_change_backing_file(BlockDriverState *bs, const char *backing_file, const char *backing_fmt) { - return qcow2_update_ext_header(bs, backing_file, backing_fmt); + /* Backing file format doesn't make sense without a backing file */ + if (backing_fmt && !backing_file) { + return -EINVAL; + } + + pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: ""); + pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: ""); + + return qcow2_update_header(bs); } static int preallocate(BlockDriverState *bs) diff --git a/block/qcow2.h b/block/qcow2.h index 99e45361f..fc3583817 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -87,6 +87,13 @@ typedef struct QCowSnapshot { struct Qcow2Cache; typedef struct Qcow2Cache Qcow2Cache; +typedef struct Qcow2UnknownHeaderExtension { + uint32_t magic; + uint32_t len; + QLIST_ENTRY(Qcow2UnknownHeaderExtension) next; + uint8_t data[]; +} Qcow2UnknownHeaderExtension; + typedef struct BDRVQcowState { int cluster_bits; int cluster_size; @@ -127,6 +134,7 @@ typedef struct BDRVQcowState { QCowSnapshot *snapshots; int flags; + QLIST_HEAD(, Qcow2UnknownHeaderExtension) unknown_header_ext; } BDRVQcowState; /* XXX: use std qcow open function ? */ @@ -178,6 +186,7 @@ static inline int64_t align_offset(int64_t offset, int n) /* qcow2.c functions */ int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, int64_t sector_num, int nb_sectors); +int qcow2_update_header(BlockDriverState *bs); /* qcow2-refcount.c functions */ int qcow2_refcount_init(BlockDriverState *bs); diff --git a/block/qed.c b/block/qed.c index 8da3ebe9d..a041d31e6 100644 --- a/block/qed.c +++ b/block/qed.c @@ -875,6 +875,12 @@ static void qed_aio_complete(QEDAIOCB *acb, int ret) qemu_iovec_destroy(&acb->cur_qiov); qed_unref_l2_cache_entry(acb->request.l2_table); + /* Free the buffer we may have allocated for zero writes */ + if (acb->flags & QED_AIOCB_ZERO) { + qemu_vfree(acb->qiov->iov[0].iov_base); + acb->qiov->iov[0].iov_base = NULL; + } + /* Arrange for a bh to invoke the completion function */ acb->bh_ret = ret; acb->bh = qemu_bh_new(qed_aio_complete_bh, acb); @@ -941,9 +947,8 @@ static void qed_aio_write_l1_update(void *opaque, int ret) /** * Update L2 table with new cluster offsets and write them out */ -static void qed_aio_write_l2_update(void *opaque, int ret) +static void qed_aio_write_l2_update(QEDAIOCB *acb, int ret, uint64_t offset) { - QEDAIOCB *acb = opaque; BDRVQEDState *s = acb_to_s(acb); bool need_alloc = acb->find_cluster_ret == QED_CLUSTER_L1; int index; @@ -959,7 +964,7 @@ static void qed_aio_write_l2_update(void *opaque, int ret) index = qed_l2_index(s, acb->cur_pos); qed_update_l2_table(s, acb->request.l2_table->table, index, acb->cur_nclusters, - acb->cur_cluster); + offset); if (need_alloc) { /* Write out the whole new L2 table */ @@ -976,6 +981,12 @@ err: qed_aio_complete(acb, ret); } +static void qed_aio_write_l2_update_cb(void *opaque, int ret) +{ + QEDAIOCB *acb = opaque; + qed_aio_write_l2_update(acb, ret, acb->cur_cluster); +} + /** * Flush new data clusters before updating the L2 table * @@ -990,7 +1001,7 @@ static void qed_aio_write_flush_before_l2_update(void *opaque, int ret) QEDAIOCB *acb = opaque; BDRVQEDState *s = acb_to_s(acb); - if (!bdrv_aio_flush(s->bs->file, qed_aio_write_l2_update, opaque)) { + if (!bdrv_aio_flush(s->bs->file, qed_aio_write_l2_update_cb, opaque)) { qed_aio_complete(acb, -EIO); } } @@ -1019,7 +1030,7 @@ static void qed_aio_write_main(void *opaque, int ret) if (s->bs->backing_hd) { next_fn = qed_aio_write_flush_before_l2_update; } else { - next_fn = qed_aio_write_l2_update; + next_fn = qed_aio_write_l2_update_cb; } } @@ -1081,6 +1092,18 @@ static bool qed_should_set_need_check(BDRVQEDState *s) return !(s->header.features & QED_F_NEED_CHECK); } +static void qed_aio_write_zero_cluster(void *opaque, int ret) +{ + QEDAIOCB *acb = opaque; + + if (ret) { + qed_aio_complete(acb, ret); + return; + } + + qed_aio_write_l2_update(acb, 0, 1); +} + /** * Write new data cluster * @@ -1092,6 +1115,7 @@ static bool qed_should_set_need_check(BDRVQEDState *s) static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len) { BDRVQEDState *s = acb_to_s(acb); + BlockDriverCompletionFunc *cb; /* Cancel timer when the first allocating request comes in */ if (QSIMPLEQ_EMPTY(&s->allocating_write_reqs)) { @@ -1109,14 +1133,26 @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len) acb->cur_nclusters = qed_bytes_to_clusters(s, qed_offset_into_cluster(s, acb->cur_pos) + len); - acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters); qemu_iovec_copy(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); + if (acb->flags & QED_AIOCB_ZERO) { + /* Skip ahead if the clusters are already zero */ + if (acb->find_cluster_ret == QED_CLUSTER_ZERO) { + qed_aio_next_io(acb, 0); + return; + } + + cb = qed_aio_write_zero_cluster; + } else { + cb = qed_aio_write_prefill; + acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters); + } + if (qed_should_set_need_check(s)) { s->header.features |= QED_F_NEED_CHECK; - qed_write_header(s, qed_aio_write_prefill, acb); + qed_write_header(s, cb, acb); } else { - qed_aio_write_prefill(acb, 0); + cb(acb, 0); } } @@ -1131,6 +1167,16 @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len) */ static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len) { + /* Allocate buffer for zero writes */ + if (acb->flags & QED_AIOCB_ZERO) { + struct iovec *iov = acb->qiov->iov; + + if (!iov->iov_base) { + iov->iov_base = qemu_blockalign(acb->common.bs, iov->iov_len); + memset(iov->iov_base, 0, iov->iov_len); + } + } + /* Calculate the I/O vector */ acb->cur_cluster = offset; qemu_iovec_copy(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); @@ -1233,8 +1279,8 @@ static void qed_aio_next_io(void *opaque, int ret) { QEDAIOCB *acb = opaque; BDRVQEDState *s = acb_to_s(acb); - QEDFindClusterFunc *io_fn = - acb->is_write ? qed_aio_write_data : qed_aio_read_data; + QEDFindClusterFunc *io_fn = (acb->flags & QED_AIOCB_WRITE) ? + qed_aio_write_data : qed_aio_read_data; trace_qed_aio_next_io(s, acb, ret, acb->cur_pos + acb->cur_qiov.size); @@ -1264,14 +1310,14 @@ static BlockDriverAIOCB *qed_aio_setup(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, BlockDriverCompletionFunc *cb, - void *opaque, bool is_write) + void *opaque, int flags) { QEDAIOCB *acb = qemu_aio_get(&qed_aio_pool, bs, cb, opaque); trace_qed_aio_setup(bs->opaque, acb, sector_num, nb_sectors, - opaque, is_write); + opaque, flags); - acb->is_write = is_write; + acb->flags = flags; acb->finished = NULL; acb->qiov = qiov; acb->qiov_offset = 0; @@ -1291,7 +1337,7 @@ static BlockDriverAIOCB *bdrv_qed_aio_readv(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque) { - return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, false); + return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); } static BlockDriverAIOCB *bdrv_qed_aio_writev(BlockDriverState *bs, @@ -1300,7 +1346,8 @@ static BlockDriverAIOCB *bdrv_qed_aio_writev(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque) { - return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, true); + return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb, + opaque, QED_AIOCB_WRITE); } static BlockDriverAIOCB *bdrv_qed_aio_flush(BlockDriverState *bs, @@ -1310,6 +1357,53 @@ static BlockDriverAIOCB *bdrv_qed_aio_flush(BlockDriverState *bs, return bdrv_aio_flush(bs->file, cb, opaque); } +typedef struct { + Coroutine *co; + int ret; + bool done; +} QEDWriteZeroesCB; + +static void coroutine_fn qed_co_write_zeroes_cb(void *opaque, int ret) +{ + QEDWriteZeroesCB *cb = opaque; + + cb->done = true; + cb->ret = ret; + if (cb->co) { + qemu_coroutine_enter(cb->co, NULL); + } +} + +static int coroutine_fn bdrv_qed_co_write_zeroes(BlockDriverState *bs, + int64_t sector_num, + int nb_sectors) +{ + BlockDriverAIOCB *blockacb; + QEDWriteZeroesCB cb = { .done = false }; + QEMUIOVector qiov; + struct iovec iov; + + /* Zero writes start without an I/O buffer. If a buffer becomes necessary + * then it will be allocated during request processing. + */ + iov.iov_base = NULL, + iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE, + + qemu_iovec_init_external(&qiov, &iov, 1); + blockacb = qed_aio_setup(bs, sector_num, &qiov, nb_sectors, + qed_co_write_zeroes_cb, &cb, + QED_AIOCB_WRITE | QED_AIOCB_ZERO); + if (!blockacb) { + return -EIO; + } + if (!cb.done) { + cb.co = qemu_coroutine_self(); + qemu_coroutine_yield(); + } + assert(cb.done); + return cb.ret; +} + static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset) { BDRVQEDState *s = bs->opaque; @@ -1469,6 +1563,7 @@ static BlockDriver bdrv_qed = { .bdrv_aio_readv = bdrv_qed_aio_readv, .bdrv_aio_writev = bdrv_qed_aio_writev, .bdrv_aio_flush = bdrv_qed_aio_flush, + .bdrv_co_write_zeroes = bdrv_qed_co_write_zeroes, .bdrv_truncate = bdrv_qed_truncate, .bdrv_getlength = bdrv_qed_getlength, .bdrv_get_info = bdrv_qed_get_info, diff --git a/block/qed.h b/block/qed.h index 62cbd3b89..62624a1f3 100644 --- a/block/qed.h +++ b/block/qed.h @@ -123,12 +123,17 @@ typedef struct QEDRequest { CachedL2Table *l2_table; } QEDRequest; +enum { + QED_AIOCB_WRITE = 0x0001, /* read or write? */ + QED_AIOCB_ZERO = 0x0002, /* zero write, used with QED_AIOCB_WRITE */ +}; + typedef struct QEDAIOCB { BlockDriverAIOCB common; QEMUBH *bh; int bh_ret; /* final return status for completion bh */ QSIMPLEQ_ENTRY(QEDAIOCB) next; /* next request */ - bool is_write; /* false - read, true - write */ + int flags; /* QED_AIOCB_* bits ORed together */ bool *finished; /* signal for cancel completion */ uint64_t end_pos; /* request end on block device, in bytes */ diff --git a/block/sheepdog.c b/block/sheepdog.c index 941640016..00276f6f4 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -629,6 +629,9 @@ static void coroutine_fn aio_read_response(void *opaque) switch (acb->aiocb_type) { case AIOCB_WRITE_UDATA: + /* this coroutine context is no longer suitable for co_recv + * because we may send data to update vdi objects */ + s->co_recv = NULL; if (!is_data_obj(aio_req->oid)) { break; } diff --git a/block/vpc.c b/block/vpc.c index 89a5ee266..6b4816f56 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -161,13 +161,27 @@ static int vpc_open(BlockDriverState *bs, int flags) uint8_t buf[HEADER_SIZE]; uint32_t checksum; int err = -1; + int disk_type = VHD_DYNAMIC; if (bdrv_pread(bs->file, 0, s->footer_buf, HEADER_SIZE) != HEADER_SIZE) goto fail; footer = (struct vhd_footer*) s->footer_buf; - if (strncmp(footer->creator, "conectix", 8)) - goto fail; + if (strncmp(footer->creator, "conectix", 8)) { + int64_t offset = bdrv_getlength(bs->file); + if (offset < HEADER_SIZE) { + goto fail; + } + /* If a fixed disk, the footer is found only at the end of the file */ + if (bdrv_pread(bs->file, offset-HEADER_SIZE, s->footer_buf, HEADER_SIZE) + != HEADER_SIZE) { + goto fail; + } + if (strncmp(footer->creator, "conectix", 8)) { + goto fail; + } + disk_type = VHD_FIXED; + } checksum = be32_to_cpu(footer->checksum); footer->checksum = 0; @@ -186,49 +200,54 @@ static int vpc_open(BlockDriverState *bs, int flags) goto fail; } - if (bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf, HEADER_SIZE) - != HEADER_SIZE) - goto fail; - - dyndisk_header = (struct vhd_dyndisk_header*) buf; - - if (strncmp(dyndisk_header->magic, "cxsparse", 8)) - goto fail; - - - s->block_size = be32_to_cpu(dyndisk_header->block_size); - s->bitmap_size = ((s->block_size / (8 * 512)) + 511) & ~511; - - s->max_table_entries = be32_to_cpu(dyndisk_header->max_table_entries); - s->pagetable = g_malloc(s->max_table_entries * 4); - - s->bat_offset = be64_to_cpu(dyndisk_header->table_offset); - if (bdrv_pread(bs->file, s->bat_offset, s->pagetable, - s->max_table_entries * 4) != s->max_table_entries * 4) - goto fail; - - s->free_data_block_offset = - (s->bat_offset + (s->max_table_entries * 4) + 511) & ~511; - - for (i = 0; i < s->max_table_entries; i++) { - be32_to_cpus(&s->pagetable[i]); - if (s->pagetable[i] != 0xFFFFFFFF) { - int64_t next = (512 * (int64_t) s->pagetable[i]) + - s->bitmap_size + s->block_size; - - if (next> s->free_data_block_offset) - s->free_data_block_offset = next; + if (disk_type == VHD_DYNAMIC) { + if (bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf, + HEADER_SIZE) != HEADER_SIZE) { + goto fail; } - } - s->last_bitmap_offset = (int64_t) -1; + dyndisk_header = (struct vhd_dyndisk_header *) buf; + + if (strncmp(dyndisk_header->magic, "cxsparse", 8)) { + goto fail; + } + + s->block_size = be32_to_cpu(dyndisk_header->block_size); + s->bitmap_size = ((s->block_size / (8 * 512)) + 511) & ~511; + + s->max_table_entries = be32_to_cpu(dyndisk_header->max_table_entries); + s->pagetable = g_malloc(s->max_table_entries * 4); + + s->bat_offset = be64_to_cpu(dyndisk_header->table_offset); + if (bdrv_pread(bs->file, s->bat_offset, s->pagetable, + s->max_table_entries * 4) != s->max_table_entries * 4) { + goto fail; + } + + s->free_data_block_offset = + (s->bat_offset + (s->max_table_entries * 4) + 511) & ~511; + + for (i = 0; i < s->max_table_entries; i++) { + be32_to_cpus(&s->pagetable[i]); + if (s->pagetable[i] != 0xFFFFFFFF) { + int64_t next = (512 * (int64_t) s->pagetable[i]) + + s->bitmap_size + s->block_size; + + if (next > s->free_data_block_offset) { + s->free_data_block_offset = next; + } + } + } + + s->last_bitmap_offset = (int64_t) -1; #ifdef CACHE - s->pageentry_u8 = g_malloc(512); - s->pageentry_u32 = s->pageentry_u8; - s->pageentry_u16 = s->pageentry_u8; - s->last_pagetable = -1; + s->pageentry_u8 = g_malloc(512); + s->pageentry_u32 = s->pageentry_u8; + s->pageentry_u16 = s->pageentry_u8; + s->last_pagetable = -1; #endif + } qemu_co_mutex_init(&s->lock); @@ -395,7 +414,11 @@ static int vpc_read(BlockDriverState *bs, int64_t sector_num, int ret; int64_t offset; int64_t sectors, sectors_per_block; + struct vhd_footer *footer = (struct vhd_footer *) s->footer_buf; + if (cpu_to_be32(footer->type) == VHD_FIXED) { + return bdrv_read(bs->file, sector_num, buf, nb_sectors); + } while (nb_sectors > 0) { offset = get_sector_offset(bs, sector_num, 0); @@ -440,7 +463,11 @@ static int vpc_write(BlockDriverState *bs, int64_t sector_num, int64_t offset; int64_t sectors, sectors_per_block; int ret; + struct vhd_footer *footer = (struct vhd_footer *) s->footer_buf; + if (cpu_to_be32(footer->type) == VHD_FIXED) { + return bdrv_write(bs->file, sector_num, buf, nb_sectors); + } while (nb_sectors > 0) { offset = get_sector_offset(bs, sector_num, 1); @@ -533,70 +560,14 @@ static int calculate_geometry(int64_t total_sectors, uint16_t* cyls, return 0; } -static int vpc_create(const char *filename, QEMUOptionParameter *options) +static int create_dynamic_disk(int fd, uint8_t *buf, int64_t total_sectors) { - uint8_t buf[1024]; - struct vhd_footer* footer = (struct vhd_footer*) buf; struct vhd_dyndisk_header* dyndisk_header = (struct vhd_dyndisk_header*) buf; - int fd, i; - uint16_t cyls = 0; - uint8_t heads = 0; - uint8_t secs_per_cyl = 0; size_t block_size, num_bat_entries; - int64_t total_sectors = 0; + int i; int ret = -EIO; - // Read out options - total_sectors = get_option_parameter(options, BLOCK_OPT_SIZE)->value.n / - BDRV_SECTOR_SIZE; - - // Create the file - fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644); - if (fd < 0) - return -EIO; - - /* Calculate matching total_size and geometry. Increase the number of - sectors requested until we get enough (or fail). */ - for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) { - if (calculate_geometry(total_sectors + i, - &cyls, &heads, &secs_per_cyl)) { - ret = -EFBIG; - goto fail; - } - } - total_sectors = (int64_t) cyls * heads * secs_per_cyl; - - // Prepare the Hard Disk Footer - memset(buf, 0, 1024); - - memcpy(footer->creator, "conectix", 8); - // TODO Check if "qemu" creator_app is ok for VPC - memcpy(footer->creator_app, "qemu", 4); - memcpy(footer->creator_os, "Wi2k", 4); - - footer->features = be32_to_cpu(0x02); - footer->version = be32_to_cpu(0x00010000); - footer->data_offset = be64_to_cpu(HEADER_SIZE); - footer->timestamp = be32_to_cpu(time(NULL) - VHD_TIMESTAMP_BASE); - - // Version of Virtual PC 2007 - footer->major = be16_to_cpu(0x0005); - footer->minor =be16_to_cpu(0x0003); - - footer->orig_size = be64_to_cpu(total_sectors * 512); - footer->size = be64_to_cpu(total_sectors * 512); - - footer->cyls = be16_to_cpu(cyls); - footer->heads = heads; - footer->secs_per_cyl = secs_per_cyl; - - footer->type = be32_to_cpu(VHD_DYNAMIC); - - // TODO uuid is missing - - footer->checksum = be32_to_cpu(vpc_checksum(buf, HEADER_SIZE)); - // Write the footer (twice: at the beginning and at the end) block_size = 0x200000; num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512); @@ -624,7 +595,6 @@ static int vpc_create(const char *filename, QEMUOptionParameter *options) } } - // Prepare the Dynamic Disk Header memset(buf, 0, 1024); @@ -652,6 +622,129 @@ static int vpc_create(const char *filename, QEMUOptionParameter *options) } ret = 0; + fail: + return ret; +} + +static int create_fixed_disk(int fd, uint8_t *buf, int64_t total_size) +{ + int ret = -EIO; + + /* Add footer to total size */ + total_size += 512; + if (ftruncate(fd, total_size) != 0) { + ret = -errno; + goto fail; + } + if (lseek(fd, -512, SEEK_END) < 0) { + goto fail; + } + if (write(fd, buf, HEADER_SIZE) != HEADER_SIZE) { + goto fail; + } + + ret = 0; + + fail: + return ret; +} + +static int vpc_create(const char *filename, QEMUOptionParameter *options) +{ + uint8_t buf[1024]; + struct vhd_footer *footer = (struct vhd_footer *) buf; + QEMUOptionParameter *disk_type_param; + int fd, i; + uint16_t cyls = 0; + uint8_t heads = 0; + uint8_t secs_per_cyl = 0; + int64_t total_sectors; + int64_t total_size; + int disk_type; + int ret = -EIO; + + /* Read out options */ + total_size = get_option_parameter(options, BLOCK_OPT_SIZE)->value.n; + + disk_type_param = get_option_parameter(options, BLOCK_OPT_SUBFMT); + if (disk_type_param && disk_type_param->value.s) { + if (!strcmp(disk_type_param->value.s, "dynamic")) { + disk_type = VHD_DYNAMIC; + } else if (!strcmp(disk_type_param->value.s, "fixed")) { + disk_type = VHD_FIXED; + } else { + return -EINVAL; + } + } else { + disk_type = VHD_DYNAMIC; + } + + /* Create the file */ + fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644); + if (fd < 0) { + return -EIO; + } + + /* + * Calculate matching total_size and geometry. Increase the number of + * sectors requested until we get enough (or fail). This ensures that + * qemu-img convert doesn't truncate images, but rather rounds up. + */ + total_sectors = total_size / BDRV_SECTOR_SIZE; + for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) { + if (calculate_geometry(total_sectors + i, &cyls, &heads, + &secs_per_cyl)) + { + ret = -EFBIG; + goto fail; + } + } + + total_sectors = (int64_t) cyls * heads * secs_per_cyl; + + /* Prepare the Hard Disk Footer */ + memset(buf, 0, 1024); + + memcpy(footer->creator, "conectix", 8); + /* TODO Check if "qemu" creator_app is ok for VPC */ + memcpy(footer->creator_app, "qemu", 4); + memcpy(footer->creator_os, "Wi2k", 4); + + footer->features = be32_to_cpu(0x02); + footer->version = be32_to_cpu(0x00010000); + if (disk_type == VHD_DYNAMIC) { + footer->data_offset = be64_to_cpu(HEADER_SIZE); + } else { + footer->data_offset = be64_to_cpu(0xFFFFFFFFFFFFFFFFULL); + } + footer->timestamp = be32_to_cpu(time(NULL) - VHD_TIMESTAMP_BASE); + + /* Version of Virtual PC 2007 */ + footer->major = be16_to_cpu(0x0005); + footer->minor = be16_to_cpu(0x0003); + if (disk_type == VHD_DYNAMIC) { + footer->orig_size = be64_to_cpu(total_sectors * 512); + footer->size = be64_to_cpu(total_sectors * 512); + } else { + footer->orig_size = be64_to_cpu(total_size); + footer->size = be64_to_cpu(total_size); + } + footer->cyls = be16_to_cpu(cyls); + footer->heads = heads; + footer->secs_per_cyl = secs_per_cyl; + + footer->type = be32_to_cpu(disk_type); + + /* TODO uuid is missing */ + + footer->checksum = be32_to_cpu(vpc_checksum(buf, HEADER_SIZE)); + + if (disk_type == VHD_DYNAMIC) { + ret = create_dynamic_disk(fd, buf, total_sectors); + } else { + ret = create_fixed_disk(fd, buf, total_size); + } + fail: close(fd); return ret; @@ -675,6 +768,13 @@ static QEMUOptionParameter vpc_create_options[] = { .type = OPT_SIZE, .help = "Virtual disk size" }, + { + .name = BLOCK_OPT_SUBFMT, + .type = OPT_STRING, + .help = + "Type of virtual hard disk format. Supported formats are " + "{dynamic (default) | fixed} " + }, { NULL } }; diff --git a/block_int.h b/block_int.h index 7be2988ca..7946cf66a 100644 --- a/block_int.h +++ b/block_int.h @@ -131,6 +131,14 @@ struct BlockDriver { int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); + /* + * Efficiently zero a region of the disk image. Typically an image format + * would use a compact metadata representation to implement this. This + * function pointer may be NULL and .bdrv_co_writev() will be called + * instead. + */ + int coroutine_fn (*bdrv_co_write_zeroes)(BlockDriverState *bs, + int64_t sector_num, int nb_sectors); int coroutine_fn (*bdrv_co_discard)(BlockDriverState *bs, int64_t sector_num, int nb_sectors); int coroutine_fn (*bdrv_co_is_allocated)(BlockDriverState *bs, diff --git a/compiler.h b/compiler.h index a1c079494..736e77075 100644 --- a/compiler.h +++ b/compiler.h @@ -30,8 +30,10 @@ # define QEMU_PACKED __attribute__((packed)) #endif +#define cat(x,y) x ## y +#define cat2(x,y) cat(x,y) #define QEMU_BUILD_BUG_ON(x) \ - typedef char qemu_build_bug_on__##__LINE__[(x)?-1:1]; + typedef char cat2(qemu_build_bug_on__,__LINE__)[(x)?-1:1]; #if defined __GNUC__ # if !QEMU_GNUC_PREREQ(4, 4) diff --git a/cutils.c b/cutils.c index a6ffd4644..af308cd7b 100644 --- a/cutils.c +++ b/cutils.c @@ -303,6 +303,41 @@ void qemu_iovec_memset_skip(QEMUIOVector *qiov, int c, size_t count, } } +/* + * Checks if a buffer is all zeroes + * + * Attention! The len must be a multiple of 4 * sizeof(long) due to + * restriction of optimizations in this function. + */ +bool buffer_is_zero(const void *buf, size_t len) +{ + /* + * Use long as the biggest available internal data type that fits into the + * CPU register and unroll the loop to smooth out the effect of memory + * latency. + */ + + size_t i; + long d0, d1, d2, d3; + const long * const data = buf; + + assert(len % (4 * sizeof(long)) == 0); + len /= sizeof(long); + + for (i = 0; i < len; i += 4) { + d0 = data[i + 0]; + d1 = data[i + 1]; + d2 = data[i + 2]; + d3 = data[i + 3]; + + if (d0 || d1 || d2 || d3) { + return false; + } + } + + return true; +} + #ifndef _WIN32 /* Sets a specific flag */ int fcntl_setfl(int fd, int flag) diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index c3603999d..b515f417b 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -146,6 +146,7 @@ static void ahci_check_irq(AHCIState *s) DPRINTF(-1, "check irq %#x\n", s->control_regs.irqstatus); + s->control_regs.irqstatus = 0; for (i = 0; i < s->ports; i++) { AHCIPortRegs *pr = &s->dev[i].port_regs; if (pr->irq_stat & pr->irq_mask) { @@ -216,6 +217,7 @@ static void ahci_port_write(AHCIState *s, int port, int offset, uint32_t val) break; case PORT_IRQ_STAT: pr->irq_stat &= ~val; + ahci_check_irq(s); break; case PORT_IRQ_MASK: pr->irq_mask = val & 0xfdc000ff; @@ -560,6 +562,11 @@ static void ahci_reset_port(AHCIState *s, int port) ncq_tfs->aiocb = NULL; } + /* Maybe we just finished the request thanks to bdrv_aio_cancel() */ + if (!ncq_tfs->used) { + continue; + } + qemu_sglist_destroy(&ncq_tfs->sglist); ncq_tfs->used = 0; } diff --git a/qemu-common.h b/qemu-common.h index 9b997f883..c5e9cad35 100644 --- a/qemu-common.h +++ b/qemu-common.h @@ -335,6 +335,8 @@ void qemu_iovec_memset(QEMUIOVector *qiov, int c, size_t count); void qemu_iovec_memset_skip(QEMUIOVector *qiov, int c, size_t count, size_t skip); +bool buffer_is_zero(const void *buf, size_t len); + void qemu_progress_init(int enabled, float min_skip); void qemu_progress_end(void); void qemu_progress_print(float delta, int max); diff --git a/qemu-config.c b/qemu-config.c index b030205e2..e980fd68d 100644 --- a/qemu-config.c +++ b/qemu-config.c @@ -118,6 +118,32 @@ static QemuOptsList qemu_drive_opts = { }, }; +static QemuOptsList qemu_iscsi_opts = { + .name = "iscsi", + .head = QTAILQ_HEAD_INITIALIZER(qemu_iscsi_opts.head), + .desc = { + { + .name = "user", + .type = QEMU_OPT_STRING, + .help = "username for CHAP authentication to target", + },{ + .name = "password", + .type = QEMU_OPT_STRING, + .help = "password for CHAP authentication to target", + },{ + .name = "header-digest", + .type = QEMU_OPT_STRING, + .help = "HeaderDigest setting. " + "{CRC32C|CRC32C-NONE|NONE-CRC32C|NONE}", + },{ + .name = "initiator-name", + .type = QEMU_OPT_STRING, + .help = "Initiator iqn name to use when connecting", + }, + { /* end of list */ } + }, +}; + static QemuOptsList qemu_chardev_opts = { .name = "chardev", .implied_opt_name = "backend", @@ -580,6 +606,7 @@ static QemuOptsList *vm_config_groups[32] = { &qemu_option_rom_opts, &qemu_machine_opts, &qemu_boot_opts, + &qemu_iscsi_opts, NULL, }; diff --git a/qemu-doc.texi b/qemu-doc.texi index 11f416699..83b2ad523 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -730,6 +730,57 @@ export LIBISCSI_CHAP_PASSWORD= iscsi://// @end example +Various session related parameters can be set via special options, either +in a configuration file provided via '-readconfig' or directly on the +command line. + +@example +Setting a specific initiator name to use when logging in to the target +-iscsi initiator-name=iqn.qemu.test:my-initiator +@end example + +@example +Controlling which type of header digest to negotiate with the target +-iscsi header-digest=CRC32C|CRC32C-NONE|NONE-CRC32C|NONE +@end example + +These can also be set via a configuration file +@example +[iscsi] + user = "CHAP username" + password = "CHAP password" + initiator-name = "iqn.qemu.test:my-initiator" + # header digest is one of CRC32C|CRC32C-NONE|NONE-CRC32C|NONE + header-digest = "CRC32C" +@end example + + +Setting the target name allows different options for different targets +@example +[iscsi "iqn.target.name"] + user = "CHAP username" + password = "CHAP password" + initiator-name = "iqn.qemu.test:my-initiator" + # header digest is one of CRC32C|CRC32C-NONE|NONE-CRC32C|NONE + header-digest = "CRC32C" +@end example + + +Howto use a configuration file to set iSCSI configuration options: +@example +cat >iscsi.conf <ret = bdrv_co_write_zeroes(bs, data->offset / BDRV_SECTOR_SIZE, + data->count / BDRV_SECTOR_SIZE); + data->done = true; + if (data->ret < 0) { + *data->total = data->ret; + return; + } + + *data->total = data->count; +} + +static int do_co_write_zeroes(int64_t offset, int count, int *total) +{ + Coroutine *co; + CoWriteZeroes data = { + .offset = offset, + .count = count, + .total = total, + .done = false, + }; + + co = qemu_coroutine_create(co_write_zeroes_entry); + qemu_coroutine_enter(co, &data); + while (!data.done) { + qemu_aio_wait(); + } + if (data.ret < 0) { + return data.ret; + } else { + return 1; + } +} + static int do_load_vmstate(char *buf, int64_t offset, int count, int *total) { *total = bdrv_load_vmstate(bs, (uint8_t *)buf, offset, count); @@ -643,6 +688,7 @@ static void write_help(void) " -P, -- use different pattern to fill file\n" " -C, -- report statistics in a machine parsable format\n" " -q, -- quiet mode, do not show I/O statistics\n" +" -z, -- write zeroes using bdrv_co_write_zeroes\n" "\n"); } @@ -654,7 +700,7 @@ static const cmdinfo_t write_cmd = { .cfunc = write_f, .argmin = 2, .argmax = -1, - .args = "[-abCpq] [-P pattern ] off len", + .args = "[-bCpqz] [-P pattern ] off len", .oneline = "writes a number of bytes at a specified offset", .help = write_help, }; @@ -662,16 +708,16 @@ static const cmdinfo_t write_cmd = { static int write_f(int argc, char **argv) { struct timeval t1, t2; - int Cflag = 0, pflag = 0, qflag = 0, bflag = 0; + int Cflag = 0, pflag = 0, qflag = 0, bflag = 0, Pflag = 0, zflag = 0; int c, cnt; - char *buf; + char *buf = NULL; int64_t offset; int count; /* Some compilers get confused and warn if this is not initialized. */ int total = 0; int pattern = 0xcd; - while ((c = getopt(argc, argv, "bCpP:q")) != EOF) { + while ((c = getopt(argc, argv, "bCpP:qz")) != EOF) { switch (c) { case 'b': bflag = 1; @@ -683,6 +729,7 @@ static int write_f(int argc, char **argv) pflag = 1; break; case 'P': + Pflag = 1; pattern = parse_pattern(optarg); if (pattern < 0) { return 0; @@ -691,6 +738,9 @@ static int write_f(int argc, char **argv) case 'q': qflag = 1; break; + case 'z': + zflag = 1; + break; default: return command_usage(&write_cmd); } @@ -700,8 +750,13 @@ static int write_f(int argc, char **argv) return command_usage(&write_cmd); } - if (bflag && pflag) { - printf("-b and -p cannot be specified at the same time\n"); + if (bflag + pflag + zflag > 1) { + printf("-b, -p, or -z cannot be specified at the same time\n"); + return 0; + } + + if (zflag && Pflag) { + printf("-z and -P cannot be specified at the same time\n"); return 0; } @@ -732,13 +787,17 @@ static int write_f(int argc, char **argv) } } - buf = qemu_io_alloc(count, pattern); + if (!zflag) { + buf = qemu_io_alloc(count, pattern); + } gettimeofday(&t1, NULL); if (pflag) { cnt = do_pwrite(buf, offset, count, &total); } else if (bflag) { cnt = do_save_vmstate(buf, offset, count, &total); + } else if (zflag) { + cnt = do_co_write_zeroes(offset, count, &total); } else { cnt = do_write(buf, offset, count, &total); } @@ -758,7 +817,9 @@ static int write_f(int argc, char **argv) print_report("wrote", &t2, offset, count, total, cnt, Cflag); out: - qemu_io_free(buf); + if (!zflag) { + qemu_io_free(buf); + } return 0; } diff --git a/qemu-options.hx b/qemu-options.hx index f577cc847..42d22aebf 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -1829,24 +1829,32 @@ Syntax for specifying iSCSI LUNs is Example (without authentication): @example -qemu -cdrom iscsi://192.0.2.1/iqn.2001-04.com.example/2 \ ---drive file=iscsi://192.0.2.1/iqn.2001-04.com.example/1 +qemu -iscsi initiator-name=iqn.2001-04.com.example:my-initiator \ +-cdrom iscsi://192.0.2.1/iqn.2001-04.com.example/2 \ +-drive file=iscsi://192.0.2.1/iqn.2001-04.com.example/1 @end example Example (CHAP username/password via URL): @example -qemu --drive file=iscsi://user%password@@192.0.2.1/iqn.2001-04.com.example/1 +qemu -drive file=iscsi://user%password@@192.0.2.1/iqn.2001-04.com.example/1 @end example Example (CHAP username/password via environment variables): @example LIBISCSI_CHAP_USERNAME="user" \ LIBISCSI_CHAP_PASSWORD="password" \ -qemu --drive file=iscsi://192.0.2.1/iqn.2001-04.com.example/1 +qemu -drive file=iscsi://192.0.2.1/iqn.2001-04.com.example/1 @end example iSCSI support is an optional feature of QEMU and only available when compiled and linked against libiscsi. +ETEXI +DEF("iscsi", HAS_ARG, QEMU_OPTION_iscsi, + "-iscsi [user=user][,password=password]\n" + " [,header-digest=CRC32C|CR32C-NONE|NONE-CRC32C|NONE\n" + " [,initiator-name=iqn]\n" + " iSCSI session parameters\n", QEMU_ARCH_ALL) +STEXI @item NBD QEMU supports NBD (Network Block Devices) both using TCP protocol as well diff --git a/trace-events b/trace-events index 5d0574964..9b26ce230 100644 --- a/trace-events +++ b/trace-events @@ -67,6 +67,7 @@ bdrv_lock_medium(void *bs, bool locked) "bs %p locked %d" bdrv_co_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d" bdrv_co_copy_on_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d" bdrv_co_writev(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d" +bdrv_co_write_zeroes(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d" bdrv_co_io_em(void *bs, int64_t sector_num, int nb_sectors, int is_write, void *acb) "bs %p sector_num %"PRId64" nb_sectors %d is_write %d acb %p" bdrv_co_do_copy_on_readv(void *bs, int64_t sector_num, int nb_sectors, int64_t cluster_sector_num, int cluster_nb_sectors) "bs %p sector_num %"PRId64" nb_sectors %d cluster_sector_num %"PRId64" cluster_nb_sectors %d" @@ -320,7 +321,7 @@ qed_need_check_timer_cb(void *s) "s %p" qed_start_need_check_timer(void *s) "s %p" qed_cancel_need_check_timer(void *s) "s %p" qed_aio_complete(void *s, void *acb, int ret) "s %p acb %p ret %d" -qed_aio_setup(void *s, void *acb, int64_t sector_num, int nb_sectors, void *opaque, int is_write) "s %p acb %p sector_num %"PRId64" nb_sectors %d opaque %p is_write %d" +qed_aio_setup(void *s, void *acb, int64_t sector_num, int nb_sectors, void *opaque, int flags) "s %p acb %p sector_num %"PRId64" nb_sectors %d opaque %p flags %#x" qed_aio_next_io(void *s, void *acb, int ret, uint64_t cur_pos) "s %p acb %p ret %d cur_pos %"PRIu64 qed_aio_read_data(void *s, void *acb, int ret, uint64_t offset, size_t len) "s %p acb %p ret %d offset %"PRIu64" len %zu" qed_aio_write_data(void *s, void *acb, int ret, uint64_t offset, size_t len) "s %p acb %p ret %d offset %"PRIu64" len %zu" diff --git a/vl.c b/vl.c index 9b7ddfe41..0e8425e35 100644 --- a/vl.c +++ b/vl.c @@ -2529,6 +2529,14 @@ int main(int argc, char **argv, char **envp) exit(1); } break; +#ifdef CONFIG_LIBISCSI + case QEMU_OPTION_iscsi: + opts = qemu_opts_parse(qemu_find_opts("iscsi"), optarg, 0); + if (!opts) { + exit(1); + } + break; +#endif #ifdef CONFIG_SLIRP case QEMU_OPTION_tftp: legacy_tftp_prefix = optarg;