From 69740c8ba878f58bc3c71f74618fc2cd1da990da Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 24 Feb 2010 14:22:25 -0600 Subject: [PATCH] virtio_blk: add block topology support Allow reading various alignment values from the config page. This allows the guest to much better align I/O requests depending on the storage topology. Note that the formats for the config values appear a bit messed up, but we follow the formats used by ATA and SCSI so they are expected in the storage world. Signed-off-by: Christoph Hellwig Signed-off-by: Rusty Russell --- drivers/block/virtio_blk.c | 61 ++++++++++++++++++++++++++++---------- include/linux/virtio_blk.h | 13 ++++++++ 2 files changed, 59 insertions(+), 15 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 51042f0ba7e..7eff828b211 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -243,10 +243,12 @@ static int index_to_minor(int index) static int __devinit virtblk_probe(struct virtio_device *vdev) { struct virtio_blk *vblk; + struct request_queue *q; int err; u64 cap; - u32 v; - u32 blk_size, sg_elems; + u32 v, blk_size, sg_elems, opt_io_size; + u16 min_io_size; + u8 physical_block_exp, alignment_offset; if (index_to_minor(index) >= 1 << MINORBITS) return -ENOSPC; @@ -293,13 +295,13 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) goto out_mempool; } - vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock); - if (!vblk->disk->queue) { + q = vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock); + if (!q) { err = -ENOMEM; goto out_put_disk; } - vblk->disk->queue->queuedata = vblk; + q->queuedata = vblk; if (index < 26) { sprintf(vblk->disk->disk_name, "vd%c", 'a' + index % 26); @@ -323,10 +325,10 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) /* If barriers are supported, tell block layer that queue is ordered */ if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH)) - blk_queue_ordered(vblk->disk->queue, QUEUE_ORDERED_DRAIN_FLUSH, + blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH, virtblk_prepare_flush); else if (virtio_has_feature(vdev, VIRTIO_BLK_F_BARRIER)) - blk_queue_ordered(vblk->disk->queue, QUEUE_ORDERED_TAG, NULL); + blk_queue_ordered(q, QUEUE_ORDERED_TAG, NULL); /* If disk is read-only in the host, the guest should obey */ if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO)) @@ -345,14 +347,14 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) set_capacity(vblk->disk, cap); /* We can handle whatever the host told us to handle. */ - blk_queue_max_phys_segments(vblk->disk->queue, vblk->sg_elems-2); - blk_queue_max_hw_segments(vblk->disk->queue, vblk->sg_elems-2); + blk_queue_max_phys_segments(q, vblk->sg_elems-2); + blk_queue_max_hw_segments(q, vblk->sg_elems-2); /* No need to bounce any requests */ - blk_queue_bounce_limit(vblk->disk->queue, BLK_BOUNCE_ANY); + blk_queue_bounce_limit(q, BLK_BOUNCE_ANY); /* No real sector limit. */ - blk_queue_max_sectors(vblk->disk->queue, -1U); + blk_queue_max_sectors(q, -1U); /* Host can optionally specify maximum segment size and number of * segments. */ @@ -360,16 +362,45 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) offsetof(struct virtio_blk_config, size_max), &v); if (!err) - blk_queue_max_segment_size(vblk->disk->queue, v); + blk_queue_max_segment_size(q, v); else - blk_queue_max_segment_size(vblk->disk->queue, -1U); + blk_queue_max_segment_size(q, -1U); /* Host can optionally specify the block size of the device */ err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE, offsetof(struct virtio_blk_config, blk_size), &blk_size); if (!err) - blk_queue_logical_block_size(vblk->disk->queue, blk_size); + blk_queue_logical_block_size(q, blk_size); + else + blk_size = queue_logical_block_size(q); + + /* Use topology information if available */ + err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY, + offsetof(struct virtio_blk_config, physical_block_exp), + &physical_block_exp); + if (!err && physical_block_exp) + blk_queue_physical_block_size(q, + blk_size * (1 << physical_block_exp)); + + err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY, + offsetof(struct virtio_blk_config, alignment_offset), + &alignment_offset); + if (!err && alignment_offset) + blk_queue_alignment_offset(q, blk_size * alignment_offset); + + err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY, + offsetof(struct virtio_blk_config, min_io_size), + &min_io_size); + if (!err && min_io_size) + blk_queue_io_min(q, blk_size * min_io_size); + + err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY, + offsetof(struct virtio_blk_config, opt_io_size), + &opt_io_size); + if (!err && opt_io_size) + blk_queue_io_opt(q, blk_size * opt_io_size); + add_disk(vblk->disk); return 0; @@ -412,7 +443,7 @@ static struct virtio_device_id id_table[] = { static unsigned int features[] = { VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, - VIRTIO_BLK_F_SCSI, VIRTIO_BLK_F_FLUSH + VIRTIO_BLK_F_SCSI, VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY }; /* diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h index fd294c56d57..e52029e9891 100644 --- a/include/linux/virtio_blk.h +++ b/include/linux/virtio_blk.h @@ -15,6 +15,7 @@ #define VIRTIO_BLK_F_BLK_SIZE 6 /* Block size of disk is available*/ #define VIRTIO_BLK_F_SCSI 7 /* Supports scsi command passthru */ #define VIRTIO_BLK_F_FLUSH 9 /* Cache flush command support */ +#define VIRTIO_BLK_F_TOPOLOGY 10 /* Topology information is available */ struct virtio_blk_config { /* The capacity (in 512-byte sectors). */ @@ -29,8 +30,20 @@ struct virtio_blk_config { __u8 heads; __u8 sectors; } geometry; + /* block size of device (if VIRTIO_BLK_F_BLK_SIZE) */ __u32 blk_size; + + /* the next 4 entries are guarded by VIRTIO_BLK_F_TOPOLOGY */ + /* exponent for physical block per logical block. */ + __u8 physical_block_exp; + /* alignment offset in logical blocks. */ + __u8 alignment_offset; + /* minimum I/O size without performance penalty in logical blocks. */ + __u16 min_io_size; + /* optimal sustained I/O size in logical blocks. */ + __u32 opt_io_size; + } __attribute__((packed)); /*