sim-card
/
qemu
Archived
10
0
Fork 0

VMDK: introduce VmdkExtent

Introduced VmdkExtent array into BDRVVmdkState, enable holding multiple
image extents for multiple file image support.

Signed-off-by: Fam Zheng <famcool@gmail.com>
Reviewed-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
This commit is contained in:
Fam Zheng 2011-07-12 19:56:28 +08:00 committed by Kevin Wolf
parent 69d7e218fd
commit b3976d3c8b
1 changed files with 246 additions and 102 deletions

View File

@ -60,7 +60,11 @@ typedef struct {
#define L2_CACHE_SIZE 16
typedef struct BDRVVmdkState {
typedef struct VmdkExtent {
BlockDriverState *file;
bool flat;
int64_t sectors;
int64_t end_sector;
int64_t l1_table_offset;
int64_t l1_backup_table_offset;
uint32_t *l1_table;
@ -74,7 +78,13 @@ typedef struct BDRVVmdkState {
uint32_t l2_cache_counts[L2_CACHE_SIZE];
unsigned int cluster_sectors;
} VmdkExtent;
typedef struct BDRVVmdkState {
uint32_t parent_cid;
int num_extents;
/* Extent array with num_extents entries, ascend ordered by address */
VmdkExtent *extents;
} BDRVVmdkState;
typedef struct VmdkMetaData {
@ -105,6 +115,19 @@ static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
#define DESC_SIZE 20*SECTOR_SIZE // 20 sectors of 512 bytes each
#define HEADER_SIZE 512 // first sector of 512 bytes
static void vmdk_free_extents(BlockDriverState *bs)
{
int i;
BDRVVmdkState *s = bs->opaque;
for (i = 0; i < s->num_extents; i++) {
qemu_free(s->extents[i].l1_table);
qemu_free(s->extents[i].l2_cache);
qemu_free(s->extents[i].l1_backup_table);
}
qemu_free(s->extents);
}
static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
{
char desc[DESC_SIZE];
@ -358,11 +381,50 @@ static int vmdk_parent_open(BlockDriverState *bs)
return 0;
}
/* Create and append extent to the extent array. Return the added VmdkExtent
* address. return NULL if allocation failed. */
static VmdkExtent *vmdk_add_extent(BlockDriverState *bs,
BlockDriverState *file, bool flat, int64_t sectors,
int64_t l1_offset, int64_t l1_backup_offset,
uint32_t l1_size,
int l2_size, unsigned int cluster_sectors)
{
VmdkExtent *extent;
BDRVVmdkState *s = bs->opaque;
s->extents = qemu_realloc(s->extents,
(s->num_extents + 1) * sizeof(VmdkExtent));
extent = &s->extents[s->num_extents];
s->num_extents++;
memset(extent, 0, sizeof(VmdkExtent));
extent->file = file;
extent->flat = flat;
extent->sectors = sectors;
extent->l1_table_offset = l1_offset;
extent->l1_backup_table_offset = l1_backup_offset;
extent->l1_size = l1_size;
extent->l1_entry_sectors = l2_size * cluster_sectors;
extent->l2_size = l2_size;
extent->cluster_sectors = cluster_sectors;
if (s->num_extents > 1) {
extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
} else {
extent->end_sector = extent->sectors;
}
bs->total_sectors = extent->end_sector;
return extent;
}
static int vmdk_open(BlockDriverState *bs, int flags)
{
BDRVVmdkState *s = bs->opaque;
uint32_t magic;
int l1_size, i;
int i;
uint32_t l1_size, l1_entry_sectors;
VmdkExtent *extent = NULL;
if (bdrv_pread(bs->file, 0, &magic, sizeof(magic)) != sizeof(magic))
goto fail;
@ -370,32 +432,34 @@ static int vmdk_open(BlockDriverState *bs, int flags)
magic = be32_to_cpu(magic);
if (magic == VMDK3_MAGIC) {
VMDK3Header header;
if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header)) != sizeof(header))
if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header))
!= sizeof(header)) {
goto fail;
s->cluster_sectors = le32_to_cpu(header.granularity);
s->l2_size = 1 << 9;
s->l1_size = 1 << 6;
bs->total_sectors = le32_to_cpu(header.disk_sectors);
s->l1_table_offset = le32_to_cpu(header.l1dir_offset) << 9;
s->l1_backup_table_offset = 0;
s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
}
extent = vmdk_add_extent(bs, bs->file, false,
le32_to_cpu(header.disk_sectors),
le32_to_cpu(header.l1dir_offset) << 9, 0,
1 << 6, 1 << 9, le32_to_cpu(header.granularity));
} else if (magic == VMDK4_MAGIC) {
VMDK4Header header;
if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header)) != sizeof(header))
if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header))
!= sizeof(header)) {
goto fail;
bs->total_sectors = le64_to_cpu(header.capacity);
s->cluster_sectors = le64_to_cpu(header.granularity);
s->l2_size = le32_to_cpu(header.num_gtes_per_gte);
s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
if (s->l1_entry_sectors <= 0)
}
l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gte)
* le64_to_cpu(header.granularity);
l1_size = (le64_to_cpu(header.capacity) + l1_entry_sectors - 1)
/ l1_entry_sectors;
extent = vmdk_add_extent(bs, bs->file, false,
le64_to_cpu(header.capacity),
le64_to_cpu(header.gd_offset) << 9,
le64_to_cpu(header.rgd_offset) << 9,
l1_size,
le32_to_cpu(header.num_gtes_per_gte),
le64_to_cpu(header.granularity));
if (extent->l1_entry_sectors <= 0) {
goto fail;
s->l1_size = (bs->total_sectors + s->l1_entry_sectors - 1)
/ s->l1_entry_sectors;
s->l1_table_offset = le64_to_cpu(header.rgd_offset) << 9;
s->l1_backup_table_offset = le64_to_cpu(header.gd_offset) << 9;
}
// try to open parent images, if exist
if (vmdk_parent_open(bs) != 0)
goto fail;
@ -406,40 +470,49 @@ static int vmdk_open(BlockDriverState *bs, int flags)
}
/* read the L1 table */
l1_size = s->l1_size * sizeof(uint32_t);
s->l1_table = qemu_malloc(l1_size);
if (bdrv_pread(bs->file, s->l1_table_offset, s->l1_table, l1_size) != l1_size)
l1_size = extent->l1_size * sizeof(uint32_t);
extent->l1_table = qemu_malloc(l1_size);
if (bdrv_pread(bs->file,
extent->l1_table_offset,
extent->l1_table,
l1_size)
!= l1_size) {
goto fail;
for(i = 0; i < s->l1_size; i++) {
le32_to_cpus(&s->l1_table[i]);
}
for (i = 0; i < extent->l1_size; i++) {
le32_to_cpus(&extent->l1_table[i]);
}
if (s->l1_backup_table_offset) {
s->l1_backup_table = qemu_malloc(l1_size);
if (bdrv_pread(bs->file, s->l1_backup_table_offset, s->l1_backup_table, l1_size) != l1_size)
if (extent->l1_backup_table_offset) {
extent->l1_backup_table = qemu_malloc(l1_size);
if (bdrv_pread(bs->file,
extent->l1_backup_table_offset,
extent->l1_backup_table,
l1_size)
!= l1_size) {
goto fail;
for(i = 0; i < s->l1_size; i++) {
le32_to_cpus(&s->l1_backup_table[i]);
}
for (i = 0; i < extent->l1_size; i++) {
le32_to_cpus(&extent->l1_backup_table[i]);
}
}
s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
extent->l2_cache =
qemu_malloc(extent->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
return 0;
fail:
qemu_free(s->l1_backup_table);
qemu_free(s->l1_table);
qemu_free(s->l2_cache);
vmdk_free_extents(bs);
return -1;
}
static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
uint64_t offset, int allocate);
static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset,
uint64_t offset, int allocate)
static int get_whole_cluster(BlockDriverState *bs,
VmdkExtent *extent,
uint64_t cluster_offset,
uint64_t offset,
bool allocate)
{
BDRVVmdkState *s = bs->opaque;
uint8_t whole_grain[s->cluster_sectors*512]; // 128 sectors * 512 bytes each = grain size 64KB
/* 128 sectors * 512 bytes each = grain size 64KB */
uint8_t whole_grain[extent->cluster_sectors * 512];
// we will be here if it's first write on non-exist grain(cluster).
// try to read from parent image, if exist
@ -450,14 +523,14 @@ static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset,
return -1;
ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
s->cluster_sectors);
extent->cluster_sectors);
if (ret < 0) {
return -1;
}
//Write grain only into the active image
ret = bdrv_write(bs->file, cluster_offset, whole_grain,
s->cluster_sectors);
ret = bdrv_write(extent->file, cluster_offset, whole_grain,
extent->cluster_sectors);
if (ret < 0) {
return -1;
}
@ -465,29 +538,39 @@ static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset,
return 0;
}
static int vmdk_L2update(BlockDriverState *bs, VmdkMetaData *m_data)
static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
{
BDRVVmdkState *s = bs->opaque;
/* update L2 table */
if (bdrv_pwrite_sync(bs->file, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)),
&(m_data->offset), sizeof(m_data->offset)) < 0)
if (bdrv_pwrite_sync(
extent->file,
((int64_t)m_data->l2_offset * 512)
+ (m_data->l2_index * sizeof(m_data->offset)),
&(m_data->offset),
sizeof(m_data->offset)
) < 0) {
return -1;
}
/* update backup L2 table */
if (s->l1_backup_table_offset != 0) {
m_data->l2_offset = s->l1_backup_table[m_data->l1_index];
if (bdrv_pwrite_sync(bs->file, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)),
&(m_data->offset), sizeof(m_data->offset)) < 0)
if (extent->l1_backup_table_offset != 0) {
m_data->l2_offset = extent->l1_backup_table[m_data->l1_index];
if (bdrv_pwrite_sync(
extent->file,
((int64_t)m_data->l2_offset * 512)
+ (m_data->l2_index * sizeof(m_data->offset)),
&(m_data->offset), sizeof(m_data->offset)
) < 0) {
return -1;
}
}
return 0;
}
static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
uint64_t offset, int allocate)
static uint64_t get_cluster_offset(BlockDriverState *bs,
VmdkExtent *extent,
VmdkMetaData *m_data,
uint64_t offset, int allocate)
{
BDRVVmdkState *s = bs->opaque;
unsigned int l1_index, l2_offset, l2_index;
int min_index, i, j;
uint32_t min_count, *l2_table, tmp = 0;
@ -496,21 +579,23 @@ static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
if (m_data)
m_data->valid = 0;
l1_index = (offset >> 9) / s->l1_entry_sectors;
if (l1_index >= s->l1_size)
l1_index = (offset >> 9) / extent->l1_entry_sectors;
if (l1_index >= extent->l1_size) {
return 0;
l2_offset = s->l1_table[l1_index];
if (!l2_offset)
}
l2_offset = extent->l1_table[l1_index];
if (!l2_offset) {
return 0;
}
for(i = 0; i < L2_CACHE_SIZE; i++) {
if (l2_offset == s->l2_cache_offsets[i]) {
if (l2_offset == extent->l2_cache_offsets[i]) {
/* increment the hit count */
if (++s->l2_cache_counts[i] == 0xffffffff) {
if (++extent->l2_cache_counts[i] == 0xffffffff) {
for(j = 0; j < L2_CACHE_SIZE; j++) {
s->l2_cache_counts[j] >>= 1;
extent->l2_cache_counts[j] >>= 1;
}
}
l2_table = s->l2_cache + (i * s->l2_size);
l2_table = extent->l2_cache + (i * extent->l2_size);
goto found;
}
}
@ -518,20 +603,25 @@ static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
min_index = 0;
min_count = 0xffffffff;
for(i = 0; i < L2_CACHE_SIZE; i++) {
if (s->l2_cache_counts[i] < min_count) {
min_count = s->l2_cache_counts[i];
if (extent->l2_cache_counts[i] < min_count) {
min_count = extent->l2_cache_counts[i];
min_index = i;
}
}
l2_table = s->l2_cache + (min_index * s->l2_size);
if (bdrv_pread(bs->file, (int64_t)l2_offset * 512, l2_table, s->l2_size * sizeof(uint32_t)) !=
s->l2_size * sizeof(uint32_t))
l2_table = extent->l2_cache + (min_index * extent->l2_size);
if (bdrv_pread(
extent->file,
(int64_t)l2_offset * 512,
l2_table,
extent->l2_size * sizeof(uint32_t)
) != extent->l2_size * sizeof(uint32_t)) {
return 0;
}
s->l2_cache_offsets[min_index] = l2_offset;
s->l2_cache_counts[min_index] = 1;
extent->l2_cache_offsets[min_index] = l2_offset;
extent->l2_cache_counts[min_index] = 1;
found:
l2_index = ((offset >> 9) / s->cluster_sectors) % s->l2_size;
l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
cluster_offset = le32_to_cpu(l2_table[l2_index]);
if (!cluster_offset) {
@ -539,8 +629,11 @@ static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
return 0;
// Avoid the L2 tables update for the images that have snapshots.
cluster_offset = bdrv_getlength(bs->file);
bdrv_truncate(bs->file, cluster_offset + (s->cluster_sectors << 9));
cluster_offset = bdrv_getlength(extent->file);
bdrv_truncate(
extent->file,
cluster_offset + (extent->cluster_sectors << 9)
);
cluster_offset >>= 9;
tmp = cpu_to_le32(cluster_offset);
@ -551,7 +644,8 @@ static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
* This problem may occur because of insufficient space on host disk
* or inappropriate VM shutdown.
*/
if (get_whole_cluster(bs, cluster_offset, offset, allocate) == -1)
if (get_whole_cluster(
bs, extent, cluster_offset, offset, allocate) == -1)
return 0;
if (m_data) {
@ -566,33 +660,69 @@ static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
return cluster_offset;
}
static VmdkExtent *find_extent(BDRVVmdkState *s,
int64_t sector_num, VmdkExtent *start_hint)
{
VmdkExtent *extent = start_hint;
if (!extent) {
extent = &s->extents[0];
}
while (extent < &s->extents[s->num_extents]) {
if (sector_num < extent->end_sector) {
return extent;
}
extent++;
}
return NULL;
}
static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, int *pnum)
{
BDRVVmdkState *s = bs->opaque;
int index_in_cluster, n;
uint64_t cluster_offset;
cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0);
index_in_cluster = sector_num % s->cluster_sectors;
n = s->cluster_sectors - index_in_cluster;
int64_t index_in_cluster, n, ret;
uint64_t offset;
VmdkExtent *extent;
extent = find_extent(s, sector_num, NULL);
if (!extent) {
return 0;
}
if (extent->flat) {
n = extent->end_sector - sector_num;
ret = 1;
} else {
offset = get_cluster_offset(bs, extent, NULL, sector_num * 512, 0);
index_in_cluster = sector_num % extent->cluster_sectors;
n = extent->cluster_sectors - index_in_cluster;
ret = offset ? 1 : 0;
}
if (n > nb_sectors)
n = nb_sectors;
*pnum = n;
return (cluster_offset != 0);
return ret;
}
static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
uint8_t *buf, int nb_sectors)
{
BDRVVmdkState *s = bs->opaque;
int index_in_cluster, n, ret;
int ret;
uint64_t n, index_in_cluster;
VmdkExtent *extent = NULL;
uint64_t cluster_offset;
while (nb_sectors > 0) {
cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0);
index_in_cluster = sector_num % s->cluster_sectors;
n = s->cluster_sectors - index_in_cluster;
extent = find_extent(s, sector_num, extent);
if (!extent) {
return -EIO;
}
cluster_offset = get_cluster_offset(
bs, extent, NULL, sector_num << 9, 0);
index_in_cluster = sector_num % extent->cluster_sectors;
n = extent->cluster_sectors - index_in_cluster;
if (n > nb_sectors)
n = nb_sectors;
if (!cluster_offset) {
@ -621,10 +751,12 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors)
{
BDRVVmdkState *s = bs->opaque;
VmdkMetaData m_data;
int index_in_cluster, n;
VmdkExtent *extent = NULL;
int n;
int64_t index_in_cluster;
uint64_t cluster_offset;
static int cid_update = 0;
VmdkMetaData m_data;
if (sector_num > bs->total_sectors) {
fprintf(stderr,
@ -635,20 +767,35 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
}
while (nb_sectors > 0) {
index_in_cluster = sector_num & (s->cluster_sectors - 1);
n = s->cluster_sectors - index_in_cluster;
if (n > nb_sectors)
extent = find_extent(s, sector_num, extent);
if (!extent) {
return -EIO;
}
cluster_offset = get_cluster_offset(
bs,
extent,
&m_data,
sector_num << 9, 1);
if (!cluster_offset) {
return -1;
}
index_in_cluster = sector_num % extent->cluster_sectors;
n = extent->cluster_sectors - index_in_cluster;
if (n > nb_sectors) {
n = nb_sectors;
cluster_offset = get_cluster_offset(bs, &m_data, sector_num << 9, 1);
if (!cluster_offset)
return -1;
}
if (bdrv_pwrite(bs->file, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512)
if (bdrv_pwrite(bs->file,
cluster_offset + index_in_cluster * 512,
buf, n * 512)
!= n * 512) {
return -1;
}
if (m_data.valid) {
/* update L2 tables */
if (vmdk_L2update(bs, &m_data) == -1)
if (vmdk_L2update(extent, &m_data) == -1) {
return -1;
}
}
nb_sectors -= n;
sector_num += n;
@ -822,10 +969,7 @@ exit:
static void vmdk_close(BlockDriverState *bs)
{
BDRVVmdkState *s = bs->opaque;
qemu_free(s->l1_table);
qemu_free(s->l2_cache);
vmdk_free_extents(bs);
}
static int vmdk_flush(BlockDriverState *bs)