From 1d6333da24b0175691d25b06c92a3eb7717a8a78 Mon Sep 17 00:00:00 2001
From: Max Reitz <mreitz@redhat.com>
Date: Tue, 7 Jan 2014 21:57:11 +0100
Subject: [PATCH 06/14] qcow2-cluster: Expand zero clusters
RH-Author: Max Reitz <mreitz@redhat.com>
Message-id: <1389131839-12920-7-git-send-email-mreitz@redhat.com>
Patchwork-id: 56542
O-Subject: [RHEL-7.0 qemu-kvm PATCH v2 06/14] qcow2-cluster: Expand zero clusters
Bugzilla: 1033490
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Acked-by: Fam Zheng <famz@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
BZ: 1033490
Add functionality for expanding zero clusters. This is necessary for
downgrading the image version to one without zero cluster support.
For non-backed images, this function may also just discard zero clusters
instead of truly expanding them.
Signed-off-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 32b6444d23d0ff618d73e5b766600cd258066169)
Signed-off-by: Max Reitz <mreitz@redhat.com>
Conflicts:
block/qcow2-cluster.c
Conflicts since the changes omitted from downstream commit
ca635f6c3ae10562a2165590bb84667aa61ad12f (because this particular commit
had not been backported then) have to be included here.
The same applies to the change in qcow2-cluster.c omitted from
downstream commit 0f795e4e9e128f9ff88a128ed8590d2357228b80.
---
block/qcow2-cluster.c | 232 +++++++++++++++++++++++++++++++++++++++++++++++++
block/qcow2-refcount.c | 29 ++++---
block/qcow2.h | 5 ++
3 files changed, 252 insertions(+), 14 deletions(-)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
block/qcow2-cluster.c | 232 ++++++++++++++++++++++++++++++++++++++++++++++++
block/qcow2-refcount.c | 29 +++---
block/qcow2.h | 5 +
3 files changed, 252 insertions(+), 14 deletions(-)
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 5d13515..796d7c5 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1499,3 +1499,235 @@ fail:
return ret;
}
+
+/*
+ * Expands all zero clusters in a specific L1 table (or deallocates them, for
+ * non-backed non-pre-allocated zero clusters).
+ *
+ * expanded_clusters is a bitmap where every bit corresponds to one cluster in
+ * the image file; a bit gets set if the corresponding cluster has been used for
+ * zero expansion (i.e., has been filled with zeroes and is referenced from an
+ * L2 table). nb_clusters contains the total cluster count of the image file,
+ * i.e., the number of bits in expanded_clusters.
+ */
+static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
+ int l1_size, uint8_t *expanded_clusters,
+ uint64_t nb_clusters)
+{
+ BDRVQcowState *s = bs->opaque;
+ bool is_active_l1 = (l1_table == s->l1_table);
+ uint64_t *l2_table = NULL;
+ int ret;
+ int i, j;
+
+ if (!is_active_l1) {
+ /* inactive L2 tables require a buffer to be stored in when loading
+ * them from disk */
+ l2_table = qemu_blockalign(bs, s->cluster_size);
+ }
+
+ for (i = 0; i < l1_size; i++) {
+ uint64_t l2_offset = l1_table[i] & L1E_OFFSET_MASK;
+ bool l2_dirty = false;
+
+ if (!l2_offset) {
+ /* unallocated */
+ continue;
+ }
+
+ if (is_active_l1) {
+ /* get active L2 tables from cache */
+ ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
+ (void **)&l2_table);
+ } else {
+ /* load inactive L2 tables from disk */
+ ret = bdrv_read(bs->file, l2_offset / BDRV_SECTOR_SIZE,
+ (void *)l2_table, s->cluster_sectors);
+ }
+ if (ret < 0) {
+ goto fail;
+ }
+
+ for (j = 0; j < s->l2_size; j++) {
+ uint64_t l2_entry = be64_to_cpu(l2_table[j]);
+ int64_t offset = l2_entry & L2E_OFFSET_MASK, cluster_index;
+ int cluster_type = qcow2_get_cluster_type(l2_entry);
+
+ if (cluster_type == QCOW2_CLUSTER_NORMAL) {
+ cluster_index = offset >> s->cluster_bits;
+ assert((cluster_index >= 0) && (cluster_index < nb_clusters));
+ if (expanded_clusters[cluster_index / 8] &
+ (1 << (cluster_index % 8))) {
+ /* Probably a shared L2 table; this cluster was a zero
+ * cluster which has been expanded, its refcount
+ * therefore most likely requires an update. */
+ ret = qcow2_update_cluster_refcount(bs, cluster_index, 1,
+ QCOW2_DISCARD_NEVER);
+ if (ret < 0) {
+ goto fail;
+ }
+ /* Since we just increased the refcount, the COPIED flag may
+ * no longer be set. */
+ l2_table[j] = cpu_to_be64(l2_entry & ~QCOW_OFLAG_COPIED);
+ l2_dirty = true;
+ }
+ continue;
+ }
+ else if (qcow2_get_cluster_type(l2_entry) != QCOW2_CLUSTER_ZERO) {
+ continue;
+ }
+
+ if (!offset) {
+ /* not preallocated */
+ if (!bs->backing_hd) {
+ /* not backed; therefore we can simply deallocate the
+ * cluster */
+ l2_table[j] = 0;
+ l2_dirty = true;
+ continue;
+ }
+
+ offset = qcow2_alloc_clusters(bs, s->cluster_size);
+ if (offset < 0) {
+ ret = offset;
+ goto fail;
+ }
+ }
+
+ ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size);
+ if (ret < 0) {
+ qcow2_free_clusters(bs, offset, s->cluster_size,
+ QCOW2_DISCARD_ALWAYS);
+ goto fail;
+ }
+
+ ret = bdrv_write_zeroes(bs->file, offset / BDRV_SECTOR_SIZE,
+ s->cluster_sectors, 0);
+ if (ret < 0) {
+ qcow2_free_clusters(bs, offset, s->cluster_size,
+ QCOW2_DISCARD_ALWAYS);
+ goto fail;
+ }
+
+ l2_table[j] = cpu_to_be64(offset | QCOW_OFLAG_COPIED);
+ l2_dirty = true;
+
+ cluster_index = offset >> s->cluster_bits;
+ assert((cluster_index >= 0) && (cluster_index < nb_clusters));
+ expanded_clusters[cluster_index / 8] |= 1 << (cluster_index % 8);
+ }
+
+ if (is_active_l1) {
+ if (l2_dirty) {
+ qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+ qcow2_cache_depends_on_flush(s->l2_table_cache);
+ }
+ ret = qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table);
+ if (ret < 0) {
+ l2_table = NULL;
+ goto fail;
+ }
+ } else {
+ if (l2_dirty) {
+ ret = qcow2_pre_write_overlap_check(bs,
+ QCOW2_OL_INACTIVE_L2 | QCOW2_OL_ACTIVE_L2, l2_offset,
+ s->cluster_size);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ ret = bdrv_write(bs->file, l2_offset / BDRV_SECTOR_SIZE,
+ (void *)l2_table, s->cluster_sectors);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+ }
+ }
+
+ ret = 0;
+
+fail:
+ if (l2_table) {
+ if (!is_active_l1) {
+ qemu_vfree(l2_table);
+ } else {
+ if (ret < 0) {
+ qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table);
+ } else {
+ ret = qcow2_cache_put(bs, s->l2_table_cache,
+ (void **)&l2_table);
+ }
+ }
+ }
+ return ret;
+}
+
+/*
+ * For backed images, expands all zero clusters on the image. For non-backed
+ * images, deallocates all non-pre-allocated zero clusters (and claims the
+ * allocation for pre-allocated ones). This is important for downgrading to a
+ * qcow2 version which doesn't yet support metadata zero clusters.
+ */
+int qcow2_expand_zero_clusters(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint64_t *l1_table = NULL;
+ int cluster_to_sector_bits = s->cluster_bits - BDRV_SECTOR_BITS;
+ uint64_t nb_clusters;
+ uint8_t *expanded_clusters;
+ int ret;
+ int i, j;
+
+ nb_clusters = (bs->total_sectors + (1 << cluster_to_sector_bits) - 1)
+ >> cluster_to_sector_bits;
+ expanded_clusters = g_malloc0((nb_clusters + 7) / 8);
+
+ ret = expand_zero_clusters_in_l1(bs, s->l1_table, s->l1_size,
+ expanded_clusters, nb_clusters);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* Inactive L1 tables may point to active L2 tables - therefore it is
+ * necessary to flush the L2 table cache before trying to access the L2
+ * tables pointed to by inactive L1 entries (else we might try to expand
+ * zero clusters that have already been expanded); furthermore, it is also
+ * necessary to empty the L2 table cache, since it may contain tables which
+ * are now going to be modified directly on disk, bypassing the cache.
+ * qcow2_cache_empty() does both for us. */
+ ret = qcow2_cache_empty(bs, s->l2_table_cache);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ for (i = 0; i < s->nb_snapshots; i++) {
+ int l1_sectors = (s->snapshots[i].l1_size * sizeof(uint64_t) +
+ BDRV_SECTOR_SIZE - 1) / BDRV_SECTOR_SIZE;
+
+ l1_table = g_realloc(l1_table, l1_sectors * BDRV_SECTOR_SIZE);
+
+ ret = bdrv_read(bs->file, s->snapshots[i].l1_table_offset /
+ BDRV_SECTOR_SIZE, (void *)l1_table, l1_sectors);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ for (j = 0; j < s->snapshots[i].l1_size; j++) {
+ be64_to_cpus(&l1_table[j]);
+ }
+
+ ret = expand_zero_clusters_in_l1(bs, l1_table, s->snapshots[i].l1_size,
+ expanded_clusters, nb_clusters);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+
+ ret = 0;
+
+fail:
+ g_free(expanded_clusters);
+ g_free(l1_table);
+ return ret;
+}
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 2b72d5e..389a837 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -601,10 +601,10 @@ fail:
* If the return value is non-negative, it is the new refcount of the cluster.
* If it is negative, it is -errno and indicates an error.
*/
-static int update_cluster_refcount(BlockDriverState *bs,
- int64_t cluster_index,
- int addend,
- enum qcow2_discard_type type)
+int qcow2_update_cluster_refcount(BlockDriverState *bs,
+ int64_t cluster_index,
+ int addend,
+ enum qcow2_discard_type type)
{
BDRVQcowState *s = bs->opaque;
int ret;
@@ -733,8 +733,8 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
if (free_in_cluster == 0)
s->free_byte_offset = 0;
if ((offset & (s->cluster_size - 1)) != 0)
- update_cluster_refcount(bs, offset >> s->cluster_bits, 1,
- QCOW2_DISCARD_NEVER);
+ qcow2_update_cluster_refcount(bs, offset >> s->cluster_bits, 1,
+ QCOW2_DISCARD_NEVER);
} else {
offset = qcow2_alloc_clusters(bs, s->cluster_size);
if (offset < 0) {
@@ -744,8 +744,8 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
if ((cluster_offset + s->cluster_size) == offset) {
/* we are lucky: contiguous data */
offset = s->free_byte_offset;
- update_cluster_refcount(bs, offset >> s->cluster_bits, 1,
- QCOW2_DISCARD_NEVER);
+ qcow2_update_cluster_refcount(bs, offset >> s->cluster_bits, 1,
+ QCOW2_DISCARD_NEVER);
s->free_byte_offset += size;
} else {
s->free_byte_offset = offset;
@@ -754,8 +754,8 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
}
/* The cluster refcount was incremented, either by qcow2_alloc_clusters()
- * or explicitly by update_cluster_refcount(). Refcount blocks must be
- * flushed before the caller's L2 table updates.
+ * or explicitly by qcow2_update_cluster_refcount(). Refcount blocks must
+ * be flushed before the caller's L2 table updates.
*/
qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache);
return offset;
@@ -896,8 +896,9 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
break;
}
if (addend != 0) {
- refcount = update_cluster_refcount(bs, cluster_index, addend,
- QCOW2_DISCARD_SNAPSHOT);
+ refcount = qcow2_update_cluster_refcount(bs,
+ cluster_index, addend,
+ QCOW2_DISCARD_SNAPSHOT);
} else {
refcount = get_refcount(bs, cluster_index);
}
@@ -936,8 +937,8 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
if (addend != 0) {
- refcount = update_cluster_refcount(bs, l2_offset >> s->cluster_bits, addend,
- QCOW2_DISCARD_SNAPSHOT);
+ refcount = qcow2_update_cluster_refcount(bs, l2_offset >>
+ s->cluster_bits, addend, QCOW2_DISCARD_SNAPSHOT);
} else {
refcount = get_refcount(bs, l2_offset >> s->cluster_bits);
}
diff --git a/block/qcow2.h b/block/qcow2.h
index 5ca6b78..2660483 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -422,6 +422,9 @@ int qcow2_update_header(BlockDriverState *bs);
int qcow2_refcount_init(BlockDriverState *bs);
void qcow2_refcount_close(BlockDriverState *bs);
+int qcow2_update_cluster_refcount(BlockDriverState *bs, int64_t cluster_index,
+ int addend, enum qcow2_discard_type type);
+
int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size);
int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
int nb_clusters);
@@ -469,6 +472,8 @@ int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
int nb_sectors);
int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors);
+int qcow2_expand_zero_clusters(BlockDriverState *bs);
+
/* qcow2-snapshot.c functions */
int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info);
int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id);
--
1.7.1