cryptospore / rpms / qemu-kvm

Forked from rpms/qemu-kvm 2 years ago
Clone
9ae3a8
From d4803ddf6139cf2ad7e2d53035b5f828da97b51c Mon Sep 17 00:00:00 2001
9ae3a8
From: Max Reitz <mreitz@redhat.com>
9ae3a8
Date: Mon, 4 Nov 2013 22:32:04 +0100
9ae3a8
Subject: [PATCH 11/87] qcow2-refcount: Repair shared refcount blocks
9ae3a8
9ae3a8
RH-Author: Max Reitz <mreitz@redhat.com>
9ae3a8
Message-id: <1383604354-12743-14-git-send-email-mreitz@redhat.com>
9ae3a8
Patchwork-id: 55313
9ae3a8
O-Subject: [RHEL-7.0 qemu-kvm PATCH 13/43] qcow2-refcount: Repair shared refcount blocks
9ae3a8
Bugzilla: 1004347
9ae3a8
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
9ae3a8
RH-Acked-by: Laszlo Ersek <lersek@redhat.com>
9ae3a8
RH-Acked-by: Fam Zheng <famz@redhat.com>
9ae3a8
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
9ae3a8
9ae3a8
BZ: 1004347
9ae3a8
9ae3a8
If the refcount of a refcount block is greater than one, we can at least
9ae3a8
try to repair that problem by duplicating the affected block.
9ae3a8
9ae3a8
Signed-off-by: Max Reitz <mreitz@redhat.com>
9ae3a8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9ae3a8
(cherry picked from commit afa50193cde574528a130a25544fd6f3aa8da069)
9ae3a8
9ae3a8
Signed-off-by: Max Reitz <mreitz@redhat.com>
9ae3a8
---
9ae3a8
 block/blkdebug.c       |   1 +
9ae3a8
 block/qcow2-refcount.c | 148 ++++++++++++++++++++++++++++++++++++++++++++++++-
9ae3a8
 include/block/block.h  |   1 +
9ae3a8
 3 files changed, 148 insertions(+), 2 deletions(-)
9ae3a8
9ae3a8
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
9ae3a8
---
9ae3a8
 block/blkdebug.c       |    1 +
9ae3a8
 block/qcow2-refcount.c |  148 +++++++++++++++++++++++++++++++++++++++++++++++-
9ae3a8
 include/block/block.h  |    1 +
9ae3a8
 3 files changed, 148 insertions(+), 2 deletions(-)
9ae3a8
9ae3a8
diff --git a/block/blkdebug.c b/block/blkdebug.c
9ae3a8
index 71f99e4..d659d38 100644
9ae3a8
--- a/block/blkdebug.c
9ae3a8
+++ b/block/blkdebug.c
9ae3a8
@@ -168,6 +168,7 @@ static const char *event_names[BLKDBG_EVENT_MAX] = {
9ae3a8
 
9ae3a8
     [BLKDBG_REFTABLE_LOAD]                  = "reftable_load",
9ae3a8
     [BLKDBG_REFTABLE_GROW]                  = "reftable_grow",
9ae3a8
+    [BLKDBG_REFTABLE_UPDATE]                = "reftable_update",
9ae3a8
 
9ae3a8
     [BLKDBG_REFBLOCK_LOAD]                  = "refblock_load",
9ae3a8
     [BLKDBG_REFBLOCK_UPDATE]                = "refblock_update",
9ae3a8
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
9ae3a8
index 92ecc64..927bdeb 100644
9ae3a8
--- a/block/qcow2-refcount.c
9ae3a8
+++ b/block/qcow2-refcount.c
9ae3a8
@@ -1320,6 +1320,121 @@ fail:
9ae3a8
 }
9ae3a8
 
9ae3a8
 /*
9ae3a8
+ * Writes one sector of the refcount table to the disk
9ae3a8
+ */
9ae3a8
+#define RT_ENTRIES_PER_SECTOR (512 / sizeof(uint64_t))
9ae3a8
+static int write_reftable_entry(BlockDriverState *bs, int rt_index)
9ae3a8
+{
9ae3a8
+    BDRVQcowState *s = bs->opaque;
9ae3a8
+    uint64_t buf[RT_ENTRIES_PER_SECTOR];
9ae3a8
+    int rt_start_index;
9ae3a8
+    int i, ret;
9ae3a8
+
9ae3a8
+    rt_start_index = rt_index & ~(RT_ENTRIES_PER_SECTOR - 1);
9ae3a8
+    for (i = 0; i < RT_ENTRIES_PER_SECTOR; i++) {
9ae3a8
+        buf[i] = cpu_to_be64(s->refcount_table[rt_start_index + i]);
9ae3a8
+    }
9ae3a8
+
9ae3a8
+    ret = qcow2_pre_write_overlap_check(bs,
9ae3a8
+            QCOW2_OL_DEFAULT & ~QCOW2_OL_REFCOUNT_TABLE,
9ae3a8
+            s->refcount_table_offset + rt_start_index * sizeof(uint64_t),
9ae3a8
+            sizeof(buf));
9ae3a8
+    if (ret < 0) {
9ae3a8
+        return ret;
9ae3a8
+    }
9ae3a8
+
9ae3a8
+    BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_UPDATE);
9ae3a8
+    ret = bdrv_pwrite_sync(bs->file, s->refcount_table_offset +
9ae3a8
+            rt_start_index * sizeof(uint64_t), buf, sizeof(buf));
9ae3a8
+    if (ret < 0) {
9ae3a8
+        return ret;
9ae3a8
+    }
9ae3a8
+
9ae3a8
+    return 0;
9ae3a8
+}
9ae3a8
+
9ae3a8
+/*
9ae3a8
+ * Allocates a new cluster for the given refcount block (represented by its
9ae3a8
+ * offset in the image file) and copies the current content there. This function
9ae3a8
+ * does _not_ decrement the reference count for the currently occupied cluster.
9ae3a8
+ *
9ae3a8
+ * This function prints an informative message to stderr on error (and returns
9ae3a8
+ * -errno); on success, the offset of the new refcount block is returned.
9ae3a8
+ */
9ae3a8
+static int64_t realloc_refcount_block(BlockDriverState *bs, int reftable_index,
9ae3a8
+                                      uint64_t offset)
9ae3a8
+{
9ae3a8
+    BDRVQcowState *s = bs->opaque;
9ae3a8
+    int64_t new_offset = 0;
9ae3a8
+    void *refcount_block = NULL;
9ae3a8
+    int ret;
9ae3a8
+
9ae3a8
+    /* allocate new refcount block */
9ae3a8
+    new_offset = qcow2_alloc_clusters(bs, s->cluster_size);
9ae3a8
+    if (new_offset < 0) {
9ae3a8
+        fprintf(stderr, "Could not allocate new cluster: %s\n",
9ae3a8
+                strerror(-new_offset));
9ae3a8
+        ret = new_offset;
9ae3a8
+        goto fail;
9ae3a8
+    }
9ae3a8
+
9ae3a8
+    /* fetch current refcount block content */
9ae3a8
+    ret = qcow2_cache_get(bs, s->refcount_block_cache, offset, &refcount_block);
9ae3a8
+    if (ret < 0) {
9ae3a8
+        fprintf(stderr, "Could not fetch refcount block: %s\n", strerror(-ret));
9ae3a8
+        goto fail;
9ae3a8
+    }
9ae3a8
+
9ae3a8
+    /* the new block has not been entered into the refcount table yet, so it
9ae3a8
+     * does not count as a refcount block for this overlap check */
9ae3a8
+    ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT, new_offset,
9ae3a8
+            s->cluster_size);
9ae3a8
+    if (ret < 0) {
9ae3a8
+        fprintf(stderr, "Could not write refcount block; metadata overlap "
9ae3a8
+                "check failed: %s\n", strerror(-ret));
9ae3a8
+        /* the image will be marked corrupt, so do not even attempt to free
9ae3a8
+         * the cluster */
9ae3a8
+        new_offset = 0;
9ae3a8
+        goto fail;
9ae3a8
+    }
9ae3a8
+
9ae3a8
+    /* write to new block */
9ae3a8
+    ret = bdrv_write(bs->file, new_offset / BDRV_SECTOR_SIZE, refcount_block,
9ae3a8
+            s->cluster_sectors);
9ae3a8
+    if (ret < 0) {
9ae3a8
+        fprintf(stderr, "Could not write refcount block: %s\n", strerror(-ret));
9ae3a8
+        goto fail;
9ae3a8
+    }
9ae3a8
+
9ae3a8
+    /* update refcount table */
9ae3a8
+    assert(!(new_offset & (s->cluster_size - 1)));
9ae3a8
+    s->refcount_table[reftable_index] = new_offset;
9ae3a8
+    ret = write_reftable_entry(bs, reftable_index);
9ae3a8
+    if (ret < 0) {
9ae3a8
+        fprintf(stderr, "Could not update refcount table: %s\n",
9ae3a8
+                strerror(-ret));
9ae3a8
+        goto fail;
9ae3a8
+    }
9ae3a8
+
9ae3a8
+fail:
9ae3a8
+    if (new_offset && (ret < 0)) {
9ae3a8
+        qcow2_free_clusters(bs, new_offset, s->cluster_size,
9ae3a8
+                QCOW2_DISCARD_ALWAYS);
9ae3a8
+    }
9ae3a8
+    if (refcount_block) {
9ae3a8
+        if (ret < 0) {
9ae3a8
+            qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block);
9ae3a8
+        } else {
9ae3a8
+            ret = qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block);
9ae3a8
+        }
9ae3a8
+    }
9ae3a8
+    if (ret < 0) {
9ae3a8
+        return ret;
9ae3a8
+    }
9ae3a8
+    return new_offset;
9ae3a8
+}
9ae3a8
+
9ae3a8
+/*
9ae3a8
  * Checks an image for refcount consistency.
9ae3a8
  *
9ae3a8
  * Returns 0 if no errors are found, the number of errors in case the image is
9ae3a8
@@ -1395,10 +1510,39 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
9ae3a8
             inc_refcounts(bs, res, refcount_table, nb_clusters,
9ae3a8
                 offset, s->cluster_size);
9ae3a8
             if (refcount_table[cluster] != 1) {
9ae3a8
-                fprintf(stderr, "ERROR refcount block %" PRId64
9ae3a8
+                fprintf(stderr, "%s refcount block %" PRId64
9ae3a8
                     " refcount=%d\n",
9ae3a8
+                    fix & BDRV_FIX_ERRORS ? "Repairing" :
9ae3a8
+                                            "ERROR",
9ae3a8
                     i, refcount_table[cluster]);
9ae3a8
-                res->corruptions++;
9ae3a8
+
9ae3a8
+                if (fix & BDRV_FIX_ERRORS) {
9ae3a8
+                    int64_t new_offset;
9ae3a8
+
9ae3a8
+                    new_offset = realloc_refcount_block(bs, i, offset);
9ae3a8
+                    if (new_offset < 0) {
9ae3a8
+                        res->corruptions++;
9ae3a8
+                        continue;
9ae3a8
+                    }
9ae3a8
+
9ae3a8
+                    /* update refcounts */
9ae3a8
+                    if ((new_offset >> s->cluster_bits) >= nb_clusters) {
9ae3a8
+                        /* increase refcount_table size if necessary */
9ae3a8
+                        int old_nb_clusters = nb_clusters;
9ae3a8
+                        nb_clusters = (new_offset >> s->cluster_bits) + 1;
9ae3a8
+                        refcount_table = g_realloc(refcount_table,
9ae3a8
+                                nb_clusters * sizeof(uint16_t));
9ae3a8
+                        memset(&refcount_table[old_nb_clusters], 0, (nb_clusters
9ae3a8
+                                - old_nb_clusters) * sizeof(uint16_t));
9ae3a8
+                    }
9ae3a8
+                    refcount_table[cluster]--;
9ae3a8
+                    inc_refcounts(bs, res, refcount_table, nb_clusters,
9ae3a8
+                            new_offset, s->cluster_size);
9ae3a8
+
9ae3a8
+                    res->corruptions_fixed++;
9ae3a8
+                } else {
9ae3a8
+                    res->corruptions++;
9ae3a8
+                }
9ae3a8
             }
9ae3a8
         }
9ae3a8
     }
9ae3a8
diff --git a/include/block/block.h b/include/block/block.h
9ae3a8
index 03ebc47..39770a3 100644
9ae3a8
--- a/include/block/block.h
9ae3a8
+++ b/include/block/block.h
9ae3a8
@@ -447,6 +447,7 @@ typedef enum {
9ae3a8
 
9ae3a8
     BLKDBG_REFTABLE_LOAD,
9ae3a8
     BLKDBG_REFTABLE_GROW,
9ae3a8
+    BLKDBG_REFTABLE_UPDATE,
9ae3a8
 
9ae3a8
     BLKDBG_REFBLOCK_LOAD,
9ae3a8
     BLKDBG_REFBLOCK_UPDATE,
9ae3a8
-- 
9ae3a8
1.7.1
9ae3a8