9ae3a8
From 6745478978aed56c72daed821e912a9f9644932a Mon Sep 17 00:00:00 2001
9ae3a8
From: Max Reitz <mreitz@redhat.com>
9ae3a8
Date: Sat, 13 Jun 2015 16:22:21 +0200
9ae3a8
Subject: [PATCH 27/42] qcow2: Do not perform potentially damaging repairs
9ae3a8
9ae3a8
Message-id: <1434212556-3927-28-git-send-email-mreitz@redhat.com>
9ae3a8
Patchwork-id: 66046
9ae3a8
O-Subject: [RHEL-7.2 qemu-kvm PATCH 27/42] qcow2: Do not perform potentially damaging repairs
9ae3a8
Bugzilla: 1129893
9ae3a8
RH-Acked-by: Jeffrey Cody <jcody@redhat.com>
9ae3a8
RH-Acked-by: Fam Zheng <famz@redhat.com>
9ae3a8
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
9ae3a8
9ae3a8
BZ: 1129893
9ae3a8
9ae3a8
If a referenced cluster has a refcount of 0, increasing its refcount may
9ae3a8
result in clusters being allocated for the refcount structures. This may
9ae3a8
overwrite the referenced cluster, therefore we cannot simply increase
9ae3a8
the refcount then.
9ae3a8
9ae3a8
In such cases, we can either try to replicate all the refcount
9ae3a8
operations solely for the check operation, basing the allocations on the
9ae3a8
in-memory refcount table; or we can simply rebuild the whole refcount
9ae3a8
structure based on the in-memory refcount table. Since the latter will
9ae3a8
be much easier, do that.
9ae3a8
9ae3a8
To prepare for this, introduce a "rebuild" boolean which should be set
9ae3a8
to true whenever a fix is rather dangerous or too complicated using the
9ae3a8
current refcount structures. Another example for this is refcount blocks
9ae3a8
being referenced more than once.
9ae3a8
9ae3a8
Signed-off-by: Max Reitz <mreitz@redhat.com>
9ae3a8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9ae3a8
(cherry picked from commit f307b2558f61e068ce514f2dde2cad74c62036d6)
9ae3a8
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
9ae3a8
9ae3a8
Conflicts:
9ae3a8
	block/qcow2-refcount.c
9ae3a8
9ae3a8
Some conflicts in the code that is being removed.
9ae3a8
9ae3a8
Signed-off-by: Max Reitz <mreitz@redhat.com>
9ae3a8
---
9ae3a8
 block/qcow2-refcount.c | 186 +++++++------------------------------------------
9ae3a8
 1 file changed, 27 insertions(+), 159 deletions(-)
9ae3a8
9ae3a8
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
9ae3a8
index 8ce0447..3d66e7c 100644
9ae3a8
--- a/block/qcow2-refcount.c
9ae3a8
+++ b/block/qcow2-refcount.c
9ae3a8
@@ -1424,125 +1424,12 @@ fail:
9ae3a8
 }
9ae3a8
 
9ae3a8
 /*
9ae3a8
- * Writes one sector of the refcount table to the disk
9ae3a8
- */
9ae3a8
-#define RT_ENTRIES_PER_SECTOR (512 / sizeof(uint64_t))
9ae3a8
-static int write_reftable_entry(BlockDriverState *bs, int rt_index)
9ae3a8
-{
9ae3a8
-    BDRVQcowState *s = bs->opaque;
9ae3a8
-    uint64_t buf[RT_ENTRIES_PER_SECTOR];
9ae3a8
-    int rt_start_index;
9ae3a8
-    int i, ret;
9ae3a8
-
9ae3a8
-    rt_start_index = rt_index & ~(RT_ENTRIES_PER_SECTOR - 1);
9ae3a8
-    for (i = 0; i < RT_ENTRIES_PER_SECTOR; i++) {
9ae3a8
-        buf[i] = cpu_to_be64(s->refcount_table[rt_start_index + i]);
9ae3a8
-    }
9ae3a8
-
9ae3a8
-    ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_TABLE,
9ae3a8
-            s->refcount_table_offset + rt_start_index * sizeof(uint64_t),
9ae3a8
-            sizeof(buf));
9ae3a8
-    if (ret < 0) {
9ae3a8
-        return ret;
9ae3a8
-    }
9ae3a8
-
9ae3a8
-    BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_UPDATE);
9ae3a8
-    ret = bdrv_pwrite_sync(bs->file, s->refcount_table_offset +
9ae3a8
-            rt_start_index * sizeof(uint64_t), buf, sizeof(buf));
9ae3a8
-    if (ret < 0) {
9ae3a8
-        return ret;
9ae3a8
-    }
9ae3a8
-
9ae3a8
-    return 0;
9ae3a8
-}
9ae3a8
-
9ae3a8
-/*
9ae3a8
- * Allocates a new cluster for the given refcount block (represented by its
9ae3a8
- * offset in the image file) and copies the current content there. This function
9ae3a8
- * does _not_ decrement the reference count for the currently occupied cluster.
9ae3a8
- *
9ae3a8
- * This function prints an informative message to stderr on error (and returns
9ae3a8
- * -errno); on success, 0 is returned.
9ae3a8
- */
9ae3a8
-static int64_t realloc_refcount_block(BlockDriverState *bs, int reftable_index,
9ae3a8
-                                      uint64_t offset)
9ae3a8
-{
9ae3a8
-    BDRVQcowState *s = bs->opaque;
9ae3a8
-    int64_t new_offset = 0;
9ae3a8
-    void *refcount_block = NULL;
9ae3a8
-    int ret;
9ae3a8
-
9ae3a8
-    /* allocate new refcount block */
9ae3a8
-    new_offset = qcow2_alloc_clusters(bs, s->cluster_size);
9ae3a8
-    if (new_offset < 0) {
9ae3a8
-        fprintf(stderr, "Could not allocate new cluster: %s\n",
9ae3a8
-                strerror(-new_offset));
9ae3a8
-        ret = new_offset;
9ae3a8
-        goto fail;
9ae3a8
-    }
9ae3a8
-
9ae3a8
-    /* fetch current refcount block content */
9ae3a8
-    ret = qcow2_cache_get(bs, s->refcount_block_cache, offset, &refcount_block);
9ae3a8
-    if (ret < 0) {
9ae3a8
-        fprintf(stderr, "Could not fetch refcount block: %s\n", strerror(-ret));
9ae3a8
-        goto fail;
9ae3a8
-    }
9ae3a8
-
9ae3a8
-    /* new block has not yet been entered into refcount table, therefore it is
9ae3a8
-     * no refcount block yet (regarding this check) */
9ae3a8
-    ret = qcow2_pre_write_overlap_check(bs, 0, new_offset, s->cluster_size);
9ae3a8
-    if (ret < 0) {
9ae3a8
-        fprintf(stderr, "Could not write refcount block; metadata overlap "
9ae3a8
-                "check failed: %s\n", strerror(-ret));
9ae3a8
-        /* the image will be marked corrupt, so don't even attempt on freeing
9ae3a8
-         * the cluster */
9ae3a8
-        new_offset = 0;
9ae3a8
-        goto fail;
9ae3a8
-    }
9ae3a8
-
9ae3a8
-    /* write to new block */
9ae3a8
-    ret = bdrv_write(bs->file, new_offset / BDRV_SECTOR_SIZE, refcount_block,
9ae3a8
-            s->cluster_sectors);
9ae3a8
-    if (ret < 0) {
9ae3a8
-        fprintf(stderr, "Could not write refcount block: %s\n", strerror(-ret));
9ae3a8
-        goto fail;
9ae3a8
-    }
9ae3a8
-
9ae3a8
-    /* update refcount table */
9ae3a8
-    assert(!(new_offset & (s->cluster_size - 1)));
9ae3a8
-    s->refcount_table[reftable_index] = new_offset;
9ae3a8
-    ret = write_reftable_entry(bs, reftable_index);
9ae3a8
-    if (ret < 0) {
9ae3a8
-        fprintf(stderr, "Could not update refcount table: %s\n",
9ae3a8
-                strerror(-ret));
9ae3a8
-        goto fail;
9ae3a8
-    }
9ae3a8
-
9ae3a8
-fail:
9ae3a8
-    if (new_offset && (ret < 0)) {
9ae3a8
-        qcow2_free_clusters(bs, new_offset, s->cluster_size,
9ae3a8
-                QCOW2_DISCARD_ALWAYS);
9ae3a8
-    }
9ae3a8
-    if (refcount_block) {
9ae3a8
-        if (ret < 0) {
9ae3a8
-            qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block);
9ae3a8
-        } else {
9ae3a8
-            ret = qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block);
9ae3a8
-        }
9ae3a8
-    }
9ae3a8
-    if (ret < 0) {
9ae3a8
-        return ret;
9ae3a8
-    }
9ae3a8
-    return new_offset;
9ae3a8
-}
9ae3a8
-
9ae3a8
-/*
9ae3a8
  * Checks consistency of refblocks and accounts for each refblock in
9ae3a8
  * *refcount_table.
9ae3a8
  */
9ae3a8
 static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
9ae3a8
-                           BdrvCheckMode fix, uint16_t **refcount_table,
9ae3a8
-                           int64_t *nb_clusters)
9ae3a8
+                           BdrvCheckMode fix, bool *rebuild,
9ae3a8
+                           uint16_t **refcount_table, int64_t *nb_clusters)
9ae3a8
 {
9ae3a8
     BDRVQcowState *s = bs->opaque;
9ae3a8
     int64_t i, size;
9ae3a8
@@ -1558,6 +1445,7 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
9ae3a8
             fprintf(stderr, "ERROR refcount block %" PRId64 " is not "
9ae3a8
                 "cluster aligned; refcount table entry corrupted\n", i);
9ae3a8
             res->corruptions++;
9ae3a8
+            *rebuild = true;
9ae3a8
             continue;
9ae3a8
         }
9ae3a8
 
9ae3a8
@@ -1619,6 +1507,7 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
9ae3a8
 
9ae3a8
 resize_fail:
9ae3a8
                 res->corruptions++;
9ae3a8
+                *rebuild = true;
9ae3a8
                 fprintf(stderr, "ERROR could not resize image: %s\n",
9ae3a8
                         strerror(-ret));
9ae3a8
             } else {
9ae3a8
@@ -1634,43 +1523,10 @@ resize_fail:
9ae3a8
                 return ret;
9ae3a8
             }
9ae3a8
             if ((*refcount_table)[cluster] != 1) {
9ae3a8
-                fprintf(stderr, "%s refcount block %" PRId64
9ae3a8
-                    " refcount=%d\n",
9ae3a8
-                    fix & BDRV_FIX_ERRORS ? "Repairing" :
9ae3a8
-                                            "ERROR",
9ae3a8
-                    i, (*refcount_table)[cluster]);
9ae3a8
-
9ae3a8
-                if (fix & BDRV_FIX_ERRORS) {
9ae3a8
-                    int64_t new_offset;
9ae3a8
-
9ae3a8
-                    new_offset = realloc_refcount_block(bs, i, offset);
9ae3a8
-                    if (new_offset < 0) {
9ae3a8
-                        res->corruptions++;
9ae3a8
-                        continue;
9ae3a8
-                    }
9ae3a8
-
9ae3a8
-                    /* update refcounts */
9ae3a8
-                    if ((new_offset >> s->cluster_bits) >= *nb_clusters) {
9ae3a8
-                        /* increase refcount_table size if necessary */
9ae3a8
-                        int old_nb_clusters = *nb_clusters;
9ae3a8
-                        *nb_clusters = (new_offset >> s->cluster_bits) + 1;
9ae3a8
-                        *refcount_table = g_renew(uint16_t, *refcount_table,
9ae3a8
-                                                  *nb_clusters);
9ae3a8
-                        memset(&(*refcount_table)[old_nb_clusters], 0,
9ae3a8
-                               (*nb_clusters - old_nb_clusters) *
9ae3a8
-                               sizeof(**refcount_table));
9ae3a8
-                    }
9ae3a8
-                    (*refcount_table)[cluster]--;
9ae3a8
-                    ret = inc_refcounts(bs, res, refcount_table, nb_clusters,
9ae3a8
-                                        new_offset, s->cluster_size);
9ae3a8
-                    if (ret < 0) {
9ae3a8
-                        return ret;
9ae3a8
-                    }
9ae3a8
-
9ae3a8
-                    res->corruptions_fixed++;
9ae3a8
-                } else {
9ae3a8
-                    res->corruptions++;
9ae3a8
-                }
9ae3a8
+                fprintf(stderr, "ERROR refcount block %" PRId64
9ae3a8
+                        " refcount=%d\n", i, (*refcount_table)[cluster]);
9ae3a8
+                res->corruptions++;
9ae3a8
+                *rebuild = true;
9ae3a8
             }
9ae3a8
         }
9ae3a8
     }
9ae3a8
@@ -1682,8 +1538,8 @@ resize_fail:
9ae3a8
  * Calculates an in-memory refcount table.
9ae3a8
  */
9ae3a8
 static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
9ae3a8
-                               BdrvCheckMode fix, uint16_t **refcount_table,
9ae3a8
-                               int64_t *nb_clusters)
9ae3a8
+                               BdrvCheckMode fix, bool *rebuild,
9ae3a8
+                               uint16_t **refcount_table, int64_t *nb_clusters)
9ae3a8
 {
9ae3a8
     BDRVQcowState *s = bs->opaque;
9ae3a8
     int64_t i;
9ae3a8
@@ -1735,7 +1591,7 @@ static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
9ae3a8
         return ret;
9ae3a8
     }
9ae3a8
 
9ae3a8
-    return check_refblocks(bs, res, fix, refcount_table, nb_clusters);
9ae3a8
+    return check_refblocks(bs, res, fix, rebuild, refcount_table, nb_clusters);
9ae3a8
 }
9ae3a8
 
9ae3a8
 /*
9ae3a8
@@ -1743,7 +1599,8 @@ static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
9ae3a8
  * refcount as reported by the refcount structures on-disk.
9ae3a8
  */
9ae3a8
 static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
9ae3a8
-                              BdrvCheckMode fix, int64_t *highest_cluster,
9ae3a8
+                              BdrvCheckMode fix, bool *rebuild,
9ae3a8
+                              int64_t *highest_cluster,
9ae3a8
                               uint16_t *refcount_table, int64_t nb_clusters)
9ae3a8
 {
9ae3a8
     BDRVQcowState *s = bs->opaque;
9ae3a8
@@ -1768,7 +1625,9 @@ static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
9ae3a8
         if (refcount1 != refcount2) {
9ae3a8
             /* Check if we're allowed to fix the mismatch */
9ae3a8
             int *num_fixed = NULL;
9ae3a8
-            if (refcount1 > refcount2 && (fix & BDRV_FIX_LEAKS)) {
9ae3a8
+            if (refcount1 == 0) {
9ae3a8
+                *rebuild = true;
9ae3a8
+            } else if (refcount1 > refcount2 && (fix & BDRV_FIX_LEAKS)) {
9ae3a8
                 num_fixed = &res->leaks_fixed;
9ae3a8
             } else if (refcount1 < refcount2 && (fix & BDRV_FIX_ERRORS)) {
9ae3a8
                 num_fixed = &res->corruptions_fixed;
9ae3a8
@@ -1812,6 +1671,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
9ae3a8
     BDRVQcowState *s = bs->opaque;
9ae3a8
     int64_t size, highest_cluster, nb_clusters;
9ae3a8
     uint16_t *refcount_table = NULL;
9ae3a8
+    bool rebuild = false;
9ae3a8
     int ret;
9ae3a8
 
9ae3a8
     size = bdrv_getlength(bs->file);
9ae3a8
@@ -1829,14 +1689,22 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
9ae3a8
     res->bfi.total_clusters =
9ae3a8
         size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE);
9ae3a8
 
9ae3a8
-    ret = calculate_refcounts(bs, res, fix, &refcount_table, &nb_clusters);
9ae3a8
+    ret = calculate_refcounts(bs, res, fix, &rebuild, &refcount_table,
9ae3a8
+                              &nb_clusters);
9ae3a8
     if (ret < 0) {
9ae3a8
         goto fail;
9ae3a8
     }
9ae3a8
 
9ae3a8
-    compare_refcounts(bs, res, fix, &highest_cluster, refcount_table,
9ae3a8
+    compare_refcounts(bs, res, fix, &rebuild, &highest_cluster, refcount_table,
9ae3a8
                       nb_clusters);
9ae3a8
 
9ae3a8
+    if (rebuild) {
9ae3a8
+        fprintf(stderr, "ERROR need to rebuild refcount structures\n");
9ae3a8
+        res->check_errors++;
9ae3a8
+        /* Just carry on, the rest does not rely on the on-disk refcount
9ae3a8
+         * structures */
9ae3a8
+    }
9ae3a8
+
9ae3a8
     /* check OFLAG_COPIED */
9ae3a8
     ret = check_oflag_copied(bs, res, fix);
9ae3a8
     if (ret < 0) {
9ae3a8
-- 
9ae3a8
1.8.3.1
9ae3a8