218e99
From 4413b8524dfa9dc3a6a494a2cf031265d6ef16f3 Mon Sep 17 00:00:00 2001
218e99
From: Max Reitz <mreitz@redhat.com>
218e99
Date: Mon, 4 Nov 2013 22:32:00 +0100
218e99
Subject: [PATCH 07/87] qcow2: Metadata overlap checks
218e99
218e99
RH-Author: Max Reitz <mreitz@redhat.com>
218e99
Message-id: <1383604354-12743-10-git-send-email-mreitz@redhat.com>
218e99
Patchwork-id: 55309
218e99
O-Subject: [RHEL-7.0 qemu-kvm PATCH 09/43] qcow2: Metadata overlap checks
218e99
Bugzilla: 1004347
218e99
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
218e99
RH-Acked-by: Laszlo Ersek <lersek@redhat.com>
218e99
RH-Acked-by: Fam Zheng <famz@redhat.com>
218e99
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
218e99
218e99
BZ: 1004347
218e99
218e99
Two new functions are added; the first one checks a given range in the
218e99
image file for overlaps with metadata (main header, L1 tables, L2
218e99
tables, refcount table and blocks, snapshot table).
218e99
218e99
The second one should be used immediately before writing to the image
218e99
file as it calls the first function and, upon collision, marks the
218e99
image as corrupt and makes the BDS unusable, thereby preventing
218e99
further access.
218e99
218e99
Both functions take a bitmask argument specifying the structures which
218e99
should be checked for overlaps, making it possible to also check
218e99
metadata writes against colliding with other structures.
218e99
218e99
Signed-off-by: Max Reitz <mreitz@redhat.com>
218e99
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
218e99
(cherry picked from commit a40f1c2add4d5f58d594f810fe36cabcf32bc4b0)
218e99
218e99
Signed-off-by: Max Reitz <mreitz@redhat.com>
218e99
---
218e99
 block/qcow2-refcount.c    | 172 ++++++++++++++++++++++++++++++++++++++++++++++
218e99
 block/qcow2.h             |  39 +++++++++++
218e99
 include/monitor/monitor.h |   1 +
218e99
 monitor.c                 |   1 +
218e99
 4 files changed, 213 insertions(+)
218e99
218e99
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
218e99
---
218e99
 block/qcow2-refcount.c    |  172 +++++++++++++++++++++++++++++++++++++++++++++
218e99
 block/qcow2.h             |   39 ++++++++++
218e99
 include/monitor/monitor.h |    1 +
218e99
 monitor.c                 |    1 +
218e99
 4 files changed, 213 insertions(+), 0 deletions(-)
218e99
218e99
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
218e99
index 1244693..310efcc 100644
218e99
--- a/block/qcow2-refcount.c
218e99
+++ b/block/qcow2-refcount.c
218e99
@@ -25,6 +25,8 @@
218e99
 #include "qemu-common.h"
218e99
 #include "block/block_int.h"
218e99
 #include "block/qcow2.h"
218e99
+#include "qemu/range.h"
218e99
+#include "qapi/qmp/types.h"
218e99
 
218e99
 static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size);
218e99
 static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
218e99
@@ -1372,3 +1374,173 @@ fail:
218e99
     return ret;
218e99
 }
218e99
 
218e99
+#define overlaps_with(ofs, sz) \
218e99
+    ranges_overlap(offset, size, ofs, sz)
218e99
+
218e99
+/*
218e99
+ * Checks whether the range of size bytes at offset in the image file overlaps
218e99
+ * one of the metadata sections selected by chk (a bitmask of
218e99
+ * QCow2MetadataOverlap values), i.e. a sanity check that does not rely on the
218e99
+ * refcount tables. The header test uses offset as given; every other test
218e99
+ * uses the range after it has been aligned to cluster boundaries.
218e99
+ * NOTE(review): QCOW2_OL_INACTIVE_L2 reads only whole sectors of each snapshot
218e99
+ * L1 table and tests the entries without byte-swapping -- TODO confirm.
218e99
+ * Returns:
218e99
+ * - 0 if size is 0 or no selected section overlaps the range
218e99
+ * - a positive QCow2MetadataOverlap value indicating one overlapping section
218e99
+ * - a negative value (-errno) indicating an error while performing a check,
218e99
+ *   e.g. when bdrv_read failed on QCOW2_OL_INACTIVE_L2
218e99
+ */
218e99
+int qcow2_check_metadata_overlap(BlockDriverState *bs, int chk, int64_t offset,
218e99
+                                 int64_t size)
218e99
+{
218e99
+    BDRVQcowState *s = bs->opaque;
218e99
+    int i, j;
218e99
+
218e99
+    if (!size) {
218e99
+        return 0;
218e99
+    }
218e99
+
218e99
+    if (chk & QCOW2_OL_MAIN_HEADER) {
218e99
+        if (offset < s->cluster_size) {
218e99
+            return QCOW2_OL_MAIN_HEADER;
218e99
+        }
218e99
+    }
218e99
+
218e99
+    /* align range to cluster boundaries; overlaps_with() tests the result */
218e99
+    size = align_offset(offset_into_cluster(s, offset) + size, s->cluster_size);
218e99
+    offset = start_of_cluster(s, offset);
218e99
+
218e99
+    if ((chk & QCOW2_OL_ACTIVE_L1) && s->l1_size) {
218e99
+        if (overlaps_with(s->l1_table_offset, s->l1_size * sizeof(uint64_t))) {
218e99
+            return QCOW2_OL_ACTIVE_L1;
218e99
+        }
218e99
+    }
218e99
+
218e99
+    if ((chk & QCOW2_OL_REFCOUNT_TABLE) && s->refcount_table_size) {
218e99
+        if (overlaps_with(s->refcount_table_offset,
218e99
+            s->refcount_table_size * sizeof(uint64_t))) {
218e99
+            return QCOW2_OL_REFCOUNT_TABLE;
218e99
+        }
218e99
+    }
218e99
+
218e99
+    if ((chk & QCOW2_OL_SNAPSHOT_TABLE) && s->snapshots_size) {
218e99
+        if (overlaps_with(s->snapshots_offset, s->snapshots_size)) {
218e99
+            return QCOW2_OL_SNAPSHOT_TABLE;
218e99
+        }
218e99
+    }
218e99
+
218e99
+    if ((chk & QCOW2_OL_INACTIVE_L1) && s->snapshots) {
218e99
+        for (i = 0; i < s->nb_snapshots; i++) {
218e99
+            if (s->snapshots[i].l1_size &&
218e99
+                overlaps_with(s->snapshots[i].l1_table_offset,
218e99
+                s->snapshots[i].l1_size * sizeof(uint64_t))) {
218e99
+                return QCOW2_OL_INACTIVE_L1;
218e99
+            }
218e99
+        }
218e99
+    }
218e99
+
218e99
+    if ((chk & QCOW2_OL_ACTIVE_L2) && s->l1_table) {
218e99
+        for (i = 0; i < s->l1_size; i++) {
218e99
+            if ((s->l1_table[i] & L1E_OFFSET_MASK) &&
218e99
+                overlaps_with(s->l1_table[i] & L1E_OFFSET_MASK,
218e99
+                s->cluster_size)) {
218e99
+                return QCOW2_OL_ACTIVE_L2;
218e99
+            }
218e99
+        }
218e99
+    }
218e99
+
218e99
+    if ((chk & QCOW2_OL_REFCOUNT_BLOCK) && s->refcount_table) {
218e99
+        for (i = 0; i < s->refcount_table_size; i++) {
218e99
+            if ((s->refcount_table[i] & REFT_OFFSET_MASK) &&
218e99
+                overlaps_with(s->refcount_table[i] & REFT_OFFSET_MASK,
218e99
+                s->cluster_size)) {
218e99
+                return QCOW2_OL_REFCOUNT_BLOCK;
218e99
+            }
218e99
+        }
218e99
+    }
218e99
+
218e99
+    if ((chk & QCOW2_OL_INACTIVE_L2) && s->snapshots) {
218e99
+        for (i = 0; i < s->nb_snapshots; i++) {
218e99
+            uint64_t l1_ofs = s->snapshots[i].l1_table_offset;
218e99
+            uint32_t l1_sz  = s->snapshots[i].l1_size;
218e99
+            uint64_t *l1 = g_malloc(l1_sz * sizeof(uint64_t));
218e99
+            int ret;
218e99
+
218e99
+            ret = bdrv_read(bs->file, l1_ofs / BDRV_SECTOR_SIZE, (uint8_t *)l1,
218e99
+                            l1_sz * sizeof(uint64_t) / BDRV_SECTOR_SIZE);
218e99
+
218e99
+            if (ret < 0) {
218e99
+                g_free(l1);
218e99
+                return ret;
218e99
+            }
218e99
+
218e99
+            for (j = 0; j < l1_sz; j++) {
218e99
+                if ((l1[j] & L1E_OFFSET_MASK) &&
218e99
+                    overlaps_with(l1[j] & L1E_OFFSET_MASK, s->cluster_size)) {
218e99
+                    g_free(l1);
218e99
+                    return QCOW2_OL_INACTIVE_L2;
218e99
+                }
218e99
+            }
218e99
+
218e99
+            g_free(l1);
218e99
+        }
218e99
+    }
218e99
+
218e99
+    return 0;
218e99
+}
218e99
+
218e99
+static const char *metadata_ol_names[] = {
218e99
+    [QCOW2_OL_MAIN_HEADER_BITNR]    = "qcow2_header",
218e99
+    [QCOW2_OL_ACTIVE_L1_BITNR]      = "active L1 table",
218e99
+    [QCOW2_OL_ACTIVE_L2_BITNR]      = "active L2 table",
218e99
+    [QCOW2_OL_REFCOUNT_TABLE_BITNR] = "refcount table",
218e99
+    [QCOW2_OL_REFCOUNT_BLOCK_BITNR] = "refcount block",
218e99
+    [QCOW2_OL_SNAPSHOT_TABLE_BITNR] = "snapshot table",
218e99
+    [QCOW2_OL_INACTIVE_L1_BITNR]    = "inactive L1 table",
218e99
+    [QCOW2_OL_INACTIVE_L2_BITNR]    = "inactive L2 table",
218e99
+};
218e99
+
218e99
+/*
218e99
+ * Intended to run right before a write of size bytes at offset: calls
218e99
+ * qcow2_check_metadata_overlap() with chk and returns its negative value
218e99
+ * (error while checking) unchanged. If an overlap is reported, its name is
218e99
+ * looked up in metadata_ol_names[] by bit number, a message is printed to
218e99
+ * stderr, a QEVENT_BLOCK_IMAGE_CORRUPTED event is sent, qcow2_mark_corrupt()
218e99
+ * is called (result ignored) and bs->drv is cleared to make the BDS unusable.
218e99
+ *
218e99
+ * Returns 0 if there was neither an overlap nor an error, -EIO on overlap.
218e99
+ */
218e99
+int qcow2_pre_write_overlap_check(BlockDriverState *bs, int chk, int64_t offset,
218e99
+                                  int64_t size)
218e99
+{
218e99
+    int ret = qcow2_check_metadata_overlap(bs, chk, offset, size);
218e99
+
218e99
+    if (ret < 0) {
218e99
+        return ret;
218e99
+    } else if (ret > 0) {
218e99
+        int metadata_ol_bitnr = ffs(ret) - 1;
218e99
+        char *message;
218e99
+        QObject *data;
218e99
+
218e99
+        assert(metadata_ol_bitnr < QCOW2_OL_MAX_BITNR);
218e99
+
218e99
+        fprintf(stderr, "qcow2: Preventing invalid write on metadata (overlaps "
218e99
+                "with %s); image marked as corrupt.\n",
218e99
+                metadata_ol_names[metadata_ol_bitnr]);
218e99
+        message = g_strdup_printf("Prevented %s overwrite",
218e99
+                metadata_ol_names[metadata_ol_bitnr]);
218e99
+        data = qobject_from_jsonf("{ 'device': %s, 'msg': %s, 'offset': %"
218e99
+                PRId64 ", 'size': %" PRId64 " }", bs->device_name, message,
218e99
+                offset, size);
218e99
+        monitor_protocol_event(QEVENT_BLOCK_IMAGE_CORRUPTED, data);
218e99
+        g_free(message);
218e99
+        qobject_decref(data);
218e99
+
218e99
+        qcow2_mark_corrupt(bs);
218e99
+        bs->drv = NULL; /* make BDS unusable */
218e99
+        return -EIO;
218e99
+    }
218e99
+
218e99
+    return 0;
218e99
+}
218e99
diff --git a/block/qcow2.h b/block/qcow2.h
218e99
index 4297487..86ddb30 100644
218e99
--- a/block/qcow2.h
218e99
+++ b/block/qcow2.h
218e99
@@ -289,6 +289,40 @@ enum {
218e99
     QCOW2_CLUSTER_ZERO
218e99
 };
218e99
 
218e99
+typedef enum QCow2MetadataOverlap {
218e99
+    QCOW2_OL_MAIN_HEADER_BITNR    = 0,
218e99
+    QCOW2_OL_ACTIVE_L1_BITNR      = 1,
218e99
+    QCOW2_OL_ACTIVE_L2_BITNR      = 2,
218e99
+    QCOW2_OL_REFCOUNT_TABLE_BITNR = 3,
218e99
+    QCOW2_OL_REFCOUNT_BLOCK_BITNR = 4,
218e99
+    QCOW2_OL_SNAPSHOT_TABLE_BITNR = 5,
218e99
+    QCOW2_OL_INACTIVE_L1_BITNR    = 6,
218e99
+    QCOW2_OL_INACTIVE_L2_BITNR    = 7,
218e99
+
218e99
+    QCOW2_OL_MAX_BITNR            = 8,
218e99
+
218e99
+    QCOW2_OL_NONE           = 0,
218e99
+    QCOW2_OL_MAIN_HEADER    = (1 << QCOW2_OL_MAIN_HEADER_BITNR),
218e99
+    QCOW2_OL_ACTIVE_L1      = (1 << QCOW2_OL_ACTIVE_L1_BITNR),
218e99
+    QCOW2_OL_ACTIVE_L2      = (1 << QCOW2_OL_ACTIVE_L2_BITNR),
218e99
+    QCOW2_OL_REFCOUNT_TABLE = (1 << QCOW2_OL_REFCOUNT_TABLE_BITNR),
218e99
+    QCOW2_OL_REFCOUNT_BLOCK = (1 << QCOW2_OL_REFCOUNT_BLOCK_BITNR),
218e99
+    QCOW2_OL_SNAPSHOT_TABLE = (1 << QCOW2_OL_SNAPSHOT_TABLE_BITNR),
218e99
+    QCOW2_OL_INACTIVE_L1    = (1 << QCOW2_OL_INACTIVE_L1_BITNR),
218e99
+    /* NOTE: Unlike the flags above, checking this one makes the overlap
218e99
+     * check call bdrv_read() on each snapshot's L1 table. */
218e99
+    QCOW2_OL_INACTIVE_L2    = (1 << QCOW2_OL_INACTIVE_L2_BITNR),
218e99
+} QCow2MetadataOverlap;
218e99
+
218e99
+/* Perform all overlap checks which don't require disk access */
218e99
+#define QCOW2_OL_CACHED \
218e99
+    (QCOW2_OL_MAIN_HEADER | QCOW2_OL_ACTIVE_L1 | QCOW2_OL_ACTIVE_L2 | \
218e99
+     QCOW2_OL_REFCOUNT_TABLE | QCOW2_OL_REFCOUNT_BLOCK | \
218e99
+     QCOW2_OL_SNAPSHOT_TABLE | QCOW2_OL_INACTIVE_L1)
218e99
+
218e99
+/* The default checks to perform */
218e99
+#define QCOW2_OL_DEFAULT QCOW2_OL_CACHED
218e99
+
218e99
 #define L1E_OFFSET_MASK 0x00ffffffffffff00ULL
218e99
 #define L2E_OFFSET_MASK 0x00ffffffffffff00ULL
218e99
 #define L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL
218e99
@@ -390,6 +424,11 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
218e99
 
218e99
 void qcow2_process_discards(BlockDriverState *bs, int ret);
218e99
 
218e99
+int qcow2_check_metadata_overlap(BlockDriverState *bs, int chk, int64_t offset,
218e99
+                                 int64_t size);
218e99
+int qcow2_pre_write_overlap_check(BlockDriverState *bs, int chk, int64_t offset,
218e99
+                                  int64_t size);
218e99
+
218e99
 /* qcow2-cluster.c functions */
218e99
 int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
218e99
                         bool exact_size);
218e99
diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h
218e99
index 1a6cfcf..07b41a6 100644
218e99
--- a/include/monitor/monitor.h
218e99
+++ b/include/monitor/monitor.h
218e99
@@ -47,6 +47,7 @@ typedef enum MonitorEvent {
218e99
     QEVENT_BALLOON_CHANGE,
218e99
     QEVENT_SPICE_MIGRATE_COMPLETED,
218e99
     QEVENT_GUEST_PANICKED,
218e99
+    QEVENT_BLOCK_IMAGE_CORRUPTED,
218e99
 
218e99
     /* Add to 'monitor_event_names' array in monitor.c when
218e99
      * defining new events here */
218e99
diff --git a/monitor.c b/monitor.c
218e99
index deb0dc8..c226acf 100644
218e99
--- a/monitor.c
218e99
+++ b/monitor.c
218e99
@@ -504,6 +504,7 @@ static const char *monitor_event_names[] = {
218e99
     [QEVENT_BALLOON_CHANGE] = "BALLOON_CHANGE",
218e99
     [QEVENT_SPICE_MIGRATE_COMPLETED] = "SPICE_MIGRATE_COMPLETED",
218e99
     [QEVENT_GUEST_PANICKED] = "GUEST_PANICKED",
218e99
+    [QEVENT_BLOCK_IMAGE_CORRUPTED] = "BLOCK_IMAGE_CORRUPTED",
218e99
 };
218e99
 QEMU_BUILD_BUG_ON(ARRAY_SIZE(monitor_event_names) != QEVENT_MAX)
218e99
 
218e99
-- 
218e99
1.7.1
218e99