|
|
218e99 |
From 4413b8524dfa9dc3a6a494a2cf031265d6ef16f3 Mon Sep 17 00:00:00 2001
|
|
|
218e99 |
From: Max Reitz <mreitz@redhat.com>
|
|
|
218e99 |
Date: Mon, 4 Nov 2013 22:32:00 +0100
|
|
|
218e99 |
Subject: [PATCH 07/87] qcow2: Metadata overlap checks
|
|
|
218e99 |
|
|
|
218e99 |
RH-Author: Max Reitz <mreitz@redhat.com>
|
|
|
218e99 |
Message-id: <1383604354-12743-10-git-send-email-mreitz@redhat.com>
|
|
|
218e99 |
Patchwork-id: 55309
|
|
|
218e99 |
O-Subject: [RHEL-7.0 qemu-kvm PATCH 09/43] qcow2: Metadata overlap checks
|
|
|
218e99 |
Bugzilla: 1004347
|
|
|
218e99 |
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
|
|
218e99 |
RH-Acked-by: Laszlo Ersek <lersek@redhat.com>
|
|
|
218e99 |
RH-Acked-by: Fam Zheng <famz@redhat.com>
|
|
|
218e99 |
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
|
|
218e99 |
|
|
|
218e99 |
BZ: 1004347
|
|
|
218e99 |
|
|
|
218e99 |
Two new functions are added; the first one checks a given range in the
|
|
|
218e99 |
image file for overlaps with metadata (main header, L1 tables, L2
|
|
|
218e99 |
tables, refcount table and blocks).
|
|
|
218e99 |
|
|
|
218e99 |
The second one should be used immediately before writing to the image
|
|
|
218e99 |
file as it calls the first function and, upon collision, marks the
|
|
|
218e99 |
image as corrupt and makes the BDS unusable, thereby preventing
|
|
|
218e99 |
further access.
|
|
|
218e99 |
|
|
|
218e99 |
Both functions take a bitmask argument specifying the structures which
|
|
|
218e99 |
should be checked for overlaps, making it possible to also check
|
|
|
218e99 |
metadata writes against colliding with other structures.
|
|
|
218e99 |
|
|
|
218e99 |
Signed-off-by: Max Reitz <mreitz@redhat.com>
|
|
|
218e99 |
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
|
|
218e99 |
(cherry picked from commit a40f1c2add4d5f58d594f810fe36cabcf32bc4b0)
|
|
|
218e99 |
|
|
|
218e99 |
Signed-off-by: Max Reitz <mreitz@redhat.com>
|
|
|
218e99 |
---
|
|
|
218e99 |
block/qcow2-refcount.c | 172 ++++++++++++++++++++++++++++++++++++++++++++++
|
|
|
218e99 |
block/qcow2.h | 39 +++++++++++
|
|
|
218e99 |
include/monitor/monitor.h | 1 +
|
|
|
218e99 |
monitor.c | 1 +
|
|
|
218e99 |
4 files changed, 213 insertions(+)
|
|
|
218e99 |
|
|
|
218e99 |
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
|
|
218e99 |
---
|
|
|
218e99 |
block/qcow2-refcount.c | 172 +++++++++++++++++++++++++++++++++++++++++++++
|
|
|
218e99 |
block/qcow2.h | 39 ++++++++++
|
|
|
218e99 |
include/monitor/monitor.h | 1 +
|
|
|
218e99 |
monitor.c | 1 +
|
|
|
218e99 |
4 files changed, 213 insertions(+), 0 deletions(-)
|
|
|
218e99 |
|
|
|
218e99 |
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
|
|
|
218e99 |
index 1244693..310efcc 100644
|
|
|
218e99 |
--- a/block/qcow2-refcount.c
|
|
|
218e99 |
+++ b/block/qcow2-refcount.c
|
|
|
218e99 |
@@ -25,6 +25,8 @@
|
|
|
218e99 |
#include "qemu-common.h"
|
|
|
218e99 |
#include "block/block_int.h"
|
|
|
218e99 |
#include "block/qcow2.h"
|
|
|
218e99 |
+#include "qemu/range.h"
|
|
|
218e99 |
+#include "qapi/qmp/types.h"
|
|
|
218e99 |
|
|
|
218e99 |
static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size);
|
|
|
218e99 |
static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
|
|
|
218e99 |
@@ -1372,3 +1374,173 @@ fail:
|
|
|
218e99 |
return ret;
|
|
|
218e99 |
}
|
|
|
218e99 |
|
|
|
218e99 |
+#define overlaps_with(ofs, sz) \
|
|
|
218e99 |
+ ranges_overlap(offset, size, ofs, sz)
|
|
|
218e99 |
+
|
|
|
218e99 |
+/*
|
|
|
218e99 |
+ * Checks if the given offset into the image file is actually free to use by
|
|
|
218e99 |
+ * looking for overlaps with important metadata sections (L1/L2 tables etc.),
|
|
|
218e99 |
+ * i.e. a sanity check without relying on the refcount tables.
|
|
|
218e99 |
+ *
|
|
|
218e99 |
+ * The chk parameter specifies exactly what checks to perform (being a bitmask
|
|
|
218e99 |
+ * of QCow2MetadataOverlap values).
|
|
|
218e99 |
+ *
|
|
|
218e99 |
+ * Returns:
|
|
|
218e99 |
+ * - 0 if writing to this offset will not affect the mentioned metadata
|
|
|
218e99 |
+ * - a positive QCow2MetadataOverlap value indicating one overlapping section
|
|
|
218e99 |
+ * - a negative value (-errno) indicating an error while performing a check,
|
|
|
218e99 |
+ * e.g. when bdrv_read failed on QCOW2_OL_INACTIVE_L2
|
|
|
218e99 |
+ */
|
|
|
218e99 |
+int qcow2_check_metadata_overlap(BlockDriverState *bs, int chk, int64_t offset,
|
|
|
218e99 |
+ int64_t size)
|
|
|
218e99 |
+{
|
|
|
218e99 |
+ BDRVQcowState *s = bs->opaque;
|
|
|
218e99 |
+ int i, j;
|
|
|
218e99 |
+
|
|
|
218e99 |
+ if (!size) {
|
|
|
218e99 |
+ return 0;
|
|
|
218e99 |
+ }
|
|
|
218e99 |
+
|
|
|
218e99 |
+ if (chk & QCOW2_OL_MAIN_HEADER) {
|
|
|
218e99 |
+ if (offset < s->cluster_size) {
|
|
|
218e99 |
+ return QCOW2_OL_MAIN_HEADER;
|
|
|
218e99 |
+ }
|
|
|
218e99 |
+ }
|
|
|
218e99 |
+
|
|
|
218e99 |
+ /* align range to test to cluster boundaries */
|
|
|
218e99 |
+ size = align_offset(offset_into_cluster(s, offset) + size, s->cluster_size);
|
|
|
218e99 |
+ offset = start_of_cluster(s, offset);
|
|
|
218e99 |
+
|
|
|
218e99 |
+ if ((chk & QCOW2_OL_ACTIVE_L1) && s->l1_size) {
|
|
|
218e99 |
+ if (overlaps_with(s->l1_table_offset, s->l1_size * sizeof(uint64_t))) {
|
|
|
218e99 |
+ return QCOW2_OL_ACTIVE_L1;
|
|
|
218e99 |
+ }
|
|
|
218e99 |
+ }
|
|
|
218e99 |
+
|
|
|
218e99 |
+ if ((chk & QCOW2_OL_REFCOUNT_TABLE) && s->refcount_table_size) {
|
|
|
218e99 |
+ if (overlaps_with(s->refcount_table_offset,
|
|
|
218e99 |
+ s->refcount_table_size * sizeof(uint64_t))) {
|
|
|
218e99 |
+ return QCOW2_OL_REFCOUNT_TABLE;
|
|
|
218e99 |
+ }
|
|
|
218e99 |
+ }
|
|
|
218e99 |
+
|
|
|
218e99 |
+ if ((chk & QCOW2_OL_SNAPSHOT_TABLE) && s->snapshots_size) {
|
|
|
218e99 |
+ if (overlaps_with(s->snapshots_offset, s->snapshots_size)) {
|
|
|
218e99 |
+ return QCOW2_OL_SNAPSHOT_TABLE;
|
|
|
218e99 |
+ }
|
|
|
218e99 |
+ }
|
|
|
218e99 |
+
|
|
|
218e99 |
+ if ((chk & QCOW2_OL_INACTIVE_L1) && s->snapshots) {
|
|
|
218e99 |
+ for (i = 0; i < s->nb_snapshots; i++) {
|
|
|
218e99 |
+ if (s->snapshots[i].l1_size &&
|
|
|
218e99 |
+ overlaps_with(s->snapshots[i].l1_table_offset,
|
|
|
218e99 |
+ s->snapshots[i].l1_size * sizeof(uint64_t))) {
|
|
|
218e99 |
+ return QCOW2_OL_INACTIVE_L1;
|
|
|
218e99 |
+ }
|
|
|
218e99 |
+ }
|
|
|
218e99 |
+ }
|
|
|
218e99 |
+
|
|
|
218e99 |
+ if ((chk & QCOW2_OL_ACTIVE_L2) && s->l1_table) {
|
|
|
218e99 |
+ for (i = 0; i < s->l1_size; i++) {
|
|
|
218e99 |
+ if ((s->l1_table[i] & L1E_OFFSET_MASK) &&
|
|
|
218e99 |
+ overlaps_with(s->l1_table[i] & L1E_OFFSET_MASK,
|
|
|
218e99 |
+ s->cluster_size)) {
|
|
|
218e99 |
+ return QCOW2_OL_ACTIVE_L2;
|
|
|
218e99 |
+ }
|
|
|
218e99 |
+ }
|
|
|
218e99 |
+ }
|
|
|
218e99 |
+
|
|
|
218e99 |
+ if ((chk & QCOW2_OL_REFCOUNT_BLOCK) && s->refcount_table) {
|
|
|
218e99 |
+ for (i = 0; i < s->refcount_table_size; i++) {
|
|
|
218e99 |
+ if ((s->refcount_table[i] & REFT_OFFSET_MASK) &&
|
|
|
218e99 |
+ overlaps_with(s->refcount_table[i] & REFT_OFFSET_MASK,
|
|
|
218e99 |
+ s->cluster_size)) {
|
|
|
218e99 |
+ return QCOW2_OL_REFCOUNT_BLOCK;
|
|
|
218e99 |
+ }
|
|
|
218e99 |
+ }
|
|
|
218e99 |
+ }
|
|
|
218e99 |
+
|
|
|
218e99 |
+ if ((chk & QCOW2_OL_INACTIVE_L2) && s->snapshots) {
|
|
|
218e99 |
+ for (i = 0; i < s->nb_snapshots; i++) {
|
|
|
218e99 |
+ uint64_t l1_ofs = s->snapshots[i].l1_table_offset;
|
|
|
218e99 |
+ uint32_t l1_sz = s->snapshots[i].l1_size;
|
|
|
218e99 |
+ uint64_t *l1 = g_malloc(l1_sz * sizeof(uint64_t));
|
|
|
218e99 |
+ int ret;
|
|
|
218e99 |
+
|
|
|
218e99 |
+ ret = bdrv_read(bs->file, l1_ofs / BDRV_SECTOR_SIZE, (uint8_t *)l1,
|
|
|
218e99 |
+ l1_sz * sizeof(uint64_t) / BDRV_SECTOR_SIZE);
|
|
|
218e99 |
+
|
|
|
218e99 |
+ if (ret < 0) {
|
|
|
218e99 |
+ g_free(l1);
|
|
|
218e99 |
+ return ret;
|
|
|
218e99 |
+ }
|
|
|
218e99 |
+
|
|
|
218e99 |
+ for (j = 0; j < l1_sz; j++) {
|
|
|
218e99 |
+ if ((l1[j] & L1E_OFFSET_MASK) &&
|
|
|
218e99 |
+ overlaps_with(l1[j] & L1E_OFFSET_MASK, s->cluster_size)) {
|
|
|
218e99 |
+ g_free(l1);
|
|
|
218e99 |
+ return QCOW2_OL_INACTIVE_L2;
|
|
|
218e99 |
+ }
|
|
|
218e99 |
+ }
|
|
|
218e99 |
+
|
|
|
218e99 |
+ g_free(l1);
|
|
|
218e99 |
+ }
|
|
|
218e99 |
+ }
|
|
|
218e99 |
+
|
|
|
218e99 |
+ return 0;
|
|
|
218e99 |
+}
|
|
|
218e99 |
+
|
|
|
218e99 |
+static const char *metadata_ol_names[] = {
|
|
|
218e99 |
+ [QCOW2_OL_MAIN_HEADER_BITNR] = "qcow2_header",
|
|
|
218e99 |
+ [QCOW2_OL_ACTIVE_L1_BITNR] = "active L1 table",
|
|
|
218e99 |
+ [QCOW2_OL_ACTIVE_L2_BITNR] = "active L2 table",
|
|
|
218e99 |
+ [QCOW2_OL_REFCOUNT_TABLE_BITNR] = "refcount table",
|
|
|
218e99 |
+ [QCOW2_OL_REFCOUNT_BLOCK_BITNR] = "refcount block",
|
|
|
218e99 |
+ [QCOW2_OL_SNAPSHOT_TABLE_BITNR] = "snapshot table",
|
|
|
218e99 |
+ [QCOW2_OL_INACTIVE_L1_BITNR] = "inactive L1 table",
|
|
|
218e99 |
+ [QCOW2_OL_INACTIVE_L2_BITNR] = "inactive L2 table",
|
|
|
218e99 |
+};
|
|
|
218e99 |
+
|
|
|
218e99 |
+/*
|
|
|
218e99 |
+ * First performs a check for metadata overlaps (through
|
|
|
218e99 |
+ * qcow2_check_metadata_overlap); if that fails with a negative value (error
|
|
|
218e99 |
+ * while performing a check), that value is returned. If an impending overlap
|
|
|
218e99 |
+ * is detected, the BDS will be made unusable, the qcow2 file marked corrupt
|
|
|
218e99 |
+ * and -EIO returned.
|
|
|
218e99 |
+ *
|
|
|
218e99 |
+ * Returns 0 if there were neither overlaps nor errors while checking for
|
|
|
218e99 |
+ * overlaps; or a negative value (-errno) on error.
|
|
|
218e99 |
+ */
|
|
|
218e99 |
+int qcow2_pre_write_overlap_check(BlockDriverState *bs, int chk, int64_t offset,
|
|
|
218e99 |
+ int64_t size)
|
|
|
218e99 |
+{
|
|
|
218e99 |
+ int ret = qcow2_check_metadata_overlap(bs, chk, offset, size);
|
|
|
218e99 |
+
|
|
|
218e99 |
+ if (ret < 0) {
|
|
|
218e99 |
+ return ret;
|
|
|
218e99 |
+ } else if (ret > 0) {
|
|
|
218e99 |
+ int metadata_ol_bitnr = ffs(ret) - 1;
|
|
|
218e99 |
+ char *message;
|
|
|
218e99 |
+ QObject *data;
|
|
|
218e99 |
+
|
|
|
218e99 |
+ assert(metadata_ol_bitnr < QCOW2_OL_MAX_BITNR);
|
|
|
218e99 |
+
|
|
|
218e99 |
+ fprintf(stderr, "qcow2: Preventing invalid write on metadata (overlaps "
|
|
|
218e99 |
+ "with %s); image marked as corrupt.\n",
|
|
|
218e99 |
+ metadata_ol_names[metadata_ol_bitnr]);
|
|
|
218e99 |
+ message = g_strdup_printf("Prevented %s overwrite",
|
|
|
218e99 |
+ metadata_ol_names[metadata_ol_bitnr]);
|
|
|
218e99 |
+ data = qobject_from_jsonf("{ 'device': %s, 'msg': %s, 'offset': %"
|
|
|
218e99 |
+ PRId64 ", 'size': %" PRId64 " }", bs->device_name, message,
|
|
|
218e99 |
+ offset, size);
|
|
|
218e99 |
+ monitor_protocol_event(QEVENT_BLOCK_IMAGE_CORRUPTED, data);
|
|
|
218e99 |
+ g_free(message);
|
|
|
218e99 |
+ qobject_decref(data);
|
|
|
218e99 |
+
|
|
|
218e99 |
+ qcow2_mark_corrupt(bs);
|
|
|
218e99 |
+ bs->drv = NULL; /* make BDS unusable */
|
|
|
218e99 |
+ return -EIO;
|
|
|
218e99 |
+ }
|
|
|
218e99 |
+
|
|
|
218e99 |
+ return 0;
|
|
|
218e99 |
+}
|
|
|
218e99 |
diff --git a/block/qcow2.h b/block/qcow2.h
|
|
|
218e99 |
index 4297487..86ddb30 100644
|
|
|
218e99 |
--- a/block/qcow2.h
|
|
|
218e99 |
+++ b/block/qcow2.h
|
|
|
218e99 |
@@ -289,6 +289,40 @@ enum {
|
|
|
218e99 |
QCOW2_CLUSTER_ZERO
|
|
|
218e99 |
};
|
|
|
218e99 |
|
|
|
218e99 |
+typedef enum QCow2MetadataOverlap {
|
|
|
218e99 |
+ QCOW2_OL_MAIN_HEADER_BITNR = 0,
|
|
|
218e99 |
+ QCOW2_OL_ACTIVE_L1_BITNR = 1,
|
|
|
218e99 |
+ QCOW2_OL_ACTIVE_L2_BITNR = 2,
|
|
|
218e99 |
+ QCOW2_OL_REFCOUNT_TABLE_BITNR = 3,
|
|
|
218e99 |
+ QCOW2_OL_REFCOUNT_BLOCK_BITNR = 4,
|
|
|
218e99 |
+ QCOW2_OL_SNAPSHOT_TABLE_BITNR = 5,
|
|
|
218e99 |
+ QCOW2_OL_INACTIVE_L1_BITNR = 6,
|
|
|
218e99 |
+ QCOW2_OL_INACTIVE_L2_BITNR = 7,
|
|
|
218e99 |
+
|
|
|
218e99 |
+ QCOW2_OL_MAX_BITNR = 8,
|
|
|
218e99 |
+
|
|
|
218e99 |
+ QCOW2_OL_NONE = 0,
|
|
|
218e99 |
+ QCOW2_OL_MAIN_HEADER = (1 << QCOW2_OL_MAIN_HEADER_BITNR),
|
|
|
218e99 |
+ QCOW2_OL_ACTIVE_L1 = (1 << QCOW2_OL_ACTIVE_L1_BITNR),
|
|
|
218e99 |
+ QCOW2_OL_ACTIVE_L2 = (1 << QCOW2_OL_ACTIVE_L2_BITNR),
|
|
|
218e99 |
+ QCOW2_OL_REFCOUNT_TABLE = (1 << QCOW2_OL_REFCOUNT_TABLE_BITNR),
|
|
|
218e99 |
+ QCOW2_OL_REFCOUNT_BLOCK = (1 << QCOW2_OL_REFCOUNT_BLOCK_BITNR),
|
|
|
218e99 |
+ QCOW2_OL_SNAPSHOT_TABLE = (1 << QCOW2_OL_SNAPSHOT_TABLE_BITNR),
|
|
|
218e99 |
+ QCOW2_OL_INACTIVE_L1 = (1 << QCOW2_OL_INACTIVE_L1_BITNR),
|
|
|
218e99 |
+ /* NOTE: Checking overlaps with inactive L2 tables will result in bdrv
|
|
|
218e99 |
+ * reads. */
|
|
|
218e99 |
+ QCOW2_OL_INACTIVE_L2 = (1 << QCOW2_OL_INACTIVE_L2_BITNR),
|
|
|
218e99 |
+} QCow2MetadataOverlap;
|
|
|
218e99 |
+
|
|
|
218e99 |
+/* Perform all overlap checks which don't require disk access */
|
|
|
218e99 |
+#define QCOW2_OL_CACHED \
|
|
|
218e99 |
+ (QCOW2_OL_MAIN_HEADER | QCOW2_OL_ACTIVE_L1 | QCOW2_OL_ACTIVE_L2 | \
|
|
|
218e99 |
+ QCOW2_OL_REFCOUNT_TABLE | QCOW2_OL_REFCOUNT_BLOCK | \
|
|
|
218e99 |
+ QCOW2_OL_SNAPSHOT_TABLE | QCOW2_OL_INACTIVE_L1)
|
|
|
218e99 |
+
|
|
|
218e99 |
+/* The default checks to perform */
|
|
|
218e99 |
+#define QCOW2_OL_DEFAULT QCOW2_OL_CACHED
|
|
|
218e99 |
+
|
|
|
218e99 |
#define L1E_OFFSET_MASK 0x00ffffffffffff00ULL
|
|
|
218e99 |
#define L2E_OFFSET_MASK 0x00ffffffffffff00ULL
|
|
|
218e99 |
#define L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL
|
|
|
218e99 |
@@ -390,6 +424,11 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
|
|
|
218e99 |
|
|
|
218e99 |
void qcow2_process_discards(BlockDriverState *bs, int ret);
|
|
|
218e99 |
|
|
|
218e99 |
+int qcow2_check_metadata_overlap(BlockDriverState *bs, int chk, int64_t offset,
|
|
|
218e99 |
+ int64_t size);
|
|
|
218e99 |
+int qcow2_pre_write_overlap_check(BlockDriverState *bs, int chk, int64_t offset,
|
|
|
218e99 |
+ int64_t size);
|
|
|
218e99 |
+
|
|
|
218e99 |
/* qcow2-cluster.c functions */
|
|
|
218e99 |
int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
|
|
|
218e99 |
bool exact_size);
|
|
|
218e99 |
diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h
|
|
|
218e99 |
index 1a6cfcf..07b41a6 100644
|
|
|
218e99 |
--- a/include/monitor/monitor.h
|
|
|
218e99 |
+++ b/include/monitor/monitor.h
|
|
|
218e99 |
@@ -47,6 +47,7 @@ typedef enum MonitorEvent {
|
|
|
218e99 |
QEVENT_BALLOON_CHANGE,
|
|
|
218e99 |
QEVENT_SPICE_MIGRATE_COMPLETED,
|
|
|
218e99 |
QEVENT_GUEST_PANICKED,
|
|
|
218e99 |
+ QEVENT_BLOCK_IMAGE_CORRUPTED,
|
|
|
218e99 |
|
|
|
218e99 |
/* Add to 'monitor_event_names' array in monitor.c when
|
|
|
218e99 |
* defining new events here */
|
|
|
218e99 |
diff --git a/monitor.c b/monitor.c
|
|
|
218e99 |
index deb0dc8..c226acf 100644
|
|
|
218e99 |
--- a/monitor.c
|
|
|
218e99 |
+++ b/monitor.c
|
|
|
218e99 |
@@ -504,6 +504,7 @@ static const char *monitor_event_names[] = {
|
|
|
218e99 |
[QEVENT_BALLOON_CHANGE] = "BALLOON_CHANGE",
|
|
|
218e99 |
[QEVENT_SPICE_MIGRATE_COMPLETED] = "SPICE_MIGRATE_COMPLETED",
|
|
|
218e99 |
[QEVENT_GUEST_PANICKED] = "GUEST_PANICKED",
|
|
|
218e99 |
+ [QEVENT_BLOCK_IMAGE_CORRUPTED] = "BLOCK_IMAGE_CORRUPTED",
|
|
|
218e99 |
};
|
|
|
218e99 |
QEMU_BUILD_BUG_ON(ARRAY_SIZE(monitor_event_names) != QEVENT_MAX)
|
|
|
218e99 |
|
|
|
218e99 |
--
|
|
|
218e99 |
1.7.1
|
|
|
218e99 |
|