|
|
0a122b |
From a2b10eec76a72aa7fe63e797181b93f69de9600e Mon Sep 17 00:00:00 2001
|
|
|
0a122b |
From: Kevin Wolf <kwolf@redhat.com>
|
|
|
0a122b |
Date: Tue, 25 Mar 2014 14:23:34 +0100
|
|
|
0a122b |
Subject: [PATCH 27/49] qcow2: Don't rely on free_cluster_index in alloc_refcount_block() (CVE-2014-0147)
|
|
|
0a122b |
|
|
|
0a122b |
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
|
|
0a122b |
Message-id: <1395753835-7591-28-git-send-email-kwolf@redhat.com>
|
|
|
0a122b |
Patchwork-id: n/a
|
|
|
0a122b |
O-Subject: [virt-devel] [EMBARGOED RHEL-7.0 qemu-kvm PATCH 27/48] qcow2: Don't rely on free_cluster_index in alloc_refcount_block() (CVE-2014-0147)
|
|
|
0a122b |
Bugzilla: 1079339
|
|
|
0a122b |
RH-Acked-by: Jeff Cody <jcody@redhat.com>
|
|
|
0a122b |
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
|
|
0a122b |
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
|
|
0a122b |
|
|
|
0a122b |
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1079339
|
|
|
0a122b |
Upstream status: Embargoed
|
|
|
0a122b |
|
|
|
0a122b |
free_cluster_index is only correct if update_refcount() was called from
|
|
|
0a122b |
an allocation function, and even there it's brittle because it's used to
|
|
|
0a122b |
protect unfinished allocations which still have a refcount of 0 - if it
|
|
|
0a122b |
moves to the wrong place, the unfinished allocation can be corrupted.
|
|
|
0a122b |
|
|
|
0a122b |
So not using it any more seems to be a good idea. Instead, use the
|
|
|
0a122b |
first requested cluster to do the calculations. Return -EAGAIN if
|
|
|
0a122b |
unfinished allocations could become invalid and let the caller restart
|
|
|
0a122b |
its search for some free clusters.
|
|
|
0a122b |
|
|
|
0a122b |
The context of creating a snapshot is one situation where
|
|
|
0a122b |
update_refcount() is called outside of a cluster allocation. For this
|
|
|
0a122b |
case, the change fixes a buffer overflow if a cluster is referenced in
|
|
|
0a122b |
an L2 table that cannot be represented by an existing refcount block.
|
|
|
0a122b |
(new_table[refcount_table_index] was out of bounds)
|
|
|
0a122b |
|
|
|
0a122b |
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
|
|
0a122b |
---
|
|
|
0a122b |
block/qcow2-refcount.c | 74 ++++++++++++++++++++++---------------------
|
|
|
0a122b |
block/qcow2.c | 7 ++--
|
|
|
0a122b |
tests/qemu-iotests/044.out | 2 +-
|
|
|
0a122b |
tests/qemu-iotests/080 | 11 ++++++
|
|
|
0a122b |
tests/qemu-iotests/080.out | 7 ++++
|
|
|
0a122b |
5 files changed, 61 insertions(+), 40 deletions(-)
|
|
|
0a122b |
|
|
|
0a122b |
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
|
|
|
0a122b |
index 13ea5f7..54bcbd1 100644
|
|
|
0a122b |
--- a/block/qcow2-refcount.c
|
|
|
0a122b |
+++ b/block/qcow2-refcount.c
|
|
|
0a122b |
@@ -193,10 +193,11 @@ static int alloc_refcount_block(BlockDriverState *bs,
|
|
|
0a122b |
* they can describe them themselves.
|
|
|
0a122b |
*
|
|
|
0a122b |
* - We need to consider that at this point we are inside update_refcounts
|
|
|
0a122b |
- * and doing the initial refcount increase. This means that some clusters
|
|
|
0a122b |
- * have already been allocated by the caller, but their refcount isn't
|
|
|
0a122b |
- * accurate yet. free_cluster_index tells us where this allocation ends
|
|
|
0a122b |
- * as long as we don't overwrite it by freeing clusters.
|
|
|
0a122b |
+ * and potentially doing an initial refcount increase. This means that
|
|
|
0a122b |
+ * some clusters have already been allocated by the caller, but their
|
|
|
0a122b |
+ * refcount isn't accurate yet. If we allocate clusters for metadata, we
|
|
|
0a122b |
+ * need to return -EAGAIN to signal the caller that it needs to restart
|
|
|
0a122b |
+ * the search for free clusters.
|
|
|
0a122b |
*
|
|
|
0a122b |
* - alloc_clusters_noref and qcow2_free_clusters may load a different
|
|
|
0a122b |
* refcount block into the cache
|
|
|
0a122b |
@@ -281,7 +282,10 @@ static int alloc_refcount_block(BlockDriverState *bs,
|
|
|
0a122b |
}
|
|
|
0a122b |
|
|
|
0a122b |
s->refcount_table[refcount_table_index] = new_block;
|
|
|
0a122b |
- return 0;
|
|
|
0a122b |
+
|
|
|
0a122b |
+ /* The new refcount block may be where the caller intended to put its
|
|
|
0a122b |
+ * data, so let it restart the search. */
|
|
|
0a122b |
+ return -EAGAIN;
|
|
|
0a122b |
}
|
|
|
0a122b |
|
|
|
0a122b |
ret = qcow2_cache_put(bs, s->refcount_block_cache, (void**) refcount_block);
|
|
|
0a122b |
@@ -304,8 +308,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
|
|
|
0a122b |
|
|
|
0a122b |
/* Calculate the number of refcount blocks needed so far */
|
|
|
0a122b |
uint64_t refcount_block_clusters = 1 << (s->cluster_bits - REFCOUNT_SHIFT);
|
|
|
0a122b |
- uint64_t blocks_used = (s->free_cluster_index +
|
|
|
0a122b |
- refcount_block_clusters - 1) / refcount_block_clusters;
|
|
|
0a122b |
+ uint64_t blocks_used = DIV_ROUND_UP(cluster_index, refcount_block_clusters);
|
|
|
0a122b |
|
|
|
0a122b |
/* And now we need at least one block more for the new metadata */
|
|
|
0a122b |
uint64_t table_size = next_refcount_table_size(s, blocks_used + 1);
|
|
|
0a122b |
@@ -338,8 +341,6 @@ static int alloc_refcount_block(BlockDriverState *bs,
|
|
|
0a122b |
uint16_t *new_blocks = g_malloc0(blocks_clusters * s->cluster_size);
|
|
|
0a122b |
uint64_t *new_table = g_malloc0(table_size * sizeof(uint64_t));
|
|
|
0a122b |
|
|
|
0a122b |
- assert(meta_offset >= (s->free_cluster_index * s->cluster_size));
|
|
|
0a122b |
-
|
|
|
0a122b |
/* Fill the new refcount table */
|
|
|
0a122b |
memcpy(new_table, s->refcount_table,
|
|
|
0a122b |
s->refcount_table_size * sizeof(uint64_t));
|
|
|
0a122b |
@@ -402,18 +403,19 @@ static int alloc_refcount_block(BlockDriverState *bs,
|
|
|
0a122b |
s->refcount_table_size = table_size;
|
|
|
0a122b |
s->refcount_table_offset = table_offset;
|
|
|
0a122b |
|
|
|
0a122b |
- /* Free old table. Remember, we must not change free_cluster_index */
|
|
|
0a122b |
- uint64_t old_free_cluster_index = s->free_cluster_index;
|
|
|
0a122b |
+ /* Free old table. */
|
|
|
0a122b |
qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t),
|
|
|
0a122b |
QCOW2_DISCARD_OTHER);
|
|
|
0a122b |
- s->free_cluster_index = old_free_cluster_index;
|
|
|
0a122b |
|
|
|
0a122b |
ret = load_refcount_block(bs, new_block, (void**) refcount_block);
|
|
|
0a122b |
if (ret < 0) {
|
|
|
0a122b |
return ret;
|
|
|
0a122b |
}
|
|
|
0a122b |
|
|
|
0a122b |
- return 0;
|
|
|
0a122b |
+ /* If we were trying to do the initial refcount update for some cluster
|
|
|
0a122b |
+ * allocation, we might have used the same clusters to store newly
|
|
|
0a122b |
+ * allocated metadata. Make the caller search some new space. */
|
|
|
0a122b |
+ return -EAGAIN;
|
|
|
0a122b |
|
|
|
0a122b |
fail_table:
|
|
|
0a122b |
g_free(new_table);
|
|
|
0a122b |
@@ -659,12 +661,15 @@ int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size)
|
|
|
0a122b |
int ret;
|
|
|
0a122b |
|
|
|
0a122b |
BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC);
|
|
|
0a122b |
- offset = alloc_clusters_noref(bs, size);
|
|
|
0a122b |
- if (offset < 0) {
|
|
|
0a122b |
- return offset;
|
|
|
0a122b |
- }
|
|
|
0a122b |
+ do {
|
|
|
0a122b |
+ offset = alloc_clusters_noref(bs, size);
|
|
|
0a122b |
+ if (offset < 0) {
|
|
|
0a122b |
+ return offset;
|
|
|
0a122b |
+ }
|
|
|
0a122b |
+
|
|
|
0a122b |
+ ret = update_refcount(bs, offset, size, 1, QCOW2_DISCARD_NEVER);
|
|
|
0a122b |
+ } while (ret == -EAGAIN);
|
|
|
0a122b |
|
|
|
0a122b |
- ret = update_refcount(bs, offset, size, 1, QCOW2_DISCARD_NEVER);
|
|
|
0a122b |
if (ret < 0) {
|
|
|
0a122b |
return ret;
|
|
|
0a122b |
}
|
|
|
0a122b |
@@ -677,7 +682,6 @@ int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
|
|
|
0a122b |
{
|
|
|
0a122b |
BDRVQcowState *s = bs->opaque;
|
|
|
0a122b |
uint64_t cluster_index;
|
|
|
0a122b |
- uint64_t old_free_cluster_index;
|
|
|
0a122b |
uint64_t i;
|
|
|
0a122b |
int refcount, ret;
|
|
|
0a122b |
|
|
|
0a122b |
@@ -686,30 +690,28 @@ int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
|
|
|
0a122b |
return 0;
|
|
|
0a122b |
}
|
|
|
0a122b |
|
|
|
0a122b |
- /* Check how many clusters there are free */
|
|
|
0a122b |
- cluster_index = offset >> s->cluster_bits;
|
|
|
0a122b |
- for(i = 0; i < nb_clusters; i++) {
|
|
|
0a122b |
- refcount = get_refcount(bs, cluster_index++);
|
|
|
0a122b |
+ do {
|
|
|
0a122b |
+ /* Check how many clusters there are free */
|
|
|
0a122b |
+ cluster_index = offset >> s->cluster_bits;
|
|
|
0a122b |
+ for(i = 0; i < nb_clusters; i++) {
|
|
|
0a122b |
+ refcount = get_refcount(bs, cluster_index++);
|
|
|
0a122b |
|
|
|
0a122b |
- if (refcount < 0) {
|
|
|
0a122b |
- return refcount;
|
|
|
0a122b |
- } else if (refcount != 0) {
|
|
|
0a122b |
- break;
|
|
|
0a122b |
+ if (refcount < 0) {
|
|
|
0a122b |
+ return refcount;
|
|
|
0a122b |
+ } else if (refcount != 0) {
|
|
|
0a122b |
+ break;
|
|
|
0a122b |
+ }
|
|
|
0a122b |
}
|
|
|
0a122b |
- }
|
|
|
0a122b |
|
|
|
0a122b |
- /* And then allocate them */
|
|
|
0a122b |
- old_free_cluster_index = s->free_cluster_index;
|
|
|
0a122b |
- s->free_cluster_index = cluster_index + i;
|
|
|
0a122b |
+ /* And then allocate them */
|
|
|
0a122b |
+ ret = update_refcount(bs, offset, i << s->cluster_bits, 1,
|
|
|
0a122b |
+ QCOW2_DISCARD_NEVER);
|
|
|
0a122b |
+ } while (ret == -EAGAIN);
|
|
|
0a122b |
|
|
|
0a122b |
- ret = update_refcount(bs, offset, i << s->cluster_bits, 1,
|
|
|
0a122b |
- QCOW2_DISCARD_NEVER);
|
|
|
0a122b |
if (ret < 0) {
|
|
|
0a122b |
return ret;
|
|
|
0a122b |
}
|
|
|
0a122b |
|
|
|
0a122b |
- s->free_cluster_index = old_free_cluster_index;
|
|
|
0a122b |
-
|
|
|
0a122b |
return i;
|
|
|
0a122b |
}
|
|
|
0a122b |
|
|
|
0a122b |
diff --git a/block/qcow2.c b/block/qcow2.c
|
|
|
0a122b |
index a8ad9e1..87f2958 100644
|
|
|
0a122b |
--- a/block/qcow2.c
|
|
|
0a122b |
+++ b/block/qcow2.c
|
|
|
0a122b |
@@ -1580,7 +1580,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
|
|
|
0a122b |
*/
|
|
|
0a122b |
BlockDriverState* bs;
|
|
|
0a122b |
QCowHeader *header;
|
|
|
0a122b |
- uint8_t* refcount_table;
|
|
|
0a122b |
+ uint64_t* refcount_table;
|
|
|
0a122b |
Error *local_err = NULL;
|
|
|
0a122b |
int ret;
|
|
|
0a122b |
|
|
|
0a122b |
@@ -1630,8 +1630,9 @@ static int qcow2_create2(const char *filename, int64_t total_size,
|
|
|
0a122b |
goto out;
|
|
|
0a122b |
}
|
|
|
0a122b |
|
|
|
0a122b |
- /* Write an empty refcount table */
|
|
|
0a122b |
+ /* Write a refcount table with one refcount block */
|
|
|
0a122b |
refcount_table = g_malloc0(cluster_size);
|
|
|
0a122b |
+ refcount_table[0] = cpu_to_be64(2 * cluster_size);
|
|
|
0a122b |
ret = bdrv_pwrite(bs, cluster_size, refcount_table, cluster_size);
|
|
|
0a122b |
g_free(refcount_table);
|
|
|
0a122b |
|
|
|
0a122b |
@@ -1656,7 +1657,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
|
|
|
0a122b |
goto out;
|
|
|
0a122b |
}
|
|
|
0a122b |
|
|
|
0a122b |
- ret = qcow2_alloc_clusters(bs, 2 * cluster_size);
|
|
|
0a122b |
+ ret = qcow2_alloc_clusters(bs, 3 * cluster_size);
|
|
|
0a122b |
if (ret < 0) {
|
|
|
0a122b |
error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 "
|
|
|
0a122b |
"header and refcount table");
|
|
|
0a122b |
diff --git a/tests/qemu-iotests/044.out b/tests/qemu-iotests/044.out
|
|
|
0a122b |
index 5c5aa92..4789a53 100644
|
|
|
0a122b |
--- a/tests/qemu-iotests/044.out
|
|
|
0a122b |
+++ b/tests/qemu-iotests/044.out
|
|
|
0a122b |
@@ -1,6 +1,6 @@
|
|
|
0a122b |
No errors were found on the image.
|
|
|
0a122b |
7292415/33554432 = 21.73% allocated, 0.00% fragmented, 0.00% compressed clusters
|
|
|
0a122b |
-Image end offset: 4296448000
|
|
|
0a122b |
+Image end offset: 4296152064
|
|
|
0a122b |
.
|
|
|
0a122b |
----------------------------------------------------------------------
|
|
|
0a122b |
Ran 1 tests
|
|
|
0a122b |
diff --git a/tests/qemu-iotests/080 b/tests/qemu-iotests/080
|
|
|
0a122b |
index f3091a9..56f8903 100755
|
|
|
0a122b |
--- a/tests/qemu-iotests/080
|
|
|
0a122b |
+++ b/tests/qemu-iotests/080
|
|
|
0a122b |
@@ -56,6 +56,8 @@ offset_header_size=100
|
|
|
0a122b |
offset_ext_magic=$header_size
|
|
|
0a122b |
offset_ext_size=$((header_size + 4))
|
|
|
0a122b |
|
|
|
0a122b |
+offset_l2_table_0=$((0x40000))
|
|
|
0a122b |
+
|
|
|
0a122b |
echo
|
|
|
0a122b |
echo "== Huge header size =="
|
|
|
0a122b |
_make_test_img 64M
|
|
|
0a122b |
@@ -143,6 +145,15 @@ poke_file "$TEST_IMG" "$offset_backing_file_offset" "\x00\x00\x00\x00\x00\x00\x1
|
|
|
0a122b |
poke_file "$TEST_IMG" "$offset_backing_file_size" "\xff\xff\xff\xff"
|
|
|
0a122b |
{ $QEMU_IO -c "read 0 512" $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir
|
|
|
0a122b |
|
|
|
0a122b |
+echo
|
|
|
0a122b |
+echo "== Invalid L2 entry (huge physical offset) =="
|
|
|
0a122b |
+_make_test_img 64M
|
|
|
0a122b |
+{ $QEMU_IO -c "write 0 512" $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir
|
|
|
0a122b |
+poke_file "$TEST_IMG" "$offset_l2_table_0" "\xbf\xff\xff\xff\xff\xff\x00\x00"
|
|
|
0a122b |
+{ $QEMU_IMG snapshot -c test $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir
|
|
|
0a122b |
+poke_file "$TEST_IMG" "$offset_l2_table_0" "\x80\x00\x00\xff\xff\xff\x00\x00"
|
|
|
0a122b |
+{ $QEMU_IMG snapshot -c test $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir
|
|
|
0a122b |
+
|
|
|
0a122b |
# success, all done
|
|
|
0a122b |
echo "*** done"
|
|
|
0a122b |
rm -f $seq.full
|
|
|
0a122b |
diff --git a/tests/qemu-iotests/080.out b/tests/qemu-iotests/080.out
|
|
|
0a122b |
index 8103211..303d6c3 100644
|
|
|
0a122b |
--- a/tests/qemu-iotests/080.out
|
|
|
0a122b |
+++ b/tests/qemu-iotests/080.out
|
|
|
0a122b |
@@ -63,4 +63,11 @@ no file open, try 'help open'
|
|
|
0a122b |
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
|
|
|
0a122b |
qemu-io: can't open device TEST_DIR/t.qcow2: Backing file name too long
|
|
|
0a122b |
no file open, try 'help open'
|
|
|
0a122b |
+
|
|
|
0a122b |
+== Invalid L2 entry (huge physical offset) ==
|
|
|
0a122b |
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
|
|
|
0a122b |
+wrote 512/512 bytes at offset 0
|
|
|
0a122b |
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
|
|
0a122b |
+qemu-img: Could not create snapshot 'test': -27 (File too large)
|
|
|
0a122b |
+qemu-img: Could not create snapshot 'test': -11 (Resource temporarily unavailable)
|
|
|
0a122b |
*** done
|
|
|
0a122b |
--
|
|
|
0a122b |
1.7.1
|
|
|
0a122b |
|