Tree - rpms/qemu-kvm-ma - CentOS Git server

rpms / qemu-kvm-ma

Blame SOURCES/kvm-block-backup-fix-fleecing-scheme-use-serialized-writ.patch

Blob History Raw

		1bdc94	`From 5627c9fb0b86809d42914f1beef9b68226141d4b Mon Sep 17 00:00:00 2001`
		1bdc94	`From: John Snow <jsnow@redhat.com>`
		1bdc94	`Date: Wed, 18 Jul 2018 22:54:58 +0200`
		1bdc94	`Subject: [PATCH 73/89] block/backup: fix fleecing scheme: use serialized`
		1bdc94	`writes`
		1bdc94
		1bdc94	`RH-Author: John Snow <jsnow@redhat.com>`
		1bdc94	`Message-id: <20180718225511.14878-23-jsnow@redhat.com>`
		1bdc94	`Patchwork-id: 81396`
		1bdc94	`O-Subject: [RHEL-7.6 qemu-kvm-rhev PATCH 22/35] block/backup: fix fleecing scheme: use serialized writes`
		1bdc94	`Bugzilla: 1207657`
		1bdc94	`RH-Acked-by: Eric Blake <eblake@redhat.com>`
		1bdc94	`RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>`
		1bdc94	`RH-Acked-by: Fam Zheng <famz@redhat.com>`
		1bdc94
		1bdc94	`From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>`
		1bdc94
		1bdc94	`Fleecing scheme works as follows: we want a kind of temporary snapshot`
		1bdc94	`of active drive A. We create temporary image B, with B->backing = A.`
		1bdc94	`Then we start backup(sync=none) from A to B. From this point, B reads`
		1bdc94	`as point-in-time snapshot of A (A continues to be active drive,`
		1bdc94	`accepting guest IO).`
		1bdc94
		1bdc94	`This scheme needs some additional synchronization between reads from B`
		1bdc94	`and backup COW operations, otherwise, the following situation is`
		1bdc94	`theoretically possible:`
		1bdc94
		1bdc94	`(assume B is qcow2, client is NBD client, reading from B)`
		1bdc94
		1bdc94	`1. client starts reading and takes the qcow2 mutex in qcow2_co_preadv, and`
		1bdc94	`goes up to l2 table loading (assume cache miss)`
		1bdc94
		1bdc94	`2) guest write => backup COW => qcow2 write =>`
		1bdc94	`try to take qcow2 mutex => waiting`
		1bdc94
		1bdc94	`3. l2 table loaded, we see that cluster is UNALLOCATED, go to`
		1bdc94	`"case QCOW2_CLUSTER_UNALLOCATED" and unlock mutex before`
		1bdc94	`bdrv_co_preadv(bs->backing, ...)`
		1bdc94
		1bdc94	`4) aha, mutex unlocked, backup COW continues, and we finally finish`
		1bdc94	`guest write and change cluster in our active disk A`
		1bdc94
		1bdc94	`5. actually, do bdrv_co_preadv(bs->backing, ...) and read`
		1bdc94	`_new updated_ data.`
		1bdc94
		1bdc94	`To avoid this, let's make backup writes serializing, to not intersect`
		1bdc94	`with reads from B.`
		1bdc94
		1bdc94	`Note: we expand the range of handled cases from (sync=none and`
		1bdc94	`B->backing = A) to just (A in backing chain of B), to finally allow`
		1bdc94	`safe reading from B during backup for all cases when A is in the backing`
		1bdc94	`chain of B, i.e. B formally looks like a point-in-time snapshot of A.`
		1bdc94
		1bdc94	`Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>`
		1bdc94	`Reviewed-by: Fam Zheng <famz@redhat.com>`
		1bdc94	`Signed-off-by: Kevin Wolf <kwolf@redhat.com>`
		1bdc94	`(cherry picked from commit f8d59dfb40bbc6f5aeea57c8aac1e68c1d2454ee)`
		1bdc94	`Signed-off-by: John Snow <jsnow@redhat.com>`
		1bdc94	`Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>`
		1bdc94	`---`
		1bdc94	`block/backup.c | 20 ++++++++++++++------`
		1bdc94	`1 file changed, 14 insertions(+), 6 deletions(-)`
		1bdc94
		1bdc94	`diff --git a/block/backup.c b/block/backup.c`
		1bdc94	`index 369155a..4ba1a6a 100644`
		1bdc94	`--- a/block/backup.c`
		1bdc94	`+++ b/block/backup.c`
		1bdc94	`@@ -47,6 +47,8 @@ typedef struct BackupBlockJob {`
		1bdc94	`HBitmap *copy_bitmap;`
		1bdc94	`bool use_copy_range;`
		1bdc94	`int64_t copy_range_size;`
		1bdc94	`+`
		1bdc94	`+ bool serialize_target_writes;`
		1bdc94	`} BackupBlockJob;`
		1bdc94
		1bdc94	`static const BlockJobDriver backup_job_driver;`
		1bdc94	`@@ -102,6 +104,8 @@ static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job,`
		1bdc94	`QEMUIOVector qiov;`
		1bdc94	`BlockBackend *blk = job->common.blk;`
		1bdc94	`int nbytes;`
		1bdc94	`+ int read_flags = is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0;`
		1bdc94	`+ int write_flags = job->serialize_target_writes ? BDRV_REQ_SERIALISING : 0;`
		1bdc94
		1bdc94	`hbitmap_reset(job->copy_bitmap, start / job->cluster_size, 1);`
		1bdc94	`nbytes = MIN(job->cluster_size, job->len - start);`
		1bdc94	`@@ -112,8 +116,7 @@ static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job,`
		1bdc94	`iov.iov_len = nbytes;`
		1bdc94	`qemu_iovec_init_external(&qiov, &iov, 1);`
		1bdc94
		1bdc94	`- ret = blk_co_preadv(blk, start, qiov.size, &qiov,`
		1bdc94	`- is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);`
		1bdc94	`+ ret = blk_co_preadv(blk, start, qiov.size, &qiov, read_flags);`
		1bdc94	`if (ret < 0) {`
		1bdc94	`trace_backup_do_cow_read_fail(job, start, ret);`
		1bdc94	`if (error_is_read) {`
		1bdc94	`@@ -124,11 +127,11 @@ static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job,`
		1bdc94
		1bdc94	`if (qemu_iovec_is_zero(&qiov)) {`
		1bdc94	`ret = blk_co_pwrite_zeroes(job->target, start,`
		1bdc94	`- qiov.size, BDRV_REQ_MAY_UNMAP);`
		1bdc94	`+ qiov.size, write_flags | BDRV_REQ_MAY_UNMAP);`
		1bdc94	`} else {`
		1bdc94	`ret = blk_co_pwritev(job->target, start,`
		1bdc94	`- qiov.size, &qiov,`
		1bdc94	`- job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0);`
		1bdc94	`+ qiov.size, &qiov, write_flags |`
		1bdc94	`+ (job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0));`
		1bdc94	`}`
		1bdc94	`if (ret < 0) {`
		1bdc94	`trace_backup_do_cow_write_fail(job, start, ret);`
		1bdc94	`@@ -156,6 +159,8 @@ static int coroutine_fn backup_cow_with_offload(BackupBlockJob *job,`
		1bdc94	`int nr_clusters;`
		1bdc94	`BlockBackend *blk = job->common.blk;`
		1bdc94	`int nbytes;`
		1bdc94	`+ int read_flags = is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0;`
		1bdc94	`+ int write_flags = job->serialize_target_writes ? BDRV_REQ_SERIALISING : 0;`
		1bdc94
		1bdc94	`assert(QEMU_IS_ALIGNED(job->copy_range_size, job->cluster_size));`
		1bdc94	`nbytes = MIN(job->copy_range_size, end - start);`
		1bdc94	`@@ -163,7 +168,7 @@ static int coroutine_fn backup_cow_with_offload(BackupBlockJob *job,`
		1bdc94	`hbitmap_reset(job->copy_bitmap, start / job->cluster_size,`
		1bdc94	`nr_clusters);`
		1bdc94	`ret = blk_co_copy_range(blk, start, job->target, start, nbytes,`
		1bdc94	`- is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0, 0);`
		1bdc94	`+ read_flags, write_flags);`
		1bdc94	`if (ret < 0) {`
		1bdc94	`trace_backup_do_cow_copy_range_fail(job, start, ret);`
		1bdc94	`hbitmap_set(job->copy_bitmap, start / job->cluster_size,`
		1bdc94	`@@ -701,6 +706,9 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,`
		1bdc94	`sync_bitmap : NULL;`
		1bdc94	`job->compress = compress;`
		1bdc94
		1bdc94	`+ /* Detect image-fleecing (and similar) schemes */`
		1bdc94	`+ job->serialize_target_writes = bdrv_chain_contains(target, bs);`
		1bdc94	`+`
		1bdc94	`/* If there is no backing file on the target, we cannot rely on COW if our`
		1bdc94	`* backup cluster size is smaller than the target cluster size. Even for`
		1bdc94	`* targets with a backing file, try to avoid COW if possible. */`
		1bdc94	`--`
		1bdc94	`1.8.3.1`
		1bdc94

rpms / qemu-kvm-ma

Source Code

Blame SOURCES/kvm-block-backup-fix-fleecing-scheme-use-serialized-writ.patch