|
|
26ba25 |
From 1df22a2aa1fc70f0b2b268bbe0c184d95ce74b04 Mon Sep 17 00:00:00 2001
|
|
|
26ba25 |
From: John Snow <jsnow@redhat.com>
|
|
|
26ba25 |
Date: Wed, 18 Jul 2018 22:54:52 +0200
|
|
|
26ba25 |
Subject: [PATCH 234/268] backup: Use copy offloading
|
|
|
26ba25 |
|
|
|
26ba25 |
RH-Author: John Snow <jsnow@redhat.com>
|
|
|
26ba25 |
Message-id: <20180718225511.14878-17-jsnow@redhat.com>
|
|
|
26ba25 |
Patchwork-id: 81399
|
|
|
26ba25 |
O-Subject: [RHEL-7.6 qemu-kvm-rhev PATCH 16/35] backup: Use copy offloading
|
|
|
26ba25 |
Bugzilla: 1207657
|
|
|
26ba25 |
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
|
|
26ba25 |
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
|
|
26ba25 |
RH-Acked-by: Fam Zheng <famz@redhat.com>
|
|
|
26ba25 |
|
|
|
26ba25 |
From: Fam Zheng <famz@redhat.com>
|
|
|
26ba25 |
|
|
|
26ba25 |
The implementation is similar to the 'qemu-img convert'. In the
|
|
|
26ba25 |
beginning of the job, offloaded copy is attempted. If it fails, further
|
|
|
26ba25 |
I/O will go through the existing bounce buffer code path.
|
|
|
26ba25 |
|
|
|
26ba25 |
Then, as Kevin pointed out, both this and qemu-img convert can benefit
|
|
|
26ba25 |
from a local check if one request fails because of, for example, the
|
|
|
26ba25 |
offset is beyond EOF, but another may well be accepted by the protocol
|
|
|
26ba25 |
layer. This will be implemented separately.
|
|
|
26ba25 |
|
|
|
26ba25 |
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
|
|
26ba25 |
Signed-off-by: Fam Zheng <famz@redhat.com>
|
|
|
26ba25 |
Message-id: 20180703023758.14422-4-famz@redhat.com
|
|
|
26ba25 |
Signed-off-by: Jeff Cody <jcody@redhat.com>
|
|
|
26ba25 |
(cherry picked from commit 9ded4a0114968e98b41494fc035ba14f84cdf700)
|
|
|
26ba25 |
Signed-off-by: John Snow <jsnow@redhat.com>
|
|
|
26ba25 |
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
|
|
26ba25 |
---
|
|
|
26ba25 |
block/backup.c | 150 ++++++++++++++++++++++++++++++++++++++---------------
|
|
|
26ba25 |
block/trace-events | 1 +
|
|
|
26ba25 |
2 files changed, 110 insertions(+), 41 deletions(-)
|
|
|
26ba25 |
|
|
|
26ba25 |
diff --git a/block/backup.c b/block/backup.c
|
|
|
26ba25 |
index 5661435..d26eeb5 100644
|
|
|
26ba25 |
--- a/block/backup.c
|
|
|
26ba25 |
+++ b/block/backup.c
|
|
|
26ba25 |
@@ -45,6 +45,8 @@ typedef struct BackupBlockJob {
|
|
|
26ba25 |
QLIST_HEAD(, CowRequest) inflight_reqs;
|
|
|
26ba25 |
|
|
|
26ba25 |
HBitmap *copy_bitmap;
|
|
|
26ba25 |
+ bool use_copy_range;
|
|
|
26ba25 |
+ int64_t copy_range_size;
|
|
|
26ba25 |
} BackupBlockJob;
|
|
|
26ba25 |
|
|
|
26ba25 |
static const BlockJobDriver backup_job_driver;
|
|
|
26ba25 |
@@ -86,19 +88,101 @@ static void cow_request_end(CowRequest *req)
|
|
|
26ba25 |
qemu_co_queue_restart_all(&req->wait_queue);
|
|
|
26ba25 |
}
|
|
|
26ba25 |
|
|
|
26ba25 |
+/* Copy range to target with a bounce buffer and return the bytes copied. If
|
|
|
26ba25 |
+ * error occurred, return a negative error number */
|
|
|
26ba25 |
+static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job,
|
|
|
26ba25 |
+ int64_t start,
|
|
|
26ba25 |
+ int64_t end,
|
|
|
26ba25 |
+ bool is_write_notifier,
|
|
|
26ba25 |
+ bool *error_is_read,
|
|
|
26ba25 |
+ void **bounce_buffer)
|
|
|
26ba25 |
+{
|
|
|
26ba25 |
+ int ret;
|
|
|
26ba25 |
+ struct iovec iov;
|
|
|
26ba25 |
+ QEMUIOVector qiov;
|
|
|
26ba25 |
+ BlockBackend *blk = job->common.blk;
|
|
|
26ba25 |
+ int nbytes;
|
|
|
26ba25 |
+
|
|
|
26ba25 |
+ hbitmap_reset(job->copy_bitmap, start / job->cluster_size, 1);
|
|
|
26ba25 |
+ nbytes = MIN(job->cluster_size, job->len - start);
|
|
|
26ba25 |
+ if (!*bounce_buffer) {
|
|
|
26ba25 |
+ *bounce_buffer = blk_blockalign(blk, job->cluster_size);
|
|
|
26ba25 |
+ }
|
|
|
26ba25 |
+ iov.iov_base = *bounce_buffer;
|
|
|
26ba25 |
+ iov.iov_len = nbytes;
|
|
|
26ba25 |
+ qemu_iovec_init_external(&qiov, &iov, 1);
|
|
|
26ba25 |
+
|
|
|
26ba25 |
+ ret = blk_co_preadv(blk, start, qiov.size, &qiov,
|
|
|
26ba25 |
+ is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
|
|
|
26ba25 |
+ if (ret < 0) {
|
|
|
26ba25 |
+ trace_backup_do_cow_read_fail(job, start, ret);
|
|
|
26ba25 |
+ if (error_is_read) {
|
|
|
26ba25 |
+ *error_is_read = true;
|
|
|
26ba25 |
+ }
|
|
|
26ba25 |
+ goto fail;
|
|
|
26ba25 |
+ }
|
|
|
26ba25 |
+
|
|
|
26ba25 |
+ if (qemu_iovec_is_zero(&qiov)) {
|
|
|
26ba25 |
+ ret = blk_co_pwrite_zeroes(job->target, start,
|
|
|
26ba25 |
+ qiov.size, BDRV_REQ_MAY_UNMAP);
|
|
|
26ba25 |
+ } else {
|
|
|
26ba25 |
+ ret = blk_co_pwritev(job->target, start,
|
|
|
26ba25 |
+ qiov.size, &qiov,
|
|
|
26ba25 |
+ job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
|
|
|
26ba25 |
+ }
|
|
|
26ba25 |
+ if (ret < 0) {
|
|
|
26ba25 |
+ trace_backup_do_cow_write_fail(job, start, ret);
|
|
|
26ba25 |
+ if (error_is_read) {
|
|
|
26ba25 |
+ *error_is_read = false;
|
|
|
26ba25 |
+ }
|
|
|
26ba25 |
+ goto fail;
|
|
|
26ba25 |
+ }
|
|
|
26ba25 |
+
|
|
|
26ba25 |
+ return nbytes;
|
|
|
26ba25 |
+fail:
|
|
|
26ba25 |
+ hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
|
|
|
26ba25 |
+ return ret;
|
|
|
26ba25 |
+
|
|
|
26ba25 |
+}
|
|
|
26ba25 |
+
|
|
|
26ba25 |
+/* Copy range to target and return the bytes copied. If error occurred, return a
|
|
|
26ba25 |
+ * negative error number. */
|
|
|
26ba25 |
+static int coroutine_fn backup_cow_with_offload(BackupBlockJob *job,
|
|
|
26ba25 |
+ int64_t start,
|
|
|
26ba25 |
+ int64_t end,
|
|
|
26ba25 |
+ bool is_write_notifier)
|
|
|
26ba25 |
+{
|
|
|
26ba25 |
+ int ret;
|
|
|
26ba25 |
+ int nr_clusters;
|
|
|
26ba25 |
+ BlockBackend *blk = job->common.blk;
|
|
|
26ba25 |
+ int nbytes;
|
|
|
26ba25 |
+
|
|
|
26ba25 |
+ assert(QEMU_IS_ALIGNED(job->copy_range_size, job->cluster_size));
|
|
|
26ba25 |
+ nbytes = MIN(job->copy_range_size, end - start);
|
|
|
26ba25 |
+ nr_clusters = DIV_ROUND_UP(nbytes, job->cluster_size);
|
|
|
26ba25 |
+ hbitmap_reset(job->copy_bitmap, start / job->cluster_size,
|
|
|
26ba25 |
+ nr_clusters);
|
|
|
26ba25 |
+ ret = blk_co_copy_range(blk, start, job->target, start, nbytes,
|
|
|
26ba25 |
+ is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
|
|
|
26ba25 |
+ if (ret < 0) {
|
|
|
26ba25 |
+ trace_backup_do_cow_copy_range_fail(job, start, ret);
|
|
|
26ba25 |
+ hbitmap_set(job->copy_bitmap, start / job->cluster_size,
|
|
|
26ba25 |
+ nr_clusters);
|
|
|
26ba25 |
+ return ret;
|
|
|
26ba25 |
+ }
|
|
|
26ba25 |
+
|
|
|
26ba25 |
+ return nbytes;
|
|
|
26ba25 |
+}
|
|
|
26ba25 |
+
|
|
|
26ba25 |
static int coroutine_fn backup_do_cow(BackupBlockJob *job,
|
|
|
26ba25 |
int64_t offset, uint64_t bytes,
|
|
|
26ba25 |
bool *error_is_read,
|
|
|
26ba25 |
bool is_write_notifier)
|
|
|
26ba25 |
{
|
|
|
26ba25 |
- BlockBackend *blk = job->common.blk;
|
|
|
26ba25 |
CowRequest cow_request;
|
|
|
26ba25 |
- struct iovec iov;
|
|
|
26ba25 |
- QEMUIOVector bounce_qiov;
|
|
|
26ba25 |
- void *bounce_buffer = NULL;
|
|
|
26ba25 |
int ret = 0;
|
|
|
26ba25 |
int64_t start, end; /* bytes */
|
|
|
26ba25 |
- int n; /* bytes */
|
|
|
26ba25 |
+ void *bounce_buffer = NULL;
|
|
|
26ba25 |
|
|
|
26ba25 |
qemu_co_rwlock_rdlock(&job->flush_rwlock);
|
|
|
26ba25 |
|
|
|
26ba25 |
@@ -110,60 +194,38 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
|
|
|
26ba25 |
wait_for_overlapping_requests(job, start, end);
|
|
|
26ba25 |
cow_request_begin(&cow_request, job, start, end);
|
|
|
26ba25 |
|
|
|
26ba25 |
- for (; start < end; start += job->cluster_size) {
|
|
|
26ba25 |
+ while (start < end) {
|
|
|
26ba25 |
if (!hbitmap_get(job->copy_bitmap, start / job->cluster_size)) {
|
|
|
26ba25 |
trace_backup_do_cow_skip(job, start);
|
|
|
26ba25 |
+ start += job->cluster_size;
|
|
|
26ba25 |
continue; /* already copied */
|
|
|
26ba25 |
}
|
|
|
26ba25 |
- hbitmap_reset(job->copy_bitmap, start / job->cluster_size, 1);
|
|
|
26ba25 |
|
|
|
26ba25 |
trace_backup_do_cow_process(job, start);
|
|
|
26ba25 |
|
|
|
26ba25 |
- n = MIN(job->cluster_size, job->len - start);
|
|
|
26ba25 |
-
|
|
|
26ba25 |
- if (!bounce_buffer) {
|
|
|
26ba25 |
- bounce_buffer = blk_blockalign(blk, job->cluster_size);
|
|
|
26ba25 |
- }
|
|
|
26ba25 |
- iov.iov_base = bounce_buffer;
|
|
|
26ba25 |
- iov.iov_len = n;
|
|
|
26ba25 |
- qemu_iovec_init_external(&bounce_qiov, &iov, 1);
|
|
|
26ba25 |
-
|
|
|
26ba25 |
- ret = blk_co_preadv(blk, start, bounce_qiov.size, &bounce_qiov,
|
|
|
26ba25 |
- is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
|
|
|
26ba25 |
- if (ret < 0) {
|
|
|
26ba25 |
- trace_backup_do_cow_read_fail(job, start, ret);
|
|
|
26ba25 |
- if (error_is_read) {
|
|
|
26ba25 |
- *error_is_read = true;
|
|
|
26ba25 |
+ if (job->use_copy_range) {
|
|
|
26ba25 |
+ ret = backup_cow_with_offload(job, start, end, is_write_notifier);
|
|
|
26ba25 |
+ if (ret < 0) {
|
|
|
26ba25 |
+ job->use_copy_range = false;
|
|
|
26ba25 |
}
|
|
|
26ba25 |
- hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
|
|
|
26ba25 |
- goto out;
|
|
|
26ba25 |
}
|
|
|
26ba25 |
-
|
|
|
26ba25 |
- if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
|
|
|
26ba25 |
- ret = blk_co_pwrite_zeroes(job->target, start,
|
|
|
26ba25 |
- bounce_qiov.size, BDRV_REQ_MAY_UNMAP);
|
|
|
26ba25 |
- } else {
|
|
|
26ba25 |
- ret = blk_co_pwritev(job->target, start,
|
|
|
26ba25 |
- bounce_qiov.size, &bounce_qiov,
|
|
|
26ba25 |
- job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
|
|
|
26ba25 |
+ if (!job->use_copy_range) {
|
|
|
26ba25 |
+ ret = backup_cow_with_bounce_buffer(job, start, end, is_write_notifier,
|
|
|
26ba25 |
+ error_is_read, &bounce_buffer);
|
|
|
26ba25 |
}
|
|
|
26ba25 |
if (ret < 0) {
|
|
|
26ba25 |
- trace_backup_do_cow_write_fail(job, start, ret);
|
|
|
26ba25 |
- if (error_is_read) {
|
|
|
26ba25 |
- *error_is_read = false;
|
|
|
26ba25 |
- }
|
|
|
26ba25 |
- hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
|
|
|
26ba25 |
- goto out;
|
|
|
26ba25 |
+ break;
|
|
|
26ba25 |
}
|
|
|
26ba25 |
|
|
|
26ba25 |
/* Publish progress, guest I/O counts as progress too. Note that the
|
|
|
26ba25 |
* offset field is an opaque progress value, it is not a disk offset.
|
|
|
26ba25 |
*/
|
|
|
26ba25 |
- job->bytes_read += n;
|
|
|
26ba25 |
- job_progress_update(&job->common.job, n);
|
|
|
26ba25 |
+ start += ret;
|
|
|
26ba25 |
+ job->bytes_read += ret;
|
|
|
26ba25 |
+ job_progress_update(&job->common.job, ret);
|
|
|
26ba25 |
+ ret = 0;
|
|
|
26ba25 |
}
|
|
|
26ba25 |
|
|
|
26ba25 |
-out:
|
|
|
26ba25 |
if (bounce_buffer) {
|
|
|
26ba25 |
qemu_vfree(bounce_buffer);
|
|
|
26ba25 |
}
|
|
|
26ba25 |
@@ -665,6 +727,12 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
|
|
|
26ba25 |
} else {
|
|
|
26ba25 |
job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
|
|
|
26ba25 |
}
|
|
|
26ba25 |
+ job->use_copy_range = true;
|
|
|
26ba25 |
+ job->copy_range_size = MIN_NON_ZERO(blk_get_max_transfer(job->common.blk),
|
|
|
26ba25 |
+ blk_get_max_transfer(job->target));
|
|
|
26ba25 |
+ job->copy_range_size = MAX(job->cluster_size,
|
|
|
26ba25 |
+ QEMU_ALIGN_UP(job->copy_range_size,
|
|
|
26ba25 |
+ job->cluster_size));
|
|
|
26ba25 |
|
|
|
26ba25 |
/* Required permissions are already taken with target's blk_new() */
|
|
|
26ba25 |
block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
|
|
|
26ba25 |
diff --git a/block/trace-events b/block/trace-events
|
|
|
26ba25 |
index 2d59b53..c35287b 100644
|
|
|
26ba25 |
--- a/block/trace-events
|
|
|
26ba25 |
+++ b/block/trace-events
|
|
|
26ba25 |
@@ -42,6 +42,7 @@ backup_do_cow_skip(void *job, int64_t start) "job %p start %"PRId64
|
|
|
26ba25 |
backup_do_cow_process(void *job, int64_t start) "job %p start %"PRId64
|
|
|
26ba25 |
backup_do_cow_read_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
|
|
|
26ba25 |
backup_do_cow_write_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
|
|
|
26ba25 |
+backup_do_cow_copy_range_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
|
|
|
26ba25 |
|
|
|
26ba25 |
# blockdev.c
|
|
|
26ba25 |
qmp_block_job_cancel(void *job) "job %p"
|
|
|
26ba25 |
--
|
|
|
26ba25 |
1.8.3.1
|
|
|
26ba25 |
|