Blame SOURCES/kvm-backup-Use-copy-offloading.patch

From 6e2070840430f3009b201976b0a10a9d627adfed Mon Sep 17 00:00:00 2001
From: John Snow <jsnow@redhat.com>
Date: Wed, 18 Jul 2018 22:54:52 +0200
Subject: [PATCH 67/89] backup: Use copy offloading

RH-Author: John Snow <jsnow@redhat.com>
Message-id: <20180718225511.14878-17-jsnow@redhat.com>
Patchwork-id: 81399
O-Subject: [RHEL-7.6 qemu-kvm-rhev PATCH 16/35] backup: Use copy offloading
Bugzilla: 1207657
RH-Acked-by: Eric Blake <eblake@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Fam Zheng <famz@redhat.com>

From: Fam Zheng <famz@redhat.com>

The implementation is similar to 'qemu-img convert': at the beginning of
the job an offloaded copy is attempted, and if it fails, further I/O goes
through the existing bounce buffer code path.

Then, as Kevin pointed out, both this and qemu-img convert could benefit
from a more local check: one request may fail because, for example, its
offset is beyond EOF, while another may well be accepted by the protocol
layer. This will be implemented separately.

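For a feel of the control flow before reading the diff, here is a minimal
standalone sketch of the pattern the patch introduces: try the offloaded
copy first, and after the first failure fall back to the bounce-buffer
path for the rest of the job, retrying the failed cluster. This is not
QEMU code; copy_offloaded(), copy_bounce() and the fixed 64 KiB cluster
size are hypothetical stand-ins for blk_co_copy_range() and the
bounce-buffer read/write.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the two copy paths; each returns the number
 * of bytes copied, or a negative value on error. */
static int64_t copy_offloaded(int64_t offset, int64_t len)
{
    (void)offset; (void)len;
    return -1;            /* pretend the backend cannot offload the copy */
}

static int64_t copy_bounce(int64_t offset, int64_t len)
{
    (void)offset;
    return len;           /* pretend read + write through a buffer worked */
}

int main(void)
{
    const int64_t cluster = 64 * 1024;
    const int64_t end = 4 * cluster;   /* four clusters to copy */
    bool use_copy_range = true;        /* start optimistic, as the patch does */
    int64_t start = 0;

    while (start < end) {
        int64_t ret = -1;

        if (use_copy_range) {
            ret = copy_offloaded(start, cluster);
            if (ret < 0) {
                /* One failure disables offloading for the rest of the job... */
                use_copy_range = false;
            }
        }
        if (!use_copy_range) {
            /* ...and the same cluster is retried via the bounce-buffer path. */
            ret = copy_bounce(start, cluster);
        }
        if (ret < 0) {
            return 1;                  /* both paths failed */
        }
        start += ret;                  /* advance by what was actually copied */
    }
    printf("copied %lld bytes\n", (long long)end);
    return 0;
}
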
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-id: 20180703023758.14422-4-famz@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
(cherry picked from commit 9ded4a0114968e98b41494fc035ba14f84cdf700)
Signed-off-by: John Snow <jsnow@redhat.com>
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
 block/backup.c     | 150 ++++++++++++++++++++++++++++++++++++++---------------
 block/trace-events |   1 +
 2 files changed, 110 insertions(+), 41 deletions(-)

diff --git a/block/backup.c b/block/backup.c
index 5661435..d26eeb5 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -45,6 +45,8 @@ typedef struct BackupBlockJob {
     QLIST_HEAD(, CowRequest) inflight_reqs;
 
     HBitmap *copy_bitmap;
+    bool use_copy_range;
+    int64_t copy_range_size;
 } BackupBlockJob;
 
 static const BlockJobDriver backup_job_driver;
@@ -86,19 +88,101 @@ static void cow_request_end(CowRequest *req)
     qemu_co_queue_restart_all(&req->wait_queue);
 }
 
+/* Copy range to target with a bounce buffer and return the bytes copied. If
+ * error occured, return a negative error number */
+static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job,
+                                                      int64_t start,
+                                                      int64_t end,
+                                                      bool is_write_notifier,
+                                                      bool *error_is_read,
+                                                      void **bounce_buffer)
+{
+    int ret;
+    struct iovec iov;
+    QEMUIOVector qiov;
+    BlockBackend *blk = job->common.blk;
+    int nbytes;
+
+    hbitmap_reset(job->copy_bitmap, start / job->cluster_size, 1);
+    nbytes = MIN(job->cluster_size, job->len - start);
+    if (!*bounce_buffer) {
+        *bounce_buffer = blk_blockalign(blk, job->cluster_size);
+    }
+    iov.iov_base = *bounce_buffer;
+    iov.iov_len = nbytes;
+    qemu_iovec_init_external(&qiov, &iov, 1);
+
+    ret = blk_co_preadv(blk, start, qiov.size, &qiov,
+                        is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
+    if (ret < 0) {
+        trace_backup_do_cow_read_fail(job, start, ret);
+        if (error_is_read) {
+            *error_is_read = true;
+        }
+        goto fail;
+    }
+
+    if (qemu_iovec_is_zero(&qiov)) {
+        ret = blk_co_pwrite_zeroes(job->target, start,
+                                   qiov.size, BDRV_REQ_MAY_UNMAP);
+    } else {
+        ret = blk_co_pwritev(job->target, start,
+                             qiov.size, &qiov,
+                             job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
+    }
+    if (ret < 0) {
+        trace_backup_do_cow_write_fail(job, start, ret);
+        if (error_is_read) {
+            *error_is_read = false;
+        }
+        goto fail;
+    }
+
+    return nbytes;
+fail:
+    hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
+    return ret;
+
+}
+
+/* Copy range to target and return the bytes copied. If error occured, return a
+ * negative error number. */
+static int coroutine_fn backup_cow_with_offload(BackupBlockJob *job,
+                                                int64_t start,
+                                                int64_t end,
+                                                bool is_write_notifier)
+{
+    int ret;
+    int nr_clusters;
+    BlockBackend *blk = job->common.blk;
+    int nbytes;
+
+    assert(QEMU_IS_ALIGNED(job->copy_range_size, job->cluster_size));
+    nbytes = MIN(job->copy_range_size, end - start);
+    nr_clusters = DIV_ROUND_UP(nbytes, job->cluster_size);
+    hbitmap_reset(job->copy_bitmap, start / job->cluster_size,
+                  nr_clusters);
+    ret = blk_co_copy_range(blk, start, job->target, start, nbytes,
+                            is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
+    if (ret < 0) {
+        trace_backup_do_cow_copy_range_fail(job, start, ret);
+        hbitmap_set(job->copy_bitmap, start / job->cluster_size,
+                    nr_clusters);
+        return ret;
+    }
+
+    return nbytes;
+}
+
 static int coroutine_fn backup_do_cow(BackupBlockJob *job,
                                       int64_t offset, uint64_t bytes,
                                       bool *error_is_read,
                                       bool is_write_notifier)
 {
-    BlockBackend *blk = job->common.blk;
     CowRequest cow_request;
-    struct iovec iov;
-    QEMUIOVector bounce_qiov;
-    void *bounce_buffer = NULL;
     int ret = 0;
     int64_t start, end; /* bytes */
-    int n; /* bytes */
+    void *bounce_buffer = NULL;
 
     qemu_co_rwlock_rdlock(&job->flush_rwlock);
 
@@ -110,60 +194,38 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
     wait_for_overlapping_requests(job, start, end);
     cow_request_begin(&cow_request, job, start, end);
 
-    for (; start < end; start += job->cluster_size) {
+    while (start < end) {
         if (!hbitmap_get(job->copy_bitmap, start / job->cluster_size)) {
             trace_backup_do_cow_skip(job, start);
+            start += job->cluster_size;
             continue; /* already copied */
         }
-        hbitmap_reset(job->copy_bitmap, start / job->cluster_size, 1);
 
         trace_backup_do_cow_process(job, start);
 
-        n = MIN(job->cluster_size, job->len - start);
-
-        if (!bounce_buffer) {
-            bounce_buffer = blk_blockalign(blk, job->cluster_size);
-        }
-        iov.iov_base = bounce_buffer;
-        iov.iov_len = n;
-        qemu_iovec_init_external(&bounce_qiov, &iov, 1);
-
-        ret = blk_co_preadv(blk, start, bounce_qiov.size, &bounce_qiov,
-                            is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
-        if (ret < 0) {
-            trace_backup_do_cow_read_fail(job, start, ret);
-            if (error_is_read) {
-                *error_is_read = true;
+        if (job->use_copy_range) {
+            ret = backup_cow_with_offload(job, start, end, is_write_notifier);
+            if (ret < 0) {
+                job->use_copy_range = false;
             }
-            hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
-            goto out;
         }
-
-        if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
-            ret = blk_co_pwrite_zeroes(job->target, start,
-                                       bounce_qiov.size, BDRV_REQ_MAY_UNMAP);
-        } else {
-            ret = blk_co_pwritev(job->target, start,
-                                 bounce_qiov.size, &bounce_qiov,
-                                 job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
+        if (!job->use_copy_range) {
+            ret = backup_cow_with_bounce_buffer(job, start, end, is_write_notifier,
+                                                error_is_read, &bounce_buffer);
         }
         if (ret < 0) {
-            trace_backup_do_cow_write_fail(job, start, ret);
-            if (error_is_read) {
-                *error_is_read = false;
-            }
-            hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
-            goto out;
+            break;
         }
 
         /* Publish progress, guest I/O counts as progress too.  Note that the
          * offset field is an opaque progress value, it is not a disk offset.
          */
-        job->bytes_read += n;
-        job_progress_update(&job->common.job, n);
+        start += ret;
+        job->bytes_read += ret;
+        job_progress_update(&job->common.job, ret);
+        ret = 0;
     }
 
-out:
     if (bounce_buffer) {
         qemu_vfree(bounce_buffer);
     }
@@ -665,6 +727,12 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
     } else {
         job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
     }
+    job->use_copy_range = true;
+    job->copy_range_size = MIN_NON_ZERO(blk_get_max_transfer(job->common.blk),
+                                        blk_get_max_transfer(job->target));
+    job->copy_range_size = MAX(job->cluster_size,
+                               QEMU_ALIGN_UP(job->copy_range_size,
+                                             job->cluster_size));
 
     /* Required permissions are already taken with target's blk_new() */
     block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
diff --git a/block/trace-events b/block/trace-events
index 2d59b53..c35287b 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -42,6 +42,7 @@ backup_do_cow_skip(void *job, int64_t start) "job %p start %"PRId64
 backup_do_cow_process(void *job, int64_t start) "job %p start %"PRId64
 backup_do_cow_read_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
 backup_do_cow_write_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
+backup_do_cow_copy_range_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
 
 # blockdev.c
 qmp_block_job_cancel(void *job) "job %p"
-- 
1.8.3.1
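
A note on the copy_range_size computation added to backup_job_create()
above: the offload request size is the smaller non-zero transfer limit of
source and target, rounded up to a multiple of the cluster size and kept
at least one cluster large. The snippet below only illustrates that
arithmetic with made-up numbers; the macros are simplified local
equivalents of QEMU's MIN_NON_ZERO/QEMU_ALIGN_UP/MAX, and the 8 MiB and
64 KiB values are assumptions, not values taken from the patch.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define MIN_NON_ZERO(a, b) ((a) == 0 ? (b) : ((b) == 0 ? (a) : ((a) < (b) ? (a) : (b))))
#define QEMU_ALIGN_UP(n, m) (((n) + (m) - 1) / (m) * (m))
#define MAX(a, b)           ((a) > (b) ? (a) : (b))

int main(void)
{
    const int64_t cluster_size = 64 * 1024;        /* assumed backup cluster size */
    const int64_t src_max      = 8 * 1024 * 1024;  /* invented source limit */
    const int64_t dst_max      = 0;                /* invented: no target limit */

    int64_t copy_range_size = MIN_NON_ZERO(src_max, dst_max);            /* 8 MiB */
    copy_range_size = MAX(cluster_size,
                          QEMU_ALIGN_UP(copy_range_size, cluster_size));

    /* 8 MiB is already a multiple of 64 KiB, so one offloaded request can
     * cover up to 128 clusters, versus one cluster per bounce-buffer pass. */
    printf("copy_range_size = %" PRId64 " bytes (%" PRId64 " clusters)\n",
           copy_range_size, copy_range_size / cluster_size);
    return 0;
}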