From 6e2070840430f3009b201976b0a10a9d627adfed Mon Sep 17 00:00:00 2001
From: John Snow <jsnow@redhat.com>
Date: Wed, 18 Jul 2018 22:54:52 +0200
Subject: [PATCH 67/89] backup: Use copy offloading

RH-Author: John Snow <jsnow@redhat.com>
Message-id: <20180718225511.14878-17-jsnow@redhat.com>
Patchwork-id: 81399
O-Subject: [RHEL-7.6 qemu-kvm-rhev PATCH 16/35] backup: Use copy offloading
Bugzilla: 1207657
RH-Acked-by: Eric Blake <eblake@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Fam Zheng <famz@redhat.com>

From: Fam Zheng <famz@redhat.com>

The implementation is similar to that of 'qemu-img convert': at the
beginning of the job, an offloaded copy is attempted. If it fails,
further I/O goes through the existing bounce buffer code path.

As Kevin pointed out, both this and qemu-img convert could further
benefit from a local check: one request may fail because, for example,
its offset is beyond EOF, while another may well be accepted by the
protocol layer. This will be implemented separately.
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-id: 20180703023758.14422-4-famz@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
(cherry picked from commit 9ded4a0114968e98b41494fc035ba14f84cdf700)
Signed-off-by: John Snow <jsnow@redhat.com>
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
 block/backup.c     | 150 ++++++++++++++++++++++++++++++++++++++---------------
 block/trace-events |   1 +
 2 files changed, 110 insertions(+), 41 deletions(-)

diff --git a/block/backup.c b/block/backup.c
index 5661435..d26eeb5 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -45,6 +45,8 @@ typedef struct BackupBlockJob {
     QLIST_HEAD(, CowRequest) inflight_reqs;
 
     HBitmap *copy_bitmap;
+    bool use_copy_range;
+    int64_t copy_range_size;
 } BackupBlockJob;
 
 static const BlockJobDriver backup_job_driver;
@@ -86,19 +88,101 @@ static void cow_request_end(CowRequest *req)
     qemu_co_queue_restart_all(&req->wait_queue);
 }
 
+/* Copy range to target with a bounce buffer and return the bytes copied. If
+ * an error occurred, return a negative error number. */
+static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job,
+                                                      int64_t start,
+                                                      int64_t end,
+                                                      bool is_write_notifier,
+                                                      bool *error_is_read,
+                                                      void **bounce_buffer)
+{
+    int ret;
+    struct iovec iov;
+    QEMUIOVector qiov;
+    BlockBackend *blk = job->common.blk;
+    int nbytes;
+
+    hbitmap_reset(job->copy_bitmap, start / job->cluster_size, 1);
+    nbytes = MIN(job->cluster_size, job->len - start);
+    if (!*bounce_buffer) {
+        *bounce_buffer = blk_blockalign(blk, job->cluster_size);
+    }
+    iov.iov_base = *bounce_buffer;
+    iov.iov_len = nbytes;
+    qemu_iovec_init_external(&qiov, &iov, 1);
+
+    ret = blk_co_preadv(blk, start, qiov.size, &qiov,
+                        is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
+    if (ret < 0) {
+        trace_backup_do_cow_read_fail(job, start, ret);
+        if (error_is_read) {
+            *error_is_read = true;
+        }
+        goto fail;
+    }
+
+    if (qemu_iovec_is_zero(&qiov)) {
+        ret = blk_co_pwrite_zeroes(job->target, start,
+                                   qiov.size, BDRV_REQ_MAY_UNMAP);
+    } else {
+        ret = blk_co_pwritev(job->target, start,
+                             qiov.size, &qiov,
+                             job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
+    }
+    if (ret < 0) {
+        trace_backup_do_cow_write_fail(job, start, ret);
+        if (error_is_read) {
+            *error_is_read = false;
+        }
+        goto fail;
+    }
+
+    return nbytes;
+fail:
+    hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
+    return ret;
+
+}
+
+/* Copy range to target and return the bytes copied. If an error occurred,
+ * return a negative error number. */
+static int coroutine_fn backup_cow_with_offload(BackupBlockJob *job,
+                                                int64_t start,
+                                                int64_t end,
+                                                bool is_write_notifier)
+{
+    int ret;
+    int nr_clusters;
+    BlockBackend *blk = job->common.blk;
+    int nbytes;
+
+    assert(QEMU_IS_ALIGNED(job->copy_range_size, job->cluster_size));
+    nbytes = MIN(job->copy_range_size, end - start);
+    nr_clusters = DIV_ROUND_UP(nbytes, job->cluster_size);
+    hbitmap_reset(job->copy_bitmap, start / job->cluster_size,
+                  nr_clusters);
+    ret = blk_co_copy_range(blk, start, job->target, start, nbytes,
+                            is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
+    if (ret < 0) {
+        trace_backup_do_cow_copy_range_fail(job, start, ret);
+        hbitmap_set(job->copy_bitmap, start / job->cluster_size,
+                    nr_clusters);
+        return ret;
+    }
+
+    return nbytes;
+}
+
 static int coroutine_fn backup_do_cow(BackupBlockJob *job,
                                       int64_t offset, uint64_t bytes,
                                       bool *error_is_read,
                                       bool is_write_notifier)
 {
-    BlockBackend *blk = job->common.blk;
     CowRequest cow_request;
-    struct iovec iov;
-    QEMUIOVector bounce_qiov;
-    void *bounce_buffer = NULL;
     int ret = 0;
    int64_t start, end; /* bytes */
-    int n; /* bytes */
+    void *bounce_buffer = NULL;
 
     qemu_co_rwlock_rdlock(&job->flush_rwlock);
 
@@ -110,60 +194,38 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
     wait_for_overlapping_requests(job, start, end);
     cow_request_begin(&cow_request, job, start, end);
 
-    for (; start < end; start += job->cluster_size) {
+    while (start < end) {
         if (!hbitmap_get(job->copy_bitmap, start / job->cluster_size)) {
             trace_backup_do_cow_skip(job, start);
+            start += job->cluster_size;
             continue; /* already copied */
         }
-        hbitmap_reset(job->copy_bitmap, start / job->cluster_size, 1);
 
         trace_backup_do_cow_process(job, start);
 
-        n = MIN(job->cluster_size, job->len - start);
-
-        if (!bounce_buffer) {
-            bounce_buffer = blk_blockalign(blk, job->cluster_size);
-        }
-        iov.iov_base = bounce_buffer;
-        iov.iov_len = n;
-        qemu_iovec_init_external(&bounce_qiov, &iov, 1);
-
-        ret = blk_co_preadv(blk, start, bounce_qiov.size, &bounce_qiov,
-                            is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
-        if (ret < 0) {
-            trace_backup_do_cow_read_fail(job, start, ret);
-            if (error_is_read) {
-                *error_is_read = true;
+        if (job->use_copy_range) {
+            ret = backup_cow_with_offload(job, start, end, is_write_notifier);
+            if (ret < 0) {
+                job->use_copy_range = false;
             }
-            hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
-            goto out;
         }
-
-        if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
-            ret = blk_co_pwrite_zeroes(job->target, start,
-                                       bounce_qiov.size, BDRV_REQ_MAY_UNMAP);
-        } else {
-            ret = blk_co_pwritev(job->target, start,
-                                 bounce_qiov.size, &bounce_qiov,
-                                 job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
+        if (!job->use_copy_range) {
+            ret = backup_cow_with_bounce_buffer(job, start, end, is_write_notifier,
+                                                error_is_read, &bounce_buffer);
         }
         if (ret < 0) {
-            trace_backup_do_cow_write_fail(job, start, ret);
-            if (error_is_read) {
-                *error_is_read = false;
-            }
-            hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
-            goto out;
+            break;
         }
 
         /* Publish progress, guest I/O counts as progress too.  Note that the
          * offset field is an opaque progress value, it is not a disk offset.
          */
-        job->bytes_read += n;
-        job_progress_update(&job->common.job, n);
+        start += ret;
+        job->bytes_read += ret;
+        job_progress_update(&job->common.job, ret);
+        ret = 0;
     }
 
-out:
     if (bounce_buffer) {
         qemu_vfree(bounce_buffer);
     }
@@ -665,6 +727,12 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
     } else {
         job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
     }
+    job->use_copy_range = true;
+    job->copy_range_size = MIN_NON_ZERO(blk_get_max_transfer(job->common.blk),
+                                        blk_get_max_transfer(job->target));
+    job->copy_range_size = MAX(job->cluster_size,
+                               QEMU_ALIGN_UP(job->copy_range_size,
+                                             job->cluster_size));
 
     /* Required permissions are already taken with target's blk_new() */
     block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
diff --git a/block/trace-events b/block/trace-events
index 2d59b53..c35287b 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -42,6 +42,7 @@ backup_do_cow_skip(void *job, int64_t start) "job %p start %"PRId64
 backup_do_cow_process(void *job, int64_t start) "job %p start %"PRId64
 backup_do_cow_read_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
 backup_do_cow_write_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
+backup_do_cow_copy_range_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
 
 # blockdev.c
 qmp_block_job_cancel(void *job) "job %p"
-- 
1.8.3.1
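
Editor's note: the pattern this patch introduces in backup_do_cow() can be seen
in isolation below: attempt the offloaded copy first, and on the first failure
disable it for the remainder of the job, falling back to per-cluster bounce
buffer copies. This is a minimal standalone C sketch, not QEMU code;
copy_offload(), copy_bounce(), CLUSTER_SIZE and COPY_RANGE_SIZE are
illustrative stand-ins for blk_co_copy_range(), the bounce buffer path,
job->cluster_size and job->copy_range_size. Compiled with a C99 compiler, it
prints "copy ok".

/* Sketch of the offload-first, bounce-buffer-fallback copy loop. */
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define CLUSTER_SIZE    64
/* Offload granularity; the patch computes this as
 * MAX(cluster_size, ALIGN_UP(min max_transfer of both ends, cluster_size)),
 * so it is always a whole number of clusters. */
#define COPY_RANGE_SIZE (4 * CLUSTER_SIZE)

static bool offload_works = true;   /* pretend hardware/protocol support */

/* Stand-in for the offloaded path: copies up to COPY_RANGE_SIZE bytes,
 * or fails with a negative errno as blk_co_copy_range() would. */
static int64_t copy_offload(const uint8_t *src, uint8_t *dst,
                            int64_t start, int64_t end)
{
    int64_t nbytes = end - start;
    if (nbytes > COPY_RANGE_SIZE) {
        nbytes = COPY_RANGE_SIZE;
    }
    if (!offload_works) {
        return -ENOTSUP;            /* e.g. driver lacks copy_range support */
    }
    memcpy(dst + start, src + start, nbytes);
    return nbytes;                  /* bytes actually copied */
}

/* Stand-in for the bounce-buffer path: one cluster at a time. */
static int64_t copy_bounce(const uint8_t *src, uint8_t *dst,
                           int64_t start, int64_t end)
{
    uint8_t bounce[CLUSTER_SIZE];
    int64_t nbytes = end - start;
    if (nbytes > CLUSTER_SIZE) {
        nbytes = CLUSTER_SIZE;
    }
    memcpy(bounce, src + start, nbytes);
    memcpy(dst + start, bounce, nbytes);
    return nbytes;
}

static int do_cow(const uint8_t *src, uint8_t *dst, int64_t len)
{
    bool use_copy_range = true;     /* mirrors job->use_copy_range */
    int64_t start = 0;
    int64_t ret = 0;

    while (start < len) {
        if (use_copy_range) {
            ret = copy_offload(src, dst, start, len);
            if (ret < 0) {
                use_copy_range = false; /* fall back for the rest of the job */
            }
        }
        if (!use_copy_range) {
            ret = copy_bounce(src, dst, start, len);
        }
        if (ret < 0) {
            return (int)ret;
        }
        start += ret;               /* advance by however much was copied */
    }
    return 0;
}

int main(void)
{
    uint8_t src[10 * CLUSTER_SIZE], dst[10 * CLUSTER_SIZE] = { 0 };
    memset(src, 0xab, sizeof(src));

    offload_works = false;          /* force the bounce-buffer fallback */
    if (do_cow(src, dst, sizeof(src)) != 0 ||
        memcmp(src, dst, sizeof(src)) != 0) {
        fprintf(stderr, "copy failed\n");
        return 1;
    }
    puts("copy ok");
    return 0;
}

Note how, as in the patch, the loop advances by the number of bytes each helper
reports rather than by a fixed cluster, so a successful offload can cover
several clusters per iteration while the fallback proceeds one cluster at a
time.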