From 6e2070840430f3009b201976b0a10a9d627adfed Mon Sep 17 00:00:00 2001
From: John Snow <jsnow@redhat.com>
Date: Wed, 18 Jul 2018 22:54:52 +0200
Subject: [PATCH 67/89] backup: Use copy offloading

RH-Author: John Snow <jsnow@redhat.com>
Message-id: <20180718225511.14878-17-jsnow@redhat.com>
Patchwork-id: 81399
O-Subject: [RHEL-7.6 qemu-kvm-rhev PATCH 16/35] backup: Use copy offloading
Bugzilla: 1207657
RH-Acked-by: Eric Blake <eblake@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Fam Zheng <famz@redhat.com>

From: Fam Zheng <famz@redhat.com>

The implementation is similar to that of 'qemu-img convert'. At the
beginning of the job, an offloaded copy is attempted. If it fails,
further I/O goes through the existing bounce buffer code path.

Then, as Kevin pointed out, both this and qemu-img convert can benefit
from a per-request fallback: one request may fail because, for example,
its offset is beyond EOF, while another may well be accepted by the
protocol layer. This will be implemented separately.

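The resulting copy loop in backup_do_cow() boils down to the following
sketch (an illustrative outline only, distilled from the hunk below; both
helper functions return the number of bytes copied, or a negative errno):

    while (start < end) {
        if (job->use_copy_range) {
            /* Try the offloaded path first... */
            ret = backup_cow_with_offload(job, start, end, is_write_notifier);
            if (ret < 0) {
                /* ...and fall back for good on the first failure. */
                job->use_copy_range = false;
            }
        }
        if (!job->use_copy_range) {
            ret = backup_cow_with_bounce_buffer(job, start, end,
                                                is_write_notifier,
                                                error_is_read, &bounce_buffer);
        }
        if (ret < 0) {
            break; /* a bounce-buffer failure is fatal for this request */
        }
        start += ret; /* advance by the number of bytes actually copied */
    }
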
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-id: 20180703023758.14422-4-famz@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
(cherry picked from commit 9ded4a0114968e98b41494fc035ba14f84cdf700)
Signed-off-by: John Snow <jsnow@redhat.com>
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
 block/backup.c     | 150 ++++++++++++++++++++++++++++++++++++++---------------
 block/trace-events |   1 +
 2 files changed, 110 insertions(+), 41 deletions(-)

diff --git a/block/backup.c b/block/backup.c
index 5661435..d26eeb5 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -45,6 +45,8 @@ typedef struct BackupBlockJob {
     QLIST_HEAD(, CowRequest) inflight_reqs;
 
     HBitmap *copy_bitmap;
+    bool use_copy_range;
+    int64_t copy_range_size;
 } BackupBlockJob;
 
 static const BlockJobDriver backup_job_driver;
@@ -86,19 +88,101 @@ static void cow_request_end(CowRequest *req)
     qemu_co_queue_restart_all(&req->wait_queue);
 }
 
+/* Copy range to target with a bounce buffer and return the bytes copied. If
+ * an error occurred, return a negative error number. */
+static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job,
+                                                      int64_t start,
+                                                      int64_t end,
+                                                      bool is_write_notifier,
+                                                      bool *error_is_read,
+                                                      void **bounce_buffer)
+{
+    int ret;
+    struct iovec iov;
+    QEMUIOVector qiov;
+    BlockBackend *blk = job->common.blk;
+    int nbytes;
+
+    hbitmap_reset(job->copy_bitmap, start / job->cluster_size, 1);
+    nbytes = MIN(job->cluster_size, job->len - start);
+    if (!*bounce_buffer) {
+        *bounce_buffer = blk_blockalign(blk, job->cluster_size);
+    }
+    iov.iov_base = *bounce_buffer;
+    iov.iov_len = nbytes;
+    qemu_iovec_init_external(&qiov, &iov, 1);
+
+    ret = blk_co_preadv(blk, start, qiov.size, &qiov,
+                        is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
+    if (ret < 0) {
+        trace_backup_do_cow_read_fail(job, start, ret);
+        if (error_is_read) {
+            *error_is_read = true;
+        }
+        goto fail;
+    }
+
+    if (qemu_iovec_is_zero(&qiov)) {
+        ret = blk_co_pwrite_zeroes(job->target, start,
+                                   qiov.size, BDRV_REQ_MAY_UNMAP);
+    } else {
+        ret = blk_co_pwritev(job->target, start,
+                             qiov.size, &qiov,
+                             job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
+    }
+    if (ret < 0) {
+        trace_backup_do_cow_write_fail(job, start, ret);
+        if (error_is_read) {
+            *error_is_read = false;
+        }
+        goto fail;
+    }
+
+    return nbytes;
+fail:
+    hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
+    return ret;
+
+}
+
+/* Copy range to target and return the bytes copied. If an error occurred,
+ * return a negative error number. */
+static int coroutine_fn backup_cow_with_offload(BackupBlockJob *job,
+                                                int64_t start,
+                                                int64_t end,
+                                                bool is_write_notifier)
+{
+    int ret;
+    int nr_clusters;
+    BlockBackend *blk = job->common.blk;
+    int nbytes;
+
+    assert(QEMU_IS_ALIGNED(job->copy_range_size, job->cluster_size));
+    nbytes = MIN(job->copy_range_size, end - start);
+    nr_clusters = DIV_ROUND_UP(nbytes, job->cluster_size);
+    hbitmap_reset(job->copy_bitmap, start / job->cluster_size,
+                  nr_clusters);
+    ret = blk_co_copy_range(blk, start, job->target, start, nbytes,
+                            is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
+    if (ret < 0) {
+        trace_backup_do_cow_copy_range_fail(job, start, ret);
+        hbitmap_set(job->copy_bitmap, start / job->cluster_size,
+                    nr_clusters);
+        return ret;
+    }
+
+    return nbytes;
+}
+
 static int coroutine_fn backup_do_cow(BackupBlockJob *job,
                                       int64_t offset, uint64_t bytes,
                                       bool *error_is_read,
                                       bool is_write_notifier)
 {
-    BlockBackend *blk = job->common.blk;
     CowRequest cow_request;
-    struct iovec iov;
-    QEMUIOVector bounce_qiov;
-    void *bounce_buffer = NULL;
     int ret = 0;
     int64_t start, end; /* bytes */
-    int n; /* bytes */
+    void *bounce_buffer = NULL;
 
     qemu_co_rwlock_rdlock(&job->flush_rwlock);
 
@@ -110,60 +194,38 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
     wait_for_overlapping_requests(job, start, end);
     cow_request_begin(&cow_request, job, start, end);
 
-    for (; start < end; start += job->cluster_size) {
+    while (start < end) {
         if (!hbitmap_get(job->copy_bitmap, start / job->cluster_size)) {
             trace_backup_do_cow_skip(job, start);
+            start += job->cluster_size;
             continue; /* already copied */
         }
-        hbitmap_reset(job->copy_bitmap, start / job->cluster_size, 1);
 
         trace_backup_do_cow_process(job, start);
 
-        n = MIN(job->cluster_size, job->len - start);
-
-        if (!bounce_buffer) {
-            bounce_buffer = blk_blockalign(blk, job->cluster_size);
-        }
-        iov.iov_base = bounce_buffer;
-        iov.iov_len = n;
-        qemu_iovec_init_external(&bounce_qiov, &iov, 1);
-
-        ret = blk_co_preadv(blk, start, bounce_qiov.size, &bounce_qiov,
-                            is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
-        if (ret < 0) {
-            trace_backup_do_cow_read_fail(job, start, ret);
-            if (error_is_read) {
-                *error_is_read = true;
+        if (job->use_copy_range) {
+            ret = backup_cow_with_offload(job, start, end, is_write_notifier);
+            if (ret < 0) {
+                job->use_copy_range = false;
             }
-            hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
-            goto out;
         }
-
-        if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
-            ret = blk_co_pwrite_zeroes(job->target, start,
-                                       bounce_qiov.size, BDRV_REQ_MAY_UNMAP);
-        } else {
-            ret = blk_co_pwritev(job->target, start,
-                                 bounce_qiov.size, &bounce_qiov,
-                                 job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
+        if (!job->use_copy_range) {
+            ret = backup_cow_with_bounce_buffer(job, start, end, is_write_notifier,
+                                                error_is_read, &bounce_buffer);
         }
         if (ret < 0) {
-            trace_backup_do_cow_write_fail(job, start, ret);
-            if (error_is_read) {
-                *error_is_read = false;
-            }
-            hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
-            goto out;
+            break;
         }
 
         /* Publish progress, guest I/O counts as progress too.  Note that the
          * offset field is an opaque progress value, it is not a disk offset.
          */
-        job->bytes_read += n;
-        job_progress_update(&job->common.job, n);
+        start += ret;
+        job->bytes_read += ret;
+        job_progress_update(&job->common.job, ret);
+        ret = 0;
     }
 
-out:
     if (bounce_buffer) {
         qemu_vfree(bounce_buffer);
     }
@@ -665,6 +727,12 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
     } else {
         job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
     }
+    job->use_copy_range = true;
+    job->copy_range_size = MIN_NON_ZERO(blk_get_max_transfer(job->common.blk),
+                                        blk_get_max_transfer(job->target));
+    job->copy_range_size = MAX(job->cluster_size,
+                               QEMU_ALIGN_UP(job->copy_range_size,
+                                             job->cluster_size));
 
     /* Required permissions are already taken with target's blk_new() */
     block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
diff --git a/block/trace-events b/block/trace-events
index 2d59b53..c35287b 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -42,6 +42,7 @@ backup_do_cow_skip(void *job, int64_t start) "job %p start %"PRId64
 backup_do_cow_process(void *job, int64_t start) "job %p start %"PRId64
 backup_do_cow_read_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
 backup_do_cow_write_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
+backup_do_cow_copy_range_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
 
 # blockdev.c
 qmp_block_job_cancel(void *job) "job %p"
-- 
1.8.3.1
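
For a sense of the numbers produced by the copy_range_size computation in
the backup_job_create() hunk above, here is a small self-contained example
(the macros are simplified stand-ins for QEMU's MIN_NON_ZERO, QEMU_ALIGN_UP
and MAX, and the transfer limits are hypothetical):

    #include <inttypes.h>
    #include <stdio.h>

    /* Simplified stand-ins for the QEMU macros used in the hunk above. */
    #define MIN_NON_ZERO(a, b) ((a) == 0 ? (b) : ((b) == 0 || (a) < (b) ? (a) : (b)))
    #define ALIGN_UP(n, m)     (((n) + (m) - 1) / (m) * (m))
    #define MAX(a, b)          ((a) > (b) ? (a) : (b))

    int main(void)
    {
        int64_t cluster_size = 64 * 1024;   /* 64 KiB backup clusters */
        int64_t src_max = 8 * 1024 * 1024;  /* hypothetical source limit: 8 MiB */
        int64_t dst_max = 0;                /* 0 means "unlimited" on the target */

        /* Take the smaller non-zero limit, round up to whole clusters,
         * and never go below one cluster. */
        int64_t copy_range_size = MIN_NON_ZERO(src_max, dst_max);
        copy_range_size = MAX(cluster_size, ALIGN_UP(copy_range_size, cluster_size));

        /* Prints 8388608: one offloaded request spans up to 128 clusters. */
        printf("copy_range_size = %" PRId64 "\n", copy_range_size);
        return 0;
    }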