From 1df22a2aa1fc70f0b2b268bbe0c184d95ce74b04 Mon Sep 17 00:00:00 2001
From: John Snow <jsnow@redhat.com>
Date: Wed, 18 Jul 2018 22:54:52 +0200
Subject: [PATCH 234/268] backup: Use copy offloading

RH-Author: John Snow <jsnow@redhat.com>
Message-id: <20180718225511.14878-17-jsnow@redhat.com>
Patchwork-id: 81399
O-Subject: [RHEL-7.6 qemu-kvm-rhev PATCH 16/35] backup: Use copy offloading
Bugzilla: 1207657
RH-Acked-by: Eric Blake <eblake@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Fam Zheng <famz@redhat.com>

From: Fam Zheng <famz@redhat.com>
The implementation is similar to that of 'qemu-img convert': at the
beginning of the job, an offloaded copy is attempted. If it fails,
further I/O goes through the existing bounce-buffer code path.
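
The control flow reduces to a simple pattern: try the offloaded path
first, and permanently disable it for the rest of the job on the first
failure. A minimal, self-contained C sketch of that pattern follows;
the try_offload()/bounce_copy() helpers are hypothetical stand-ins for
blk_co_copy_range() and the bounce-buffer path, not QEMU APIs:

    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical stand-ins for the offloaded and bounce-buffer copy
     * paths; both return bytes copied, or a negative errno on error. */
    static int try_offload(long start, long bytes)
    {
        (void)start; (void)bytes;
        return -95;                    /* e.g. -EOPNOTSUPP: not supported */
    }

    static int bounce_copy(long start, long bytes)
    {
        (void)start;
        return (int)bytes;             /* pretend the copy succeeded */
    }

    int main(void)
    {
        bool use_copy_range = true;    /* mirrors job->use_copy_range */
        long start = 0, end = 1L << 20, cluster = 64 * 1024;

        while (start < end) {
            int ret = -1;
            if (use_copy_range) {
                ret = try_offload(start, cluster);
                if (ret < 0) {
                    use_copy_range = false;   /* fall back from now on */
                }
            }
            if (!use_copy_range) {
                ret = bounce_copy(start, cluster);
            }
            if (ret < 0) {
                return 1;              /* both paths failed */
            }
            start += ret;              /* advance by bytes actually copied */
        }
        printf("copied %ld bytes\n", end);
        return 0;
    }
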
As Kevin pointed out, both this job and qemu-img convert can further
benefit from a local, per-request check: one request may fail because,
for example, its offset is beyond EOF, while another may well be
accepted by the protocol layer. This will be implemented separately.
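
A purely illustrative sketch of what such a per-request check could
look like (not part of this patch), reusing the hypothetical
try_offload()/bounce_copy() stand-ins from the sketch above: an
offload failure triggers a local fallback for the failing request
only, instead of disabling offloading for the remainder of the job:

    /* Hypothetical per-request fallback; prototypes repeated from the
     * previous sketch for self-containment. */
    static int try_offload(long start, long bytes);
    static int bounce_copy(long start, long bytes);

    static int copy_one(long start, long bytes)
    {
        int ret = try_offload(start, bytes);
        if (ret < 0) {
            /* This request was rejected (e.g. the offset is beyond EOF),
             * but the next request may still be offloadable, so only
             * this one falls back to the bounce buffer. */
            ret = bounce_copy(start, bytes);
        }
        return ret;
    }
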
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-id: 20180703023758.14422-4-famz@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
(cherry picked from commit 9ded4a0114968e98b41494fc035ba14f84cdf700)
Signed-off-by: John Snow <jsnow@redhat.com>
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
 block/backup.c     | 150 ++++++++++++++++++++++++++++++++++++++---------------
 block/trace-events |   1 +
 2 files changed, 110 insertions(+), 41 deletions(-)
diff --git a/block/backup.c b/block/backup.c
index 5661435..d26eeb5 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -45,6 +45,8 @@ typedef struct BackupBlockJob {
     QLIST_HEAD(, CowRequest) inflight_reqs;
 
     HBitmap *copy_bitmap;
+    bool use_copy_range;
+    int64_t copy_range_size;
 } BackupBlockJob;
 
 static const BlockJobDriver backup_job_driver;
@@ -86,19 +88,101 @@ static void cow_request_end(CowRequest *req)
     qemu_co_queue_restart_all(&req->wait_queue);
 }
 
+/* Copy range to target with a bounce buffer and return the bytes copied. If
+ * an error occurred, return a negative error number. */
+static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job,
+                                                      int64_t start,
+                                                      int64_t end,
+                                                      bool is_write_notifier,
+                                                      bool *error_is_read,
+                                                      void **bounce_buffer)
+{
+    int ret;
+    struct iovec iov;
+    QEMUIOVector qiov;
+    BlockBackend *blk = job->common.blk;
+    int nbytes;
+
+    hbitmap_reset(job->copy_bitmap, start / job->cluster_size, 1);
+    nbytes = MIN(job->cluster_size, job->len - start);
+    if (!*bounce_buffer) {
+        *bounce_buffer = blk_blockalign(blk, job->cluster_size);
+    }
+    iov.iov_base = *bounce_buffer;
+    iov.iov_len = nbytes;
+    qemu_iovec_init_external(&qiov, &iov, 1);
+
+    ret = blk_co_preadv(blk, start, qiov.size, &qiov,
+                        is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
+    if (ret < 0) {
+        trace_backup_do_cow_read_fail(job, start, ret);
+        if (error_is_read) {
+            *error_is_read = true;
+        }
+        goto fail;
+    }
+
+    if (qemu_iovec_is_zero(&qiov)) {
+        ret = blk_co_pwrite_zeroes(job->target, start,
+                                   qiov.size, BDRV_REQ_MAY_UNMAP);
+    } else {
+        ret = blk_co_pwritev(job->target, start,
+                             qiov.size, &qiov,
+                             job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
+    }
+    if (ret < 0) {
+        trace_backup_do_cow_write_fail(job, start, ret);
+        if (error_is_read) {
+            *error_is_read = false;
+        }
+        goto fail;
+    }
+
+    return nbytes;
+fail:
+    hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
+    return ret;
+
+}
+
+/* Copy range to target and return the bytes copied. If an error occurred,
+ * return a negative error number. */
+static int coroutine_fn backup_cow_with_offload(BackupBlockJob *job,
+                                                int64_t start,
+                                                int64_t end,
+                                                bool is_write_notifier)
+{
+    int ret;
+    int nr_clusters;
+    BlockBackend *blk = job->common.blk;
+    int nbytes;
+
+    assert(QEMU_IS_ALIGNED(job->copy_range_size, job->cluster_size));
+    nbytes = MIN(job->copy_range_size, end - start);
+    nr_clusters = DIV_ROUND_UP(nbytes, job->cluster_size);
+    hbitmap_reset(job->copy_bitmap, start / job->cluster_size,
+                  nr_clusters);
+    ret = blk_co_copy_range(blk, start, job->target, start, nbytes,
+                            is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
+    if (ret < 0) {
+        trace_backup_do_cow_copy_range_fail(job, start, ret);
+        hbitmap_set(job->copy_bitmap, start / job->cluster_size,
+                    nr_clusters);
+        return ret;
+    }
+
+    return nbytes;
+}
+
 static int coroutine_fn backup_do_cow(BackupBlockJob *job,
                                       int64_t offset, uint64_t bytes,
                                       bool *error_is_read,
                                       bool is_write_notifier)
 {
-    BlockBackend *blk = job->common.blk;
     CowRequest cow_request;
-    struct iovec iov;
-    QEMUIOVector bounce_qiov;
-    void *bounce_buffer = NULL;
     int ret = 0;
     int64_t start, end; /* bytes */
-    int n; /* bytes */
+    void *bounce_buffer = NULL;
 
     qemu_co_rwlock_rdlock(&job->flush_rwlock);
 
@@ -110,60 +194,38 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
     wait_for_overlapping_requests(job, start, end);
     cow_request_begin(&cow_request, job, start, end);
 
-    for (; start < end; start += job->cluster_size) {
+    while (start < end) {
         if (!hbitmap_get(job->copy_bitmap, start / job->cluster_size)) {
             trace_backup_do_cow_skip(job, start);
+            start += job->cluster_size;
             continue; /* already copied */
         }
-        hbitmap_reset(job->copy_bitmap, start / job->cluster_size, 1);
 
         trace_backup_do_cow_process(job, start);
 
-        n = MIN(job->cluster_size, job->len - start);
-
-        if (!bounce_buffer) {
-            bounce_buffer = blk_blockalign(blk, job->cluster_size);
-        }
-        iov.iov_base = bounce_buffer;
-        iov.iov_len = n;
-        qemu_iovec_init_external(&bounce_qiov, &iov, 1);
-
-        ret = blk_co_preadv(blk, start, bounce_qiov.size, &bounce_qiov,
-                            is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
-        if (ret < 0) {
-            trace_backup_do_cow_read_fail(job, start, ret);
-            if (error_is_read) {
-                *error_is_read = true;
+        if (job->use_copy_range) {
+            ret = backup_cow_with_offload(job, start, end, is_write_notifier);
+            if (ret < 0) {
+                job->use_copy_range = false;
             }
-            hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
-            goto out;
         }
-
-        if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
-            ret = blk_co_pwrite_zeroes(job->target, start,
-                                       bounce_qiov.size, BDRV_REQ_MAY_UNMAP);
-        } else {
-            ret = blk_co_pwritev(job->target, start,
-                                 bounce_qiov.size, &bounce_qiov,
-                                 job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
+        if (!job->use_copy_range) {
+            ret = backup_cow_with_bounce_buffer(job, start, end, is_write_notifier,
+                                                error_is_read, &bounce_buffer);
         }
         if (ret < 0) {
-            trace_backup_do_cow_write_fail(job, start, ret);
-            if (error_is_read) {
-                *error_is_read = false;
-            }
-            hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
-            goto out;
+            break;
         }
 
         /* Publish progress, guest I/O counts as progress too.  Note that the
          * offset field is an opaque progress value, it is not a disk offset.
          */
-        job->bytes_read += n;
-        job_progress_update(&job->common.job, n);
+        start += ret;
+        job->bytes_read += ret;
+        job_progress_update(&job->common.job, ret);
+        ret = 0;
     }
 
-out:
     if (bounce_buffer) {
         qemu_vfree(bounce_buffer);
     }
@@ -665,6 +727,12 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
     } else {
         job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
     }
+    job->use_copy_range = true;
+    job->copy_range_size = MIN_NON_ZERO(blk_get_max_transfer(job->common.blk),
+                                        blk_get_max_transfer(job->target));
+    job->copy_range_size = MAX(job->cluster_size,
+                               QEMU_ALIGN_UP(job->copy_range_size,
+                                             job->cluster_size));
 
     /* Required permissions are already taken with target's blk_new() */
     block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
diff --git a/block/trace-events b/block/trace-events
index 2d59b53..c35287b 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -42,6 +42,7 @@ backup_do_cow_skip(void *job, int64_t start) "job %p start %"PRId64
 backup_do_cow_process(void *job, int64_t start) "job %p start %"PRId64
 backup_do_cow_read_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
 backup_do_cow_write_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
+backup_do_cow_copy_range_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
 
 # blockdev.c
 qmp_block_job_cancel(void *job) "job %p"
-- 
1.8.3.1