From 1df22a2aa1fc70f0b2b268bbe0c184d95ce74b04 Mon Sep 17 00:00:00 2001
From: John Snow <jsnow@redhat.com>
Date: Wed, 18 Jul 2018 22:54:52 +0200
Subject: [PATCH 234/268] backup: Use copy offloading

RH-Author: John Snow <jsnow@redhat.com>
Message-id: <20180718225511.14878-17-jsnow@redhat.com>
Patchwork-id: 81399
O-Subject: [RHEL-7.6 qemu-kvm-rhev PATCH 16/35] backup: Use copy offloading
Bugzilla: 1207657
RH-Acked-by: Eric Blake <eblake@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Fam Zheng <famz@redhat.com>

From: Fam Zheng <famz@redhat.com>

The implementation is similar to that of 'qemu-img convert'. At the
beginning of the job, an offloaded copy is attempted. If it fails,
further I/O goes through the existing bounce buffer code path.
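
In outline, the per-cluster loop becomes (a condensed sketch of the
backup_do_cow() hunk below; argument lists abbreviated and the bitmap
skip/progress bookkeeping elided):

    while (start < end) {
        if (job->use_copy_range) {
            ret = backup_cow_with_offload(job, start, end, ...);
            if (ret < 0) {
                /* Offloading failed; use bounce buffers from now on. */
                job->use_copy_range = false;
            }
        }
        if (!job->use_copy_range) {
            ret = backup_cow_with_bounce_buffer(job, start, end, ...);
        }
        if (ret < 0) {
            break;
        }
        start += ret;   /* both helpers return bytes copied on success */
    }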

As Kevin pointed out, both this and qemu-img convert can benefit from a
local check: one request may fail only because, for example, its offset
is beyond EOF, while another may well be accepted by the protocol layer.
This will be implemented separately.
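
A note on chunk sizing (the formula is in the backup_job_create() hunk
below; the concrete numbers here are only a hypothetical example): the
offloaded request size is the smaller of the two backends' max_transfer
limits, aligned up to the cluster size and clamped to at least one
cluster.

    /* E.g. with cluster_size = 64 KiB, source max_transfer = 8 MiB and
     * target max_transfer = 1 MiB (hypothetical values):
     *   MIN_NON_ZERO(8 MiB, 1 MiB)                  -> 1 MiB
     *   MAX(64 KiB, QEMU_ALIGN_UP(1 MiB, 64 KiB))   -> 1 MiB
     * so a single offloaded request covers up to 16 clusters. */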

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-id: 20180703023758.14422-4-famz@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
(cherry picked from commit 9ded4a0114968e98b41494fc035ba14f84cdf700)
Signed-off-by: John Snow <jsnow@redhat.com>
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
 block/backup.c     | 150 ++++++++++++++++++++++++++++++++++++++---------------
 block/trace-events |   1 +
 2 files changed, 110 insertions(+), 41 deletions(-)

diff --git a/block/backup.c b/block/backup.c
index 5661435..d26eeb5 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -45,6 +45,8 @@ typedef struct BackupBlockJob {
     QLIST_HEAD(, CowRequest) inflight_reqs;
 
     HBitmap *copy_bitmap;
+    bool use_copy_range;
+    int64_t copy_range_size;
 } BackupBlockJob;
 
 static const BlockJobDriver backup_job_driver;
@@ -86,19 +88,101 @@ static void cow_request_end(CowRequest *req)
     qemu_co_queue_restart_all(&req->wait_queue);
 }
 
+/* Copy range to target with a bounce buffer and return the bytes copied. If
+ * an error occurs, return a negative error number. */
+static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job,
+                                                      int64_t start,
+                                                      int64_t end,
+                                                      bool is_write_notifier,
+                                                      bool *error_is_read,
+                                                      void **bounce_buffer)
+{
+    int ret;
+    struct iovec iov;
+    QEMUIOVector qiov;
+    BlockBackend *blk = job->common.blk;
+    int nbytes;
+
+    hbitmap_reset(job->copy_bitmap, start / job->cluster_size, 1);
+    nbytes = MIN(job->cluster_size, job->len - start);
+    if (!*bounce_buffer) {
+        *bounce_buffer = blk_blockalign(blk, job->cluster_size);
+    }
+    iov.iov_base = *bounce_buffer;
+    iov.iov_len = nbytes;
+    qemu_iovec_init_external(&qiov, &iov, 1);
+
+    ret = blk_co_preadv(blk, start, qiov.size, &qiov,
+                        is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
+    if (ret < 0) {
+        trace_backup_do_cow_read_fail(job, start, ret);
+        if (error_is_read) {
+            *error_is_read = true;
+        }
+        goto fail;
+    }
+
+    if (qemu_iovec_is_zero(&qiov)) {
+        ret = blk_co_pwrite_zeroes(job->target, start,
+                                   qiov.size, BDRV_REQ_MAY_UNMAP);
+    } else {
+        ret = blk_co_pwritev(job->target, start,
+                             qiov.size, &qiov,
+                             job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
+    }
+    if (ret < 0) {
+        trace_backup_do_cow_write_fail(job, start, ret);
+        if (error_is_read) {
+            *error_is_read = false;
+        }
+        goto fail;
+    }
+
+    return nbytes;
+fail:
+    hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
+    return ret;
+
+}
+
+/* Copy range to target and return the bytes copied. If an error occurs,
+ * return a negative error number. */
+static int coroutine_fn backup_cow_with_offload(BackupBlockJob *job,
+                                                int64_t start,
+                                                int64_t end,
+                                                bool is_write_notifier)
+{
+    int ret;
+    int nr_clusters;
+    BlockBackend *blk = job->common.blk;
+    int nbytes;
+
+    assert(QEMU_IS_ALIGNED(job->copy_range_size, job->cluster_size));
+    nbytes = MIN(job->copy_range_size, end - start);
+    nr_clusters = DIV_ROUND_UP(nbytes, job->cluster_size);
+    hbitmap_reset(job->copy_bitmap, start / job->cluster_size,
+                  nr_clusters);
+    ret = blk_co_copy_range(blk, start, job->target, start, nbytes,
+                            is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
+    if (ret < 0) {
+        trace_backup_do_cow_copy_range_fail(job, start, ret);
+        hbitmap_set(job->copy_bitmap, start / job->cluster_size,
+                    nr_clusters);
+        return ret;
+    }
+
+    return nbytes;
+}
+
 static int coroutine_fn backup_do_cow(BackupBlockJob *job,
                                       int64_t offset, uint64_t bytes,
                                       bool *error_is_read,
                                       bool is_write_notifier)
 {
-    BlockBackend *blk = job->common.blk;
     CowRequest cow_request;
-    struct iovec iov;
-    QEMUIOVector bounce_qiov;
-    void *bounce_buffer = NULL;
     int ret = 0;
     int64_t start, end; /* bytes */
-    int n; /* bytes */
+    void *bounce_buffer = NULL;
 
     qemu_co_rwlock_rdlock(&job->flush_rwlock);
 
@@ -110,60 +194,38 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
     wait_for_overlapping_requests(job, start, end);
     cow_request_begin(&cow_request, job, start, end);
 
-    for (; start < end; start += job->cluster_size) {
+    while (start < end) {
         if (!hbitmap_get(job->copy_bitmap, start / job->cluster_size)) {
             trace_backup_do_cow_skip(job, start);
+            start += job->cluster_size;
             continue; /* already copied */
         }
-        hbitmap_reset(job->copy_bitmap, start / job->cluster_size, 1);
 
         trace_backup_do_cow_process(job, start);
 
-        n = MIN(job->cluster_size, job->len - start);
-
-        if (!bounce_buffer) {
-            bounce_buffer = blk_blockalign(blk, job->cluster_size);
-        }
-        iov.iov_base = bounce_buffer;
-        iov.iov_len = n;
-        qemu_iovec_init_external(&bounce_qiov, &iov, 1);
-
-        ret = blk_co_preadv(blk, start, bounce_qiov.size, &bounce_qiov,
-                            is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
-        if (ret < 0) {
-            trace_backup_do_cow_read_fail(job, start, ret);
-            if (error_is_read) {
-                *error_is_read = true;
+        if (job->use_copy_range) {
+            ret = backup_cow_with_offload(job, start, end, is_write_notifier);
+            if (ret < 0) {
+                job->use_copy_range = false;
             }
-            hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
-            goto out;
         }
-
-        if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
-            ret = blk_co_pwrite_zeroes(job->target, start,
-                                       bounce_qiov.size, BDRV_REQ_MAY_UNMAP);
-        } else {
-            ret = blk_co_pwritev(job->target, start,
-                                 bounce_qiov.size, &bounce_qiov,
-                                 job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
+        if (!job->use_copy_range) {
+            ret = backup_cow_with_bounce_buffer(job, start, end, is_write_notifier,
+                                                error_is_read, &bounce_buffer);
         }
         if (ret < 0) {
-            trace_backup_do_cow_write_fail(job, start, ret);
-            if (error_is_read) {
-                *error_is_read = false;
-            }
-            hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
-            goto out;
+            break;
        }
 
         /* Publish progress, guest I/O counts as progress too.  Note that the
          * offset field is an opaque progress value, it is not a disk offset.
          */
-        job->bytes_read += n;
-        job_progress_update(&job->common.job, n);
+        start += ret;
+        job->bytes_read += ret;
+        job_progress_update(&job->common.job, ret);
+        ret = 0;
     }
 
-out:
     if (bounce_buffer) {
         qemu_vfree(bounce_buffer);
     }
@@ -665,6 +727,12 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
     } else {
         job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
     }
+    job->use_copy_range = true;
+    job->copy_range_size = MIN_NON_ZERO(blk_get_max_transfer(job->common.blk),
+                                        blk_get_max_transfer(job->target));
+    job->copy_range_size = MAX(job->cluster_size,
+                               QEMU_ALIGN_UP(job->copy_range_size,
+                                             job->cluster_size));
 
     /* Required permissions are already taken with target's blk_new() */
     block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
diff --git a/block/trace-events b/block/trace-events
index 2d59b53..c35287b 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -42,6 +42,7 @@ backup_do_cow_skip(void *job, int64_t start) "job %p start %"PRId64
 backup_do_cow_process(void *job, int64_t start) "job %p start %"PRId64
 backup_do_cow_read_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
 backup_do_cow_write_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
+backup_do_cow_copy_range_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
 
 # blockdev.c
 qmp_block_job_cancel(void *job) "job %p"
-- 
1.8.3.1