Blame SOURCES/kvm-qcow2-Implement-copy-offloading.patch

1bdc94
From 3650b202ee9499c9c234eb05d296eb4d35f52f34 Mon Sep 17 00:00:00 2001
1bdc94
From: Fam Zheng <famz@redhat.com>
1bdc94
Date: Fri, 29 Jun 2018 06:11:44 +0200
1bdc94
Subject: [PATCH 40/57] qcow2: Implement copy offloading
1bdc94
1bdc94
RH-Author: Fam Zheng <famz@redhat.com>
1bdc94
Message-id: <20180629061153.12687-5-famz@redhat.com>
1bdc94
Patchwork-id: 81154
1bdc94
O-Subject: [RHEL-7.6 qemu-kvm-rhev PATCH v2 04/13] qcow2: Implement copy offloading
1bdc94
Bugzilla: 1482537
1bdc94
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
1bdc94
RH-Acked-by: Max Reitz <mreitz@redhat.com>
1bdc94
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
1bdc94
1bdc94
The two callbacks are implemented quite similarly to the read/write
1bdc94
functions: bdrv_co_copy_range_from maps for read and calls into bs->file
1bdc94
or bs->backing depending on the allocation status; bdrv_co_copy_range_to
1bdc94
maps for write and calls into bs->file.
1bdc94
1bdc94
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
1bdc94
Signed-off-by: Fam Zheng <famz@redhat.com>
1bdc94
Message-id: 20180601092648.24614-5-famz@redhat.com
1bdc94
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
1bdc94
(cherry picked from commit fd9fcd37a8645efe322956d94f76e90135522a16)
1bdc94
Signed-off-by: Fam Zheng <famz@redhat.com>
1bdc94
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
1bdc94
---
1bdc94
 block/qcow2.c | 229 ++++++++++++++++++++++++++++++++++++++++++++++++++--------
1bdc94
 1 file changed, 199 insertions(+), 30 deletions(-)
1bdc94
1bdc94
diff --git a/block/qcow2.c b/block/qcow2.c
1bdc94
index 092db81..c85ebcb 100644
1bdc94
--- a/block/qcow2.c
1bdc94
+++ b/block/qcow2.c
1bdc94
@@ -1756,6 +1756,39 @@ static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs,
1bdc94
     return status;
1bdc94
 }
1bdc94
 
1bdc94
+static coroutine_fn int qcow2_handle_l2meta(BlockDriverState *bs,
1bdc94
+                                            QCowL2Meta **pl2meta,
1bdc94
+                                            bool link_l2)
1bdc94
+{
1bdc94
+    int ret = 0;
1bdc94
+    QCowL2Meta *l2meta = *pl2meta;
1bdc94
+
1bdc94
+    while (l2meta != NULL) {
1bdc94
+        QCowL2Meta *next;
1bdc94
+
1bdc94
+        if (!ret && link_l2) {
1bdc94
+            ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
1bdc94
+            if (ret) {
1bdc94
+                goto out;
1bdc94
+            }
1bdc94
+        }
1bdc94
+
1bdc94
+        /* Take the request off the list of running requests */
1bdc94
+        if (l2meta->nb_clusters != 0) {
1bdc94
+            QLIST_REMOVE(l2meta, next_in_flight);
1bdc94
+        }
1bdc94
+
1bdc94
+        qemu_co_queue_restart_all(&l2meta->dependent_requests);
1bdc94
+
1bdc94
+        next = l2meta->next;
1bdc94
+        g_free(l2meta);
1bdc94
+        l2meta = next;
1bdc94
+    }
1bdc94
+out:
1bdc94
+    *pl2meta = l2meta;
1bdc94
+    return ret;
1bdc94
+}
1bdc94
+
1bdc94
 static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
1bdc94
                                         uint64_t bytes, QEMUIOVector *qiov,
1bdc94
                                         int flags)
1bdc94
@@ -2042,24 +2075,9 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
1bdc94
             }
1bdc94
         }
1bdc94
 
1bdc94
-        while (l2meta != NULL) {
1bdc94
-            QCowL2Meta *next;
1bdc94
-
1bdc94
-            ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
1bdc94
-            if (ret < 0) {
1bdc94
-                goto fail;
1bdc94
-            }
1bdc94
-
1bdc94
-            /* Take the request off the list of running requests */
1bdc94
-            if (l2meta->nb_clusters != 0) {
1bdc94
-                QLIST_REMOVE(l2meta, next_in_flight);
1bdc94
-            }
1bdc94
-
1bdc94
-            qemu_co_queue_restart_all(&l2meta->dependent_requests);
1bdc94
-
1bdc94
-            next = l2meta->next;
1bdc94
-            g_free(l2meta);
1bdc94
-            l2meta = next;
1bdc94
+        ret = qcow2_handle_l2meta(bs, &l2meta, true);
1bdc94
+        if (ret) {
1bdc94
+            goto fail;
1bdc94
         }
1bdc94
 
1bdc94
         bytes -= cur_bytes;
1bdc94
@@ -2070,18 +2088,7 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
1bdc94
     ret = 0;
1bdc94
 
1bdc94
 fail:
1bdc94
-    while (l2meta != NULL) {
1bdc94
-        QCowL2Meta *next;
1bdc94
-
1bdc94
-        if (l2meta->nb_clusters != 0) {
1bdc94
-            QLIST_REMOVE(l2meta, next_in_flight);
1bdc94
-        }
1bdc94
-        qemu_co_queue_restart_all(&l2meta->dependent_requests);
1bdc94
-
1bdc94
-        next = l2meta->next;
1bdc94
-        g_free(l2meta);
1bdc94
-        l2meta = next;
1bdc94
-    }
1bdc94
+    qcow2_handle_l2meta(bs, &l2meta, false);
1bdc94
 
1bdc94
     qemu_co_mutex_unlock(&s->lock);
1bdc94
 
1bdc94
@@ -3264,6 +3271,166 @@ static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs,
1bdc94
     return ret;
1bdc94
 }
1bdc94
 
1bdc94
+static int coroutine_fn
1bdc94
+qcow2_co_copy_range_from(BlockDriverState *bs,
1bdc94
+                         BdrvChild *src, uint64_t src_offset,
1bdc94
+                         BdrvChild *dst, uint64_t dst_offset,
1bdc94
+                         uint64_t bytes, BdrvRequestFlags flags)
1bdc94
+{
1bdc94
+    BDRVQcow2State *s = bs->opaque;
1bdc94
+    int ret;
1bdc94
+    unsigned int cur_bytes; /* number of bytes in current iteration */
1bdc94
+    BdrvChild *child = NULL;
1bdc94
+    BdrvRequestFlags cur_flags;
1bdc94
+
1bdc94
+    assert(!bs->encrypted);
1bdc94
+    qemu_co_mutex_lock(&s->lock);
1bdc94
+
1bdc94
+    while (bytes != 0) {
1bdc94
+        uint64_t copy_offset = 0;
1bdc94
+        /* prepare next request */
1bdc94
+        cur_bytes = MIN(bytes, INT_MAX);
1bdc94
+        cur_flags = flags;
1bdc94
+
1bdc94
+        ret = qcow2_get_cluster_offset(bs, src_offset, &cur_bytes, &copy_offset);
1bdc94
+        if (ret < 0) {
1bdc94
+            goto out;
1bdc94
+        }
1bdc94
+
1bdc94
+        switch (ret) {
1bdc94
+        case QCOW2_CLUSTER_UNALLOCATED:
1bdc94
+            if (bs->backing && bs->backing->bs) {
1bdc94
+                int64_t backing_length = bdrv_getlength(bs->backing->bs);
1bdc94
+                if (src_offset >= backing_length) {
1bdc94
+                    cur_flags |= BDRV_REQ_ZERO_WRITE;
1bdc94
+                } else {
1bdc94
+                    child = bs->backing;
1bdc94
+                    cur_bytes = MIN(cur_bytes, backing_length - src_offset);
1bdc94
+                    copy_offset = src_offset;
1bdc94
+                }
1bdc94
+            } else {
1bdc94
+                cur_flags |= BDRV_REQ_ZERO_WRITE;
1bdc94
+            }
1bdc94
+            break;
1bdc94
+
1bdc94
+        case QCOW2_CLUSTER_ZERO_PLAIN:
1bdc94
+        case QCOW2_CLUSTER_ZERO_ALLOC:
1bdc94
+            cur_flags |= BDRV_REQ_ZERO_WRITE;
1bdc94
+            break;
1bdc94
+
1bdc94
+        case QCOW2_CLUSTER_COMPRESSED:
1bdc94
+            ret = -ENOTSUP;
1bdc94
+            goto out;
1bdc94
+            break;
1bdc94
+
1bdc94
+        case QCOW2_CLUSTER_NORMAL:
1bdc94
+            child = bs->file;
1bdc94
+            copy_offset += offset_into_cluster(s, src_offset);
1bdc94
+            if ((copy_offset & 511) != 0) {
1bdc94
+                ret = -EIO;
1bdc94
+                goto out;
1bdc94
+            }
1bdc94
+            break;
1bdc94
+
1bdc94
+        default:
1bdc94
+            abort();
1bdc94
+        }
1bdc94
+        qemu_co_mutex_unlock(&s->lock);
1bdc94
+        ret = bdrv_co_copy_range_from(child,
1bdc94
+                                      copy_offset,
1bdc94
+                                      dst, dst_offset,
1bdc94
+                                      cur_bytes, cur_flags);
1bdc94
+        qemu_co_mutex_lock(&s->lock);
1bdc94
+        if (ret < 0) {
1bdc94
+            goto out;
1bdc94
+        }
1bdc94
+
1bdc94
+        bytes -= cur_bytes;
1bdc94
+        src_offset += cur_bytes;
1bdc94
+        dst_offset += cur_bytes;
1bdc94
+    }
1bdc94
+    ret = 0;
1bdc94
+
1bdc94
+out:
1bdc94
+    qemu_co_mutex_unlock(&s->lock);
1bdc94
+    return ret;
1bdc94
+}
1bdc94
+
1bdc94
+static int coroutine_fn
1bdc94
+qcow2_co_copy_range_to(BlockDriverState *bs,
1bdc94
+                       BdrvChild *src, uint64_t src_offset,
1bdc94
+                       BdrvChild *dst, uint64_t dst_offset,
1bdc94
+                       uint64_t bytes, BdrvRequestFlags flags)
1bdc94
+{
1bdc94
+    BDRVQcow2State *s = bs->opaque;
1bdc94
+    int offset_in_cluster;
1bdc94
+    int ret;
1bdc94
+    unsigned int cur_bytes; /* number of bytes in current iteration */
1bdc94
+    uint64_t cluster_offset;
1bdc94
+    uint8_t *cluster_data = NULL;
1bdc94
+    QCowL2Meta *l2meta = NULL;
1bdc94
+
1bdc94
+    assert(!bs->encrypted);
1bdc94
+    s->cluster_cache_offset = -1; /* disable compressed cache */
1bdc94
+
1bdc94
+    qemu_co_mutex_lock(&s->lock);
1bdc94
+
1bdc94
+    while (bytes != 0) {
1bdc94
+
1bdc94
+        l2meta = NULL;
1bdc94
+
1bdc94
+        offset_in_cluster = offset_into_cluster(s, dst_offset);
1bdc94
+        cur_bytes = MIN(bytes, INT_MAX);
1bdc94
+
1bdc94
+        /* TODO:
1bdc94
+         * If src->bs == dst->bs, we could simply copy by incrementing
1bdc94
+         * the refcnt, without copying user data.
1bdc94
+         * Or if src->bs == dst->bs->backing->bs, we could copy by discarding. */
1bdc94
+        ret = qcow2_alloc_cluster_offset(bs, dst_offset, &cur_bytes,
1bdc94
+                                         &cluster_offset, &l2meta);
1bdc94
+        if (ret < 0) {
1bdc94
+            goto fail;
1bdc94
+        }
1bdc94
+
1bdc94
+        assert((cluster_offset & 511) == 0);
1bdc94
+
1bdc94
+        ret = qcow2_pre_write_overlap_check(bs, 0,
1bdc94
+                cluster_offset + offset_in_cluster, cur_bytes);
1bdc94
+        if (ret < 0) {
1bdc94
+            goto fail;
1bdc94
+        }
1bdc94
+
1bdc94
+        qemu_co_mutex_unlock(&s->lock);
1bdc94
+        ret = bdrv_co_copy_range_to(src, src_offset,
1bdc94
+                                    bs->file,
1bdc94
+                                    cluster_offset + offset_in_cluster,
1bdc94
+                                    cur_bytes, flags);
1bdc94
+        qemu_co_mutex_lock(&s->lock);
1bdc94
+        if (ret < 0) {
1bdc94
+            goto fail;
1bdc94
+        }
1bdc94
+
1bdc94
+        ret = qcow2_handle_l2meta(bs, &l2meta, true);
1bdc94
+        if (ret) {
1bdc94
+            goto fail;
1bdc94
+        }
1bdc94
+
1bdc94
+        bytes -= cur_bytes;
1bdc94
+        dst_offset += cur_bytes;
1bdc94
+    }
1bdc94
+    ret = 0;
1bdc94
+
1bdc94
+fail:
1bdc94
+    qcow2_handle_l2meta(bs, &l2meta, false);
1bdc94
+
1bdc94
+    qemu_co_mutex_unlock(&s->lock);
1bdc94
+
1bdc94
+    qemu_vfree(cluster_data);
1bdc94
+    trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
1bdc94
+
1bdc94
+    return ret;
1bdc94
+}
1bdc94
+
1bdc94
 static int qcow2_truncate(BlockDriverState *bs, int64_t offset,
1bdc94
                           PreallocMode prealloc, Error **errp)
1bdc94
 {
1bdc94
@@ -4522,6 +4689,8 @@ BlockDriver bdrv_qcow2 = {
1bdc94
 
1bdc94
     .bdrv_co_pwrite_zeroes  = qcow2_co_pwrite_zeroes,
1bdc94
     .bdrv_co_pdiscard       = qcow2_co_pdiscard,
1bdc94
+    .bdrv_co_copy_range_from = qcow2_co_copy_range_from,
1bdc94
+    .bdrv_co_copy_range_to  = qcow2_co_copy_range_to,
1bdc94
     .bdrv_truncate          = qcow2_truncate,
1bdc94
     .bdrv_co_pwritev_compressed = qcow2_co_pwritev_compressed,
1bdc94
     .bdrv_make_empty        = qcow2_make_empty,
1bdc94
-- 
1bdc94
1.8.3.1
1bdc94