Blame SOURCES/kvm-block-Introduce-API-for-copy-offloading.patch

383d26
From 62cb216e19f41b1d1c4c5d4523df56dce865ab30 Mon Sep 17 00:00:00 2001
383d26
From: Fam Zheng <famz@redhat.com>
383d26
Date: Fri, 29 Jun 2018 06:11:41 +0200
383d26
Subject: [PATCH 37/57] block: Introduce API for copy offloading
383d26
383d26
RH-Author: Fam Zheng <famz@redhat.com>
383d26
Message-id: <20180629061153.12687-2-famz@redhat.com>
383d26
Patchwork-id: 81153
383d26
O-Subject: [RHEL-7.6 qemu-kvm-rhev PATCH v2 01/13] block: Introduce API for copy offloading
383d26
Bugzilla: 1482537
383d26
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
383d26
RH-Acked-by: Max Reitz <mreitz@redhat.com>
383d26
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
383d26
383d26
Introduce the bdrv_co_copy_range() API for copy offloading.  Block
383d26
drivers implementing this API support efficient copy operations that
383d26
avoid reading each block from the source device and writing it to the
383d26
destination devices.  Examples of copy offload primitives are SCSI
383d26
EXTENDED COPY and Linux copy_file_range(2).
383d26
383d26
Signed-off-by: Fam Zheng <famz@redhat.com>
383d26
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
383d26
Message-id: 20180601092648.24614-2-famz@redhat.com
383d26
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
383d26
(cherry picked from commit fcc6767836efe1b160289905dce7228d594c123c)
383d26
Signed-off-by: Fam Zheng <famz@redhat.com>
383d26
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
383d26
---
383d26
 block/io.c                | 97 +++++++++++++++++++++++++++++++++++++++++++++++
383d26
 include/block/block.h     | 32 ++++++++++++++++
383d26
 include/block/block_int.h | 38 +++++++++++++++++++
383d26
 3 files changed, 167 insertions(+)
383d26
383d26
diff --git a/block/io.c b/block/io.c
383d26
index fada4ef..5c043a4 100644
383d26
--- a/block/io.c
383d26
+++ b/block/io.c
383d26
@@ -2832,3 +2832,100 @@ void bdrv_unregister_buf(BlockDriverState *bs, void *host)
383d26
         bdrv_unregister_buf(child->bs, host);
383d26
     }
383d26
 }
383d26
+
383d26
+static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src,
383d26
+                                                    uint64_t src_offset,
383d26
+                                                    BdrvChild *dst,
383d26
+                                                    uint64_t dst_offset,
383d26
+                                                    uint64_t bytes,
383d26
+                                                    BdrvRequestFlags flags,
383d26
+                                                    bool recurse_src)
383d26
+{
383d26
+    int ret;
383d26
+
383d26
+    if (!src || !dst || !src->bs || !dst->bs) {
383d26
+        return -ENOMEDIUM;
383d26
+    }
383d26
+    ret = bdrv_check_byte_request(src->bs, src_offset, bytes);
383d26
+    if (ret) {
383d26
+        return ret;
383d26
+    }
383d26
+
383d26
+    ret = bdrv_check_byte_request(dst->bs, dst_offset, bytes);
383d26
+    if (ret) {
383d26
+        return ret;
383d26
+    }
383d26
+    if (flags & BDRV_REQ_ZERO_WRITE) {
383d26
+        return bdrv_co_pwrite_zeroes(dst, dst_offset, bytes, flags);
383d26
+    }
383d26
+
383d26
+    if (!src->bs->drv->bdrv_co_copy_range_from
383d26
+        || !dst->bs->drv->bdrv_co_copy_range_to
383d26
+        || src->bs->encrypted || dst->bs->encrypted) {
383d26
+        return -ENOTSUP;
383d26
+    }
383d26
+    if (recurse_src) {
383d26
+        return src->bs->drv->bdrv_co_copy_range_from(src->bs,
383d26
+                                                     src, src_offset,
383d26
+                                                     dst, dst_offset,
383d26
+                                                     bytes, flags);
383d26
+    } else {
383d26
+        return dst->bs->drv->bdrv_co_copy_range_to(dst->bs,
383d26
+                                                   src, src_offset,
383d26
+                                                   dst, dst_offset,
383d26
+                                                   bytes, flags);
383d26
+    }
383d26
+}
383d26
+
383d26
+/* Copy range from @src to @dst.
383d26
+ *
383d26
+ * See the comment of bdrv_co_copy_range for the parameter and return value
383d26
+ * semantics. */
383d26
+int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, uint64_t src_offset,
383d26
+                                         BdrvChild *dst, uint64_t dst_offset,
383d26
+                                         uint64_t bytes, BdrvRequestFlags flags)
383d26
+{
383d26
+    return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
383d26
+                                       bytes, flags, true);
383d26
+}
383d26
+
383d26
+/* Copy range from @src to @dst.
383d26
+ *
383d26
+ * See the comment of bdrv_co_copy_range for the parameter and return value
383d26
+ * semantics. */
383d26
+int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset,
383d26
+                                       BdrvChild *dst, uint64_t dst_offset,
383d26
+                                       uint64_t bytes, BdrvRequestFlags flags)
383d26
+{
383d26
+    return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
383d26
+                                       bytes, flags, false);
383d26
+}
383d26
+
383d26
+int coroutine_fn bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset,
383d26
+                                    BdrvChild *dst, uint64_t dst_offset,
383d26
+                                    uint64_t bytes, BdrvRequestFlags flags)
383d26
+{
383d26
+    BdrvTrackedRequest src_req, dst_req;
383d26
+    BlockDriverState *src_bs = src->bs;
383d26
+    BlockDriverState *dst_bs = dst->bs;
383d26
+    int ret;
383d26
+
383d26
+    bdrv_inc_in_flight(src_bs);
383d26
+    bdrv_inc_in_flight(dst_bs);
383d26
+    tracked_request_begin(&src_req, src_bs, src_offset,
383d26
+                          bytes, BDRV_TRACKED_READ);
383d26
+    tracked_request_begin(&dst_req, dst_bs, dst_offset,
383d26
+                          bytes, BDRV_TRACKED_WRITE);
383d26
+
383d26
+    wait_serialising_requests(&src_req);
383d26
+    wait_serialising_requests(&dst_req);
383d26
+    ret = bdrv_co_copy_range_from(src, src_offset,
383d26
+                                  dst, dst_offset,
383d26
+                                  bytes, flags);
383d26
+
383d26
+    tracked_request_end(&src_req);
383d26
+    tracked_request_end(&dst_req);
383d26
+    bdrv_dec_in_flight(src_bs);
383d26
+    bdrv_dec_in_flight(dst_bs);
383d26
+    return ret;
383d26
+}
383d26
diff --git a/include/block/block.h b/include/block/block.h
383d26
index 2d17b09..e677080 100644
383d26
--- a/include/block/block.h
383d26
+++ b/include/block/block.h
383d26
@@ -613,4 +613,36 @@ bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
383d26
  */
383d26
 void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size);
383d26
 void bdrv_unregister_buf(BlockDriverState *bs, void *host);
383d26
+
383d26
+/**
383d26
+ *
383d26
+ * bdrv_co_copy_range:
383d26
+ *
383d26
+ * Do offloaded copy between two children. If the operation is not implemented
383d26
+ * by the driver, or if the backend storage doesn't support it, a negative
383d26
+ * error code will be returned.
383d26
+ *
383d26
+ * Note: block layer doesn't emulate or fall back to a bounce buffer approach
383d26
+ * because usually the caller shouldn't attempt offloaded copy any more (e.g.
383d26
+ * calling copy_file_range(2)) after the first error, thus it should fall back
383d26
+ * to a read+write path in the caller level.
383d26
+ *
383d26
+ * @src: Source child to copy data from
383d26
+ * @src_offset: offset in @src image to read data
383d26
+ * @dst: Destination child to copy data to
383d26
+ * @dst_offset: offset in @dst image to write data
383d26
+ * @bytes: number of bytes to copy
383d26
+ * @flags: request flags. Must be one of:
383d26
+ *         0 - actually read data from src;
383d26
+ *         BDRV_REQ_ZERO_WRITE - treat the @src range as zero data and do zero
383d26
+ *                               write on @dst as if bdrv_co_pwrite_zeroes is
383d26
+ *                               called. Used to simplify caller code, or
383d26
+ *                               during BlockDriver.bdrv_co_copy_range_from()
383d26
+ *                               recursion.
383d26
+ *
383d26
+ * Returns: 0 if succeeded; negative error code if failed.
383d26
+ **/
383d26
+int coroutine_fn bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset,
383d26
+                                    BdrvChild *dst, uint64_t dst_offset,
383d26
+                                    uint64_t bytes, BdrvRequestFlags flags);
383d26
 #endif
383d26
diff --git a/include/block/block_int.h b/include/block/block_int.h
383d26
index ad2b852..3da86a7 100644
383d26
--- a/include/block/block_int.h
383d26
+++ b/include/block/block_int.h
383d26
@@ -206,6 +206,37 @@ struct BlockDriver {
383d26
     int coroutine_fn (*bdrv_co_pdiscard)(BlockDriverState *bs,
383d26
         int64_t offset, int bytes);
383d26
 
383d26
+    /* Map [offset, offset + bytes) range onto a child of @bs to copy from,
383d26
+     * and invoke bdrv_co_copy_range_from(child, ...), or invoke
383d26
+     * bdrv_co_copy_range_to() if @bs is the leaf child to copy data from.
383d26
+     *
383d26
+     * See the comment of bdrv_co_copy_range for the parameter and return value
383d26
+     * semantics.
383d26
+     */
383d26
+    int coroutine_fn (*bdrv_co_copy_range_from)(BlockDriverState *bs,
383d26
+                                                BdrvChild *src,
383d26
+                                                uint64_t offset,
383d26
+                                                BdrvChild *dst,
383d26
+                                                uint64_t dst_offset,
383d26
+                                                uint64_t bytes,
383d26
+                                                BdrvRequestFlags flags);
383d26
+
383d26
+    /* Map [offset, offset + bytes) range onto a child of bs to copy data to,
383d26
+     * and invoke bdrv_co_copy_range_to(child, src, ...), or perform the copy
383d26
+     * operation if @bs is the leaf and @src has the same BlockDriver.  Return
383d26
+     * -ENOTSUP if @bs is the leaf but @src has a different BlockDriver.
383d26
+     *
383d26
+     * See the comment of bdrv_co_copy_range for the parameter and return value
383d26
+     * semantics.
383d26
+     */
383d26
+    int coroutine_fn (*bdrv_co_copy_range_to)(BlockDriverState *bs,
383d26
+                                              BdrvChild *src,
383d26
+                                              uint64_t src_offset,
383d26
+                                              BdrvChild *dst,
383d26
+                                              uint64_t dst_offset,
383d26
+                                              uint64_t bytes,
383d26
+                                              BdrvRequestFlags flags);
383d26
+
383d26
     /*
383d26
      * Building block for bdrv_block_status[_above] and
383d26
      * bdrv_is_allocated[_above].  The driver should answer only
383d26
@@ -1091,4 +1122,11 @@ void bdrv_dec_in_flight(BlockDriverState *bs);
383d26
 
383d26
 void blockdev_close_all_bdrv_states(void);
383d26
 
383d26
+int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, uint64_t src_offset,
383d26
+                                         BdrvChild *dst, uint64_t dst_offset,
383d26
+                                         uint64_t bytes, BdrvRequestFlags flags);
383d26
+int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset,
383d26
+                                       BdrvChild *dst, uint64_t dst_offset,
383d26
+                                       uint64_t bytes, BdrvRequestFlags flags);
383d26
+
383d26
 #endif /* BLOCK_INT_H */
383d26
-- 
383d26
1.8.3.1
383d26