ae23c9
From d193d49f5180d6a6b959808368247cdd506bf989 Mon Sep 17 00:00:00 2001
ae23c9
From: Fam Zheng <famz@redhat.com>
ae23c9
Date: Fri, 29 Jun 2018 06:11:41 +0200
ae23c9
Subject: [PATCH 167/268] block: Introduce API for copy offloading
ae23c9
ae23c9
RH-Author: Fam Zheng <famz@redhat.com>
ae23c9
Message-id: <20180629061153.12687-2-famz@redhat.com>
ae23c9
Patchwork-id: 81153
ae23c9
O-Subject: [RHEL-7.6 qemu-kvm-rhev PATCH v2 01/13] block: Introduce API for copy offloading
ae23c9
Bugzilla: 1482537
ae23c9
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
ae23c9
RH-Acked-by: Max Reitz <mreitz@redhat.com>
ae23c9
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
ae23c9
ae23c9
Introduce the bdrv_co_copy_range() API for copy offloading.  Block
ae23c9
drivers implementing this API support efficient copy operations that
ae23c9
avoid reading each block from the source device and writing it to the
ae23c9
destination device.  Examples of copy offload primitives are SCSI
ae23c9
EXTENDED COPY and Linux copy_file_range(2).
ae23c9
ae23c9
Signed-off-by: Fam Zheng <famz@redhat.com>
ae23c9
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
ae23c9
Message-id: 20180601092648.24614-2-famz@redhat.com
ae23c9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
ae23c9
(cherry picked from commit fcc6767836efe1b160289905dce7228d594c123c)
ae23c9
Signed-off-by: Fam Zheng <famz@redhat.com>
ae23c9
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
ae23c9
---
ae23c9
 block/io.c                | 97 +++++++++++++++++++++++++++++++++++++++++++++++
ae23c9
 include/block/block.h     | 32 ++++++++++++++++
ae23c9
 include/block/block_int.h | 38 +++++++++++++++++++
ae23c9
 3 files changed, 167 insertions(+)
ae23c9
ae23c9
diff --git a/block/io.c b/block/io.c
ae23c9
index fada4ef..5c043a4 100644
ae23c9
--- a/block/io.c
ae23c9
+++ b/block/io.c
ae23c9
@@ -2832,3 +2832,100 @@ void bdrv_unregister_buf(BlockDriverState *bs, void *host)
ae23c9
         bdrv_unregister_buf(child->bs, host);
ae23c9
     }
ae23c9
 }
ae23c9
+
ae23c9
+static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src,
ae23c9
+                                                    uint64_t src_offset,
ae23c9
+                                                    BdrvChild *dst,
ae23c9
+                                                    uint64_t dst_offset,
ae23c9
+                                                    uint64_t bytes,
ae23c9
+                                                    BdrvRequestFlags flags,
ae23c9
+                                                    bool recurse_src)
ae23c9
+{
ae23c9
+    int ret;
ae23c9
+
ae23c9
+    if (!src || !dst || !src->bs || !dst->bs) {
ae23c9
+        return -ENOMEDIUM;
ae23c9
+    }
ae23c9
+    ret = bdrv_check_byte_request(src->bs, src_offset, bytes);
ae23c9
+    if (ret) {
ae23c9
+        return ret;
ae23c9
+    }
ae23c9
+
ae23c9
+    ret = bdrv_check_byte_request(dst->bs, dst_offset, bytes);
ae23c9
+    if (ret) {
ae23c9
+        return ret;
ae23c9
+    }
ae23c9
+    if (flags & BDRV_REQ_ZERO_WRITE) {
ae23c9
+        return bdrv_co_pwrite_zeroes(dst, dst_offset, bytes, flags);
ae23c9
+    }
ae23c9
+
ae23c9
+    if (!src->bs->drv->bdrv_co_copy_range_from
ae23c9
+        || !dst->bs->drv->bdrv_co_copy_range_to
ae23c9
+        || src->bs->encrypted || dst->bs->encrypted) {
ae23c9
+        return -ENOTSUP;
ae23c9
+    }
ae23c9
+    if (recurse_src) {
ae23c9
+        return src->bs->drv->bdrv_co_copy_range_from(src->bs,
ae23c9
+                                                     src, src_offset,
ae23c9
+                                                     dst, dst_offset,
ae23c9
+                                                     bytes, flags);
ae23c9
+    } else {
ae23c9
+        return dst->bs->drv->bdrv_co_copy_range_to(dst->bs,
ae23c9
+                                                   src, src_offset,
ae23c9
+                                                   dst, dst_offset,
ae23c9
+                                                   bytes, flags);
ae23c9
+    }
ae23c9
+}
ae23c9
+
ae23c9
+/* Copy range from @src to @dst.
ae23c9
+ *
ae23c9
+ * See the comment of bdrv_co_copy_range for the parameter and return value
ae23c9
+ * semantics. */
ae23c9
+int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, uint64_t src_offset,
ae23c9
+                                         BdrvChild *dst, uint64_t dst_offset,
ae23c9
+                                         uint64_t bytes, BdrvRequestFlags flags)
ae23c9
+{
ae23c9
+    return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
ae23c9
+                                       bytes, flags, true);
ae23c9
+}
ae23c9
+
ae23c9
+/* Copy range from @src to @dst.
ae23c9
+ *
ae23c9
+ * See the comment of bdrv_co_copy_range for the parameter and return value
ae23c9
+ * semantics. */
ae23c9
+int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset,
ae23c9
+                                       BdrvChild *dst, uint64_t dst_offset,
ae23c9
+                                       uint64_t bytes, BdrvRequestFlags flags)
ae23c9
+{
ae23c9
+    return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
ae23c9
+                                       bytes, flags, false);
ae23c9
+}
ae23c9
+
ae23c9
+int coroutine_fn bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset,
ae23c9
+                                    BdrvChild *dst, uint64_t dst_offset,
ae23c9
+                                    uint64_t bytes, BdrvRequestFlags flags)
ae23c9
+{
ae23c9
+    BdrvTrackedRequest src_req, dst_req;
ae23c9
+    BlockDriverState *src_bs = src->bs;
ae23c9
+    BlockDriverState *dst_bs = dst->bs;
ae23c9
+    int ret;
ae23c9
+
ae23c9
+    bdrv_inc_in_flight(src_bs);
ae23c9
+    bdrv_inc_in_flight(dst_bs);
ae23c9
+    tracked_request_begin(&src_req, src_bs, src_offset,
ae23c9
+                          bytes, BDRV_TRACKED_READ);
ae23c9
+    tracked_request_begin(&dst_req, dst_bs, dst_offset,
ae23c9
+                          bytes, BDRV_TRACKED_WRITE);
ae23c9
+
ae23c9
+    wait_serialising_requests(&src_req);
ae23c9
+    wait_serialising_requests(&dst_req);
ae23c9
+    ret = bdrv_co_copy_range_from(src, src_offset,
ae23c9
+                                  dst, dst_offset,
ae23c9
+                                  bytes, flags);
ae23c9
+
ae23c9
+    tracked_request_end(&src_req);
ae23c9
+    tracked_request_end(&dst_req);
ae23c9
+    bdrv_dec_in_flight(src_bs);
ae23c9
+    bdrv_dec_in_flight(dst_bs);
ae23c9
+    return ret;
ae23c9
+}
ae23c9
diff --git a/include/block/block.h b/include/block/block.h
ae23c9
index 2d17b09..e677080 100644
ae23c9
--- a/include/block/block.h
ae23c9
+++ b/include/block/block.h
ae23c9
@@ -613,4 +613,36 @@ bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
ae23c9
  */
ae23c9
 void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size);
ae23c9
 void bdrv_unregister_buf(BlockDriverState *bs, void *host);
ae23c9
+
ae23c9
+/**
ae23c9
+ *
ae23c9
+ * bdrv_co_copy_range:
ae23c9
+ *
ae23c9
+ * Do offloaded copy between two children. If the operation is not implemented
ae23c9
+ * by the driver, or if the backend storage doesn't support it, a negative
ae23c9
+ * error code will be returned.
ae23c9
+ *
ae23c9
+ * Note: block layer doesn't emulate or fall back to a bounce buffer approach
ae23c9
+ * because usually the caller shouldn't attempt offloaded copy any more (e.g.
ae23c9
+ * calling copy_file_range(2)) after the first error, thus it should fall back
ae23c9
+ * to a read+write path in the caller level.
ae23c9
+ *
ae23c9
+ * @src: Source child to copy data from
ae23c9
+ * @src_offset: offset in @src image to read data
ae23c9
+ * @dst: Destination child to copy data to
ae23c9
+ * @dst_offset: offset in @dst image to write data
ae23c9
+ * @bytes: number of bytes to copy
ae23c9
+ * @flags: request flags. Must be one of:
ae23c9
+ *         0 - actually read data from src;
ae23c9
+ *         BDRV_REQ_ZERO_WRITE - treat the @src range as zero data and do zero
ae23c9
+ *                               write on @dst as if bdrv_co_pwrite_zeroes is
ae23c9
+ *                               called. Used to simplify caller code, or
ae23c9
+ *                               during BlockDriver.bdrv_co_copy_range_from()
ae23c9
+ *                               recursion.
ae23c9
+ *
ae23c9
+ * Returns: 0 if succeeded; negative error code if failed.
ae23c9
+ **/
ae23c9
+int coroutine_fn bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset,
ae23c9
+                                    BdrvChild *dst, uint64_t dst_offset,
ae23c9
+                                    uint64_t bytes, BdrvRequestFlags flags);
ae23c9
 #endif
ae23c9
diff --git a/include/block/block_int.h b/include/block/block_int.h
ae23c9
index ad2b852..3da86a7 100644
ae23c9
--- a/include/block/block_int.h
ae23c9
+++ b/include/block/block_int.h
ae23c9
@@ -206,6 +206,37 @@ struct BlockDriver {
ae23c9
     int coroutine_fn (*bdrv_co_pdiscard)(BlockDriverState *bs,
ae23c9
         int64_t offset, int bytes);
ae23c9
 
ae23c9
+    /* Map [offset, offset + bytes) range onto a child of @bs to copy from,
ae23c9
+     * and invoke bdrv_co_copy_range_from(child, ...), or invoke
ae23c9
+     * bdrv_co_copy_range_to() if @bs is the leaf child to copy data from.
ae23c9
+     *
ae23c9
+     * See the comment of bdrv_co_copy_range for the parameter and return value
ae23c9
+     * semantics.
ae23c9
+     */
ae23c9
+    int coroutine_fn (*bdrv_co_copy_range_from)(BlockDriverState *bs,
ae23c9
+                                                BdrvChild *src,
ae23c9
+                                                uint64_t offset,
ae23c9
+                                                BdrvChild *dst,
ae23c9
+                                                uint64_t dst_offset,
ae23c9
+                                                uint64_t bytes,
ae23c9
+                                                BdrvRequestFlags flags);
ae23c9
+
ae23c9
+    /* Map [dst_offset, dst_offset + bytes) range onto a child of @bs to copy
ae23c9
+     * data to, and invoke bdrv_co_copy_range_to(child, src, ...), or perform
ae23c9
+     * the copy operation if @bs is the leaf and @src has the same BlockDriver.
ae23c9
+     * Return -ENOTSUP if @bs is the leaf but @src has a different BlockDriver.
ae23c9
+     *
ae23c9
+     * See the comment of bdrv_co_copy_range for the parameter and return value
ae23c9
+     * semantics.
ae23c9
+     */
ae23c9
+    int coroutine_fn (*bdrv_co_copy_range_to)(BlockDriverState *bs,
ae23c9
+                                              BdrvChild *src,
ae23c9
+                                              uint64_t src_offset,
ae23c9
+                                              BdrvChild *dst,
ae23c9
+                                              uint64_t dst_offset,
ae23c9
+                                              uint64_t bytes,
ae23c9
+                                              BdrvRequestFlags flags);
ae23c9
+
ae23c9
     /*
ae23c9
      * Building block for bdrv_block_status[_above] and
ae23c9
      * bdrv_is_allocated[_above].  The driver should answer only
ae23c9
@@ -1091,4 +1122,11 @@ void bdrv_dec_in_flight(BlockDriverState *bs);
ae23c9
 
ae23c9
 void blockdev_close_all_bdrv_states(void);
ae23c9
 
ae23c9
+int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, uint64_t src_offset,
ae23c9
+                                         BdrvChild *dst, uint64_t dst_offset,
ae23c9
+                                         uint64_t bytes, BdrvRequestFlags flags);
ae23c9
+int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset,
ae23c9
+                                       BdrvChild *dst, uint64_t dst_offset,
ae23c9
+                                       uint64_t bytes, BdrvRequestFlags flags);
ae23c9
+
ae23c9
 #endif /* BLOCK_INT_H */
ae23c9
-- 
ae23c9
1.8.3.1
ae23c9