|
|
1bdc94 |
From 62cb216e19f41b1d1c4c5d4523df56dce865ab30 Mon Sep 17 00:00:00 2001
|
|
|
1bdc94 |
From: Fam Zheng <famz@redhat.com>
|
|
|
1bdc94 |
Date: Fri, 29 Jun 2018 06:11:41 +0200
|
|
|
1bdc94 |
Subject: [PATCH 37/57] block: Introduce API for copy offloading
|
|
|
1bdc94 |
|
|
|
1bdc94 |
RH-Author: Fam Zheng <famz@redhat.com>
|
|
|
1bdc94 |
Message-id: <20180629061153.12687-2-famz@redhat.com>
|
|
|
1bdc94 |
Patchwork-id: 81153
|
|
|
1bdc94 |
O-Subject: [RHEL-7.6 qemu-kvm-rhev PATCH v2 01/13] block: Introduce API for copy offloading
|
|
|
1bdc94 |
Bugzilla: 1482537
|
|
|
1bdc94 |
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
|
|
1bdc94 |
RH-Acked-by: Max Reitz <mreitz@redhat.com>
|
|
|
1bdc94 |
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
|
|
1bdc94 |
|
|
|
1bdc94 |
Introduce the bdrv_co_copy_range() API for copy offloading. Block
|
|
|
1bdc94 |
drivers implementing this API support efficient copy operations that
|
|
|
1bdc94 |
avoid reading each block from the source device and writing it to the
|
|
|
1bdc94 |
destination device. Examples of copy offload primitives are SCSI
|
|
|
1bdc94 |
EXTENDED COPY and Linux copy_file_range(2).
|
|
|
1bdc94 |
|
|
|
1bdc94 |
Signed-off-by: Fam Zheng <famz@redhat.com>
|
|
|
1bdc94 |
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
|
|
1bdc94 |
Message-id: 20180601092648.24614-2-famz@redhat.com
|
|
|
1bdc94 |
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
|
|
1bdc94 |
(cherry picked from commit fcc6767836efe1b160289905dce7228d594c123c)
|
|
|
1bdc94 |
Signed-off-by: Fam Zheng <famz@redhat.com>
|
|
|
1bdc94 |
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
|
|
1bdc94 |
---
|
|
|
1bdc94 |
block/io.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++
|
|
|
1bdc94 |
include/block/block.h | 32 ++++++++++++++++
|
|
|
1bdc94 |
include/block/block_int.h | 38 +++++++++++++++++++
|
|
|
1bdc94 |
3 files changed, 167 insertions(+)
|
|
|
1bdc94 |
|
|
|
1bdc94 |
diff --git a/block/io.c b/block/io.c
|
|
|
1bdc94 |
index fada4ef..5c043a4 100644
|
|
|
1bdc94 |
--- a/block/io.c
|
|
|
1bdc94 |
+++ b/block/io.c
|
|
|
1bdc94 |
@@ -2832,3 +2832,100 @@ void bdrv_unregister_buf(BlockDriverState *bs, void *host)
|
|
|
1bdc94 |
bdrv_unregister_buf(child->bs, host);
|
|
|
1bdc94 |
}
|
|
|
1bdc94 |
}
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
+static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src,
|
|
|
1bdc94 |
+ uint64_t src_offset,
|
|
|
1bdc94 |
+ BdrvChild *dst,
|
|
|
1bdc94 |
+ uint64_t dst_offset,
|
|
|
1bdc94 |
+ uint64_t bytes,
|
|
|
1bdc94 |
+ BdrvRequestFlags flags,
|
|
|
1bdc94 |
+ bool recurse_src)
|
|
|
1bdc94 |
+{
|
|
|
1bdc94 |
+ int ret;
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
+ if (!src || !dst || !src->bs || !dst->bs) {
|
|
|
1bdc94 |
+ return -ENOMEDIUM;
|
|
|
1bdc94 |
+ }
|
|
|
1bdc94 |
+ ret = bdrv_check_byte_request(src->bs, src_offset, bytes);
|
|
|
1bdc94 |
+ if (ret) {
|
|
|
1bdc94 |
+ return ret;
|
|
|
1bdc94 |
+ }
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
+ ret = bdrv_check_byte_request(dst->bs, dst_offset, bytes);
|
|
|
1bdc94 |
+ if (ret) {
|
|
|
1bdc94 |
+ return ret;
|
|
|
1bdc94 |
+ }
|
|
|
1bdc94 |
+ if (flags & BDRV_REQ_ZERO_WRITE) {
|
|
|
1bdc94 |
+ return bdrv_co_pwrite_zeroes(dst, dst_offset, bytes, flags);
|
|
|
1bdc94 |
+ }
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
+ if (!src->bs->drv->bdrv_co_copy_range_from
|
|
|
1bdc94 |
+ || !dst->bs->drv->bdrv_co_copy_range_to
|
|
|
1bdc94 |
+ || src->bs->encrypted || dst->bs->encrypted) {
|
|
|
1bdc94 |
+ return -ENOTSUP;
|
|
|
1bdc94 |
+ }
|
|
|
1bdc94 |
+ if (recurse_src) {
|
|
|
1bdc94 |
+ return src->bs->drv->bdrv_co_copy_range_from(src->bs,
|
|
|
1bdc94 |
+ src, src_offset,
|
|
|
1bdc94 |
+ dst, dst_offset,
|
|
|
1bdc94 |
+ bytes, flags);
|
|
|
1bdc94 |
+ } else {
|
|
|
1bdc94 |
+ return dst->bs->drv->bdrv_co_copy_range_to(dst->bs,
|
|
|
1bdc94 |
+ src, src_offset,
|
|
|
1bdc94 |
+ dst, dst_offset,
|
|
|
1bdc94 |
+ bytes, flags);
|
|
|
1bdc94 |
+ }
|
|
|
1bdc94 |
+}
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
+/* Copy range from @src to @dst.
|
|
|
1bdc94 |
+ *
|
|
|
1bdc94 |
+ * See the comment of bdrv_co_copy_range for the parameter and return value
|
|
|
1bdc94 |
+ * semantics. */
|
|
|
1bdc94 |
+int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, uint64_t src_offset,
|
|
|
1bdc94 |
+ BdrvChild *dst, uint64_t dst_offset,
|
|
|
1bdc94 |
+ uint64_t bytes, BdrvRequestFlags flags)
|
|
|
1bdc94 |
+{
|
|
|
1bdc94 |
+ return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
|
|
|
1bdc94 |
+ bytes, flags, true);
|
|
|
1bdc94 |
+}
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
+/* Copy range from @src to @dst.
|
|
|
1bdc94 |
+ *
|
|
|
1bdc94 |
+ * See the comment of bdrv_co_copy_range for the parameter and return value
|
|
|
1bdc94 |
+ * semantics. */
|
|
|
1bdc94 |
+int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset,
|
|
|
1bdc94 |
+ BdrvChild *dst, uint64_t dst_offset,
|
|
|
1bdc94 |
+ uint64_t bytes, BdrvRequestFlags flags)
|
|
|
1bdc94 |
+{
|
|
|
1bdc94 |
+ return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
|
|
|
1bdc94 |
+ bytes, flags, false);
|
|
|
1bdc94 |
+}
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
+int coroutine_fn bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset,
|
|
|
1bdc94 |
+ BdrvChild *dst, uint64_t dst_offset,
|
|
|
1bdc94 |
+ uint64_t bytes, BdrvRequestFlags flags)
|
|
|
1bdc94 |
+{
|
|
|
1bdc94 |
+ BdrvTrackedRequest src_req, dst_req;
|
|
|
1bdc94 |
+ BlockDriverState *src_bs = src->bs;
|
|
|
1bdc94 |
+ BlockDriverState *dst_bs = dst->bs;
|
|
|
1bdc94 |
+ int ret;
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
+ bdrv_inc_in_flight(src_bs);
|
|
|
1bdc94 |
+ bdrv_inc_in_flight(dst_bs);
|
|
|
1bdc94 |
+ tracked_request_begin(&src_req, src_bs, src_offset,
|
|
|
1bdc94 |
+ bytes, BDRV_TRACKED_READ);
|
|
|
1bdc94 |
+ tracked_request_begin(&dst_req, dst_bs, dst_offset,
|
|
|
1bdc94 |
+ bytes, BDRV_TRACKED_WRITE);
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
+ wait_serialising_requests(&src_req);
|
|
|
1bdc94 |
+ wait_serialising_requests(&dst_req);
|
|
|
1bdc94 |
+ ret = bdrv_co_copy_range_from(src, src_offset,
|
|
|
1bdc94 |
+ dst, dst_offset,
|
|
|
1bdc94 |
+ bytes, flags);
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
+ tracked_request_end(&src_req);
|
|
|
1bdc94 |
+ tracked_request_end(&dst_req);
|
|
|
1bdc94 |
+ bdrv_dec_in_flight(src_bs);
|
|
|
1bdc94 |
+ bdrv_dec_in_flight(dst_bs);
|
|
|
1bdc94 |
+ return ret;
|
|
|
1bdc94 |
+}
|
|
|
1bdc94 |
diff --git a/include/block/block.h b/include/block/block.h
|
|
|
1bdc94 |
index 2d17b09..e677080 100644
|
|
|
1bdc94 |
--- a/include/block/block.h
|
|
|
1bdc94 |
+++ b/include/block/block.h
|
|
|
1bdc94 |
@@ -613,4 +613,36 @@ bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
|
|
|
1bdc94 |
*/
|
|
|
1bdc94 |
void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size);
|
|
|
1bdc94 |
void bdrv_unregister_buf(BlockDriverState *bs, void *host);
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
+/**
|
|
|
1bdc94 |
+ *
|
|
|
1bdc94 |
+ * bdrv_co_copy_range:
|
|
|
1bdc94 |
+ *
|
|
|
1bdc94 |
+ * Do offloaded copy between two children. If the operation is not implemented
|
|
|
1bdc94 |
+ * by the driver, or if the backend storage doesn't support it, a negative
|
|
|
1bdc94 |
+ * error code will be returned.
|
|
|
1bdc94 |
+ *
|
|
|
1bdc94 |
+ * Note: block layer doesn't emulate or fall back to a bounce buffer approach
|
|
|
1bdc94 |
+ * because usually the caller shouldn't attempt offloaded copy any more (e.g.
|
|
|
1bdc94 |
+ * calling copy_file_range(2)) after the first error, thus it should fall back
|
|
|
1bdc94 |
+ * to a read+write path in the caller level.
|
|
|
1bdc94 |
+ *
|
|
|
1bdc94 |
+ * @src: Source child to copy data from
|
|
|
1bdc94 |
+ * @src_offset: offset in @src image to read data
|
|
|
1bdc94 |
+ * @dst: Destination child to copy data to
|
|
|
1bdc94 |
+ * @dst_offset: offset in @dst image to write data
|
|
|
1bdc94 |
+ * @bytes: number of bytes to copy
|
|
|
1bdc94 |
+ * @flags: request flags. Must be one of:
|
|
|
1bdc94 |
+ * 0 - actually read data from src;
|
|
|
1bdc94 |
+ * BDRV_REQ_ZERO_WRITE - treat the @src range as zero data and do zero
|
|
|
1bdc94 |
+ * write on @dst as if bdrv_co_pwrite_zeroes is
|
|
|
1bdc94 |
+ * called. Used to simplify caller code, or
|
|
|
1bdc94 |
+ * during BlockDriver.bdrv_co_copy_range_from()
|
|
|
1bdc94 |
+ * recursion.
|
|
|
1bdc94 |
+ *
|
|
|
1bdc94 |
+ * Returns: 0 if succeeded; negative error code if failed.
|
|
|
1bdc94 |
+ **/
|
|
|
1bdc94 |
+int coroutine_fn bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset,
|
|
|
1bdc94 |
+ BdrvChild *dst, uint64_t dst_offset,
|
|
|
1bdc94 |
+ uint64_t bytes, BdrvRequestFlags flags);
|
|
|
1bdc94 |
#endif
|
|
|
1bdc94 |
diff --git a/include/block/block_int.h b/include/block/block_int.h
|
|
|
1bdc94 |
index ad2b852..3da86a7 100644
|
|
|
1bdc94 |
--- a/include/block/block_int.h
|
|
|
1bdc94 |
+++ b/include/block/block_int.h
|
|
|
1bdc94 |
@@ -206,6 +206,37 @@ struct BlockDriver {
|
|
|
1bdc94 |
int coroutine_fn (*bdrv_co_pdiscard)(BlockDriverState *bs,
|
|
|
1bdc94 |
int64_t offset, int bytes);
|
|
|
1bdc94 |
|
|
|
1bdc94 |
+ /* Map [offset, offset + bytes) range onto a child of @bs to copy from,
|
|
|
1bdc94 |
+ * and invoke bdrv_co_copy_range_from(child, ...), or invoke
|
|
|
1bdc94 |
+ * bdrv_co_copy_range_to() if @bs is the leaf child to copy data from.
|
|
|
1bdc94 |
+ *
|
|
|
1bdc94 |
+ * See the comment of bdrv_co_copy_range for the parameter and return value
|
|
|
1bdc94 |
+ * semantics.
|
|
|
1bdc94 |
+ */
|
|
|
1bdc94 |
+ int coroutine_fn (*bdrv_co_copy_range_from)(BlockDriverState *bs,
|
|
|
1bdc94 |
+ BdrvChild *src,
|
|
|
1bdc94 |
+ uint64_t offset,
|
|
|
1bdc94 |
+ BdrvChild *dst,
|
|
|
1bdc94 |
+ uint64_t dst_offset,
|
|
|
1bdc94 |
+ uint64_t bytes,
|
|
|
1bdc94 |
+ BdrvRequestFlags flags);
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
+ /* Map [offset, offset + bytes) range onto a child of @bs to copy data to,
|
|
|
1bdc94 |
+ * and invoke bdrv_co_copy_range_to(child, src, ...), or perform the copy
|
|
|
1bdc94 |
+ * operation if @bs is the leaf and @src has the same BlockDriver. Return
|
|
|
1bdc94 |
+ * -ENOTSUP if @bs is the leaf but @src has a different BlockDriver.
|
|
|
1bdc94 |
+ *
|
|
|
1bdc94 |
+ * See the comment of bdrv_co_copy_range for the parameter and return value
|
|
|
1bdc94 |
+ * semantics.
|
|
|
1bdc94 |
+ */
|
|
|
1bdc94 |
+ int coroutine_fn (*bdrv_co_copy_range_to)(BlockDriverState *bs,
|
|
|
1bdc94 |
+ BdrvChild *src,
|
|
|
1bdc94 |
+ uint64_t src_offset,
|
|
|
1bdc94 |
+ BdrvChild *dst,
|
|
|
1bdc94 |
+ uint64_t dst_offset,
|
|
|
1bdc94 |
+ uint64_t bytes,
|
|
|
1bdc94 |
+ BdrvRequestFlags flags);
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
/*
|
|
|
1bdc94 |
* Building block for bdrv_block_status[_above] and
|
|
|
1bdc94 |
* bdrv_is_allocated[_above]. The driver should answer only
|
|
|
1bdc94 |
@@ -1091,4 +1122,11 @@ void bdrv_dec_in_flight(BlockDriverState *bs);
|
|
|
1bdc94 |
|
|
|
1bdc94 |
void blockdev_close_all_bdrv_states(void);
|
|
|
1bdc94 |
|
|
|
1bdc94 |
+int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, uint64_t src_offset,
|
|
|
1bdc94 |
+ BdrvChild *dst, uint64_t dst_offset,
|
|
|
1bdc94 |
+ uint64_t bytes, BdrvRequestFlags flags);
|
|
|
1bdc94 |
+int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset,
|
|
|
1bdc94 |
+ BdrvChild *dst, uint64_t dst_offset,
|
|
|
1bdc94 |
+ uint64_t bytes, BdrvRequestFlags flags);
|
|
|
1bdc94 |
+
|
|
|
1bdc94 |
#endif /* BLOCK_INT_H */
|
|
|
1bdc94 |
--
|
|
|
1bdc94 |
1.8.3.1
|
|
|
1bdc94 |
|