26ba25
From 58bdc00e94d7a965f91881022386ec73fe081c2f Mon Sep 17 00:00:00 2001
26ba25
From: John Snow <jsnow@redhat.com>
26ba25
Date: Tue, 24 Jul 2018 12:59:12 +0200
26ba25
Subject: [PATCH 237/268] block/io: fix copy_range
26ba25
26ba25
RH-Author: John Snow <jsnow@redhat.com>
26ba25
Message-id: <20180718225511.14878-20-jsnow@redhat.com>
26ba25
Patchwork-id: 81420
26ba25
O-Subject: [RHEL-7.6 qemu-kvm-rhev PATCH 19/35] block/io: fix copy_range
26ba25
Bugzilla: 1207657
26ba25
RH-Acked-by: Eric Blake <eblake@redhat.com>
26ba25
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
26ba25
RH-Acked-by: Fam Zheng <famz@redhat.com>
26ba25
26ba25
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
26ba25
26ba25
Here two things are fixed:
26ba25
26ba25
1. Architecture
26ba25
26ba25
On each recursion step, we go to the child of src or dst, only for one
26ba25
of them. So, it's wrong to create tracked requests for both on each
26ba25
step. It leads to tracked requests duplication.
26ba25
26ba25
2. Wait for serializing requests on write path independently of
26ba25
   BDRV_REQ_NO_SERIALISING
26ba25
26ba25
Before commit 9ded4a01149 "backup: Use copy offloading",
26ba25
BDRV_REQ_NO_SERIALISING was used for only one case: read in
26ba25
copy-on-write operation during backup. Also, the flag was handled only
26ba25
on read path (in bdrv_co_preadv and bdrv_aligned_preadv).
26ba25
26ba25
After 9ded4a01149, flag is used for not waiting serializing operations
26ba25
on backup target (in same case of copy-on-write operation). This
26ba25
behavior change is unsubstantiated and potentially dangerous, let's
26ba25
drop it and add additional asserts and documentation.
26ba25
26ba25
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
26ba25
Reviewed-by: Fam Zheng <famz@redhat.com>
26ba25
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
26ba25
(cherry picked from commit 999658a05e61a8d87b65827da665302bb44ce8c9)
26ba25
Signed-off-by: John Snow <jsnow@redhat.com>
26ba25
---
26ba25
 block/io.c            | 43 +++++++++++++++++++++++++++----------------
26ba25
 include/block/block.h | 12 ++++++++++++
26ba25
 2 files changed, 39 insertions(+), 16 deletions(-)
26ba25
26ba25
diff --git a/block/io.c b/block/io.c
26ba25
index b6754f3..f8de42f 100644
26ba25
--- a/block/io.c
26ba25
+++ b/block/io.c
26ba25
@@ -1505,6 +1505,8 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
26ba25
     max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
26ba25
                                    align);
26ba25
 
26ba25
+    /* BDRV_REQ_NO_SERIALISING is only for read operation */
26ba25
+    assert(!(flags & BDRV_REQ_NO_SERIALISING));
26ba25
     waited = wait_serialising_requests(req);
26ba25
     assert(!waited || !req->serialising);
26ba25
     assert(req->overlap_offset <= offset);
26ba25
@@ -2847,7 +2849,7 @@ static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src,
26ba25
                                                     BdrvRequestFlags flags,
26ba25
                                                     bool recurse_src)
26ba25
 {
26ba25
-    BdrvTrackedRequest src_req, dst_req;
26ba25
+    BdrvTrackedRequest req;
26ba25
     int ret;
26ba25
 
26ba25
     if (!dst || !dst->bs) {
26ba25
@@ -2876,32 +2878,41 @@ static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src,
26ba25
         return -ENOTSUP;
26ba25
     }
26ba25
 
26ba25
-    bdrv_inc_in_flight(src->bs);
26ba25
-    bdrv_inc_in_flight(dst->bs);
26ba25
-    tracked_request_begin(&src_req, src->bs, src_offset,
26ba25
-                          bytes, BDRV_TRACKED_READ);
26ba25
-    tracked_request_begin(&dst_req, dst->bs, dst_offset,
26ba25
-                          bytes, BDRV_TRACKED_WRITE);
26ba25
-
26ba25
-    if (!(flags & BDRV_REQ_NO_SERIALISING)) {
26ba25
-        wait_serialising_requests(&src_req);
26ba25
-        wait_serialising_requests(&dst_req);
26ba25
-    }
26ba25
     if (recurse_src) {
26ba25
+        bdrv_inc_in_flight(src->bs);
26ba25
+        tracked_request_begin(&req, src->bs, src_offset, bytes,
26ba25
+                              BDRV_TRACKED_READ);
26ba25
+
26ba25
+        if (!(flags & BDRV_REQ_NO_SERIALISING)) {
26ba25
+            wait_serialising_requests(&req;;
26ba25
+        }
26ba25
+
26ba25
         ret = src->bs->drv->bdrv_co_copy_range_from(src->bs,
26ba25
                                                     src, src_offset,
26ba25
                                                     dst, dst_offset,
26ba25
                                                     bytes, flags);
26ba25
+
26ba25
+        tracked_request_end(&req;;
26ba25
+        bdrv_dec_in_flight(src->bs);
26ba25
     } else {
26ba25
+        bdrv_inc_in_flight(dst->bs);
26ba25
+        tracked_request_begin(&req, dst->bs, dst_offset, bytes,
26ba25
+                              BDRV_TRACKED_WRITE);
26ba25
+
26ba25
+        /* BDRV_REQ_NO_SERIALISING is only for read operation,
26ba25
+         * so we ignore it in flags.
26ba25
+         */
26ba25
+        wait_serialising_requests(&req;;
26ba25
+
26ba25
         ret = dst->bs->drv->bdrv_co_copy_range_to(dst->bs,
26ba25
                                                   src, src_offset,
26ba25
                                                   dst, dst_offset,
26ba25
                                                   bytes, flags);
26ba25
+
26ba25
+        tracked_request_end(&req;;
26ba25
+        bdrv_dec_in_flight(dst->bs);
26ba25
     }
26ba25
-    tracked_request_end(&src_req);
26ba25
-    tracked_request_end(&dst_req);
26ba25
-    bdrv_dec_in_flight(src->bs);
26ba25
-    bdrv_dec_in_flight(dst->bs);
26ba25
+
26ba25
     return ret;
26ba25
 }
26ba25
 
26ba25
diff --git a/include/block/block.h b/include/block/block.h
26ba25
index e1d5e47..716fb5b 100644
26ba25
--- a/include/block/block.h
26ba25
+++ b/include/block/block.h
26ba25
@@ -50,6 +50,18 @@ typedef enum {
26ba25
      * opened with BDRV_O_UNMAP.
26ba25
      */
26ba25
     BDRV_REQ_MAY_UNMAP          = 0x4,
26ba25
+
26ba25
+    /*
26ba25
+     * The BDRV_REQ_NO_SERIALISING flag is only valid for reads and means that
26ba25
+     * we don't want wait_serialising_requests() during the read operation.
26ba25
+     *
26ba25
+     * This flag is used for backup copy-on-write operations, when we need to
26ba25
+     * read old data before write (write notifier triggered). It is okay since
26ba25
+     * we already waited for other serializing requests in the initiating write
26ba25
+     * (see bdrv_aligned_pwritev), and it is necessary if the initiating write
26ba25
+     * is already serializing (without the flag, the read would deadlock
26ba25
+     * waiting for the serialising write to complete).
26ba25
+     */
26ba25
     BDRV_REQ_NO_SERIALISING     = 0x8,
26ba25
     BDRV_REQ_FUA                = 0x10,
26ba25
     BDRV_REQ_WRITE_COMPRESSED   = 0x20,
26ba25
-- 
26ba25
1.8.3.1
26ba25