Blame SOURCES/kvm-block-io-fix-copy_range.patch

1bdc94
From 75720bedd9d4ba79f7eff5213f30af5e6c475570 Mon Sep 17 00:00:00 2001
1bdc94
From: John Snow <jsnow@redhat.com>
1bdc94
Date: Tue, 24 Jul 2018 12:59:12 +0200
1bdc94
Subject: [PATCH 70/89] block/io: fix copy_range
1bdc94
1bdc94
RH-Author: John Snow <jsnow@redhat.com>
1bdc94
Message-id: <20180718225511.14878-20-jsnow@redhat.com>
1bdc94
Patchwork-id: 81420
1bdc94
O-Subject: [RHEL-7.6 qemu-kvm-rhev PATCH 19/35] block/io: fix copy_range
1bdc94
Bugzilla: 1207657
1bdc94
RH-Acked-by: Eric Blake <eblake@redhat.com>
1bdc94
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
1bdc94
RH-Acked-by: Fam Zheng <famz@redhat.com>
1bdc94
1bdc94
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1bdc94
1bdc94
Here two things are fixed:
1bdc94
1bdc94
1. Architecture
1bdc94
1bdc94
On each recursion step, we go to the child of src or dst, only for one
1bdc94
of them. So, it's wrong to create tracked requests for both on each
1bdc94
step. It leads to tracked requests duplication.
1bdc94
1bdc94
2. Wait for serializing requests on write path independently of
1bdc94
   BDRV_REQ_NO_SERIALISING
1bdc94
1bdc94
Before commit 9ded4a01149 "backup: Use copy offloading",
1bdc94
BDRV_REQ_NO_SERIALISING was used for only one case: read in
1bdc94
copy-on-write operation during backup. Also, the flag was handled only
1bdc94
on read path (in bdrv_co_preadv and bdrv_aligned_preadv).
1bdc94
1bdc94
After 9ded4a01149, flag is used for not waiting serializing operations
1bdc94
on backup target (in same case of copy-on-write operation). This
1bdc94
behavior change is unsubstantiated and potentially dangerous, let's
1bdc94
drop it and add additional asserts and documentation.
1bdc94
1bdc94
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1bdc94
Reviewed-by: Fam Zheng <famz@redhat.com>
1bdc94
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
1bdc94
(cherry picked from commit 999658a05e61a8d87b65827da665302bb44ce8c9)
1bdc94
Signed-off-by: John Snow <jsnow@redhat.com>
1bdc94
---
1bdc94
 block/io.c            | 43 +++++++++++++++++++++++++++----------------
1bdc94
 include/block/block.h | 12 ++++++++++++
1bdc94
 2 files changed, 39 insertions(+), 16 deletions(-)
1bdc94
1bdc94
diff --git a/block/io.c b/block/io.c
1bdc94
index b6754f3..f8de42f 100644
1bdc94
--- a/block/io.c
1bdc94
+++ b/block/io.c
1bdc94
@@ -1505,6 +1505,8 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
1bdc94
     max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
1bdc94
                                    align);
1bdc94
 
1bdc94
+    /* BDRV_REQ_NO_SERIALISING is only for read operation */
1bdc94
+    assert(!(flags & BDRV_REQ_NO_SERIALISING));
1bdc94
     waited = wait_serialising_requests(req);
1bdc94
     assert(!waited || !req->serialising);
1bdc94
     assert(req->overlap_offset <= offset);
1bdc94
@@ -2847,7 +2849,7 @@ static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src,
1bdc94
                                                     BdrvRequestFlags flags,
1bdc94
                                                     bool recurse_src)
1bdc94
 {
1bdc94
-    BdrvTrackedRequest src_req, dst_req;
1bdc94
+    BdrvTrackedRequest req;
1bdc94
     int ret;
1bdc94
 
1bdc94
     if (!dst || !dst->bs) {
1bdc94
@@ -2876,32 +2878,41 @@ static int coroutine_fn bdrv_co_copy_range_internal(BdrvChild *src,
1bdc94
         return -ENOTSUP;
1bdc94
     }
1bdc94
 
1bdc94
-    bdrv_inc_in_flight(src->bs);
1bdc94
-    bdrv_inc_in_flight(dst->bs);
1bdc94
-    tracked_request_begin(&src_req, src->bs, src_offset,
1bdc94
-                          bytes, BDRV_TRACKED_READ);
1bdc94
-    tracked_request_begin(&dst_req, dst->bs, dst_offset,
1bdc94
-                          bytes, BDRV_TRACKED_WRITE);
1bdc94
-
1bdc94
-    if (!(flags & BDRV_REQ_NO_SERIALISING)) {
1bdc94
-        wait_serialising_requests(&src_req);
1bdc94
-        wait_serialising_requests(&dst_req);
1bdc94
-    }
1bdc94
     if (recurse_src) {
1bdc94
+        bdrv_inc_in_flight(src->bs);
1bdc94
+        tracked_request_begin(&req, src->bs, src_offset, bytes,
1bdc94
+                              BDRV_TRACKED_READ);
1bdc94
+
1bdc94
+        if (!(flags & BDRV_REQ_NO_SERIALISING)) {
1bdc94
+            wait_serialising_requests(&req;;
1bdc94
+        }
1bdc94
+
1bdc94
         ret = src->bs->drv->bdrv_co_copy_range_from(src->bs,
1bdc94
                                                     src, src_offset,
1bdc94
                                                     dst, dst_offset,
1bdc94
                                                     bytes, flags);
1bdc94
+
1bdc94
+        tracked_request_end(&req;;
1bdc94
+        bdrv_dec_in_flight(src->bs);
1bdc94
     } else {
1bdc94
+        bdrv_inc_in_flight(dst->bs);
1bdc94
+        tracked_request_begin(&req, dst->bs, dst_offset, bytes,
1bdc94
+                              BDRV_TRACKED_WRITE);
1bdc94
+
1bdc94
+        /* BDRV_REQ_NO_SERIALISING is only for read operation,
1bdc94
+         * so we ignore it in flags.
1bdc94
+         */
1bdc94
+        wait_serialising_requests(&req;;
1bdc94
+
1bdc94
         ret = dst->bs->drv->bdrv_co_copy_range_to(dst->bs,
1bdc94
                                                   src, src_offset,
1bdc94
                                                   dst, dst_offset,
1bdc94
                                                   bytes, flags);
1bdc94
+
1bdc94
+        tracked_request_end(&req;;
1bdc94
+        bdrv_dec_in_flight(dst->bs);
1bdc94
     }
1bdc94
-    tracked_request_end(&src_req);
1bdc94
-    tracked_request_end(&dst_req);
1bdc94
-    bdrv_dec_in_flight(src->bs);
1bdc94
-    bdrv_dec_in_flight(dst->bs);
1bdc94
+
1bdc94
     return ret;
1bdc94
 }
1bdc94
 
1bdc94
diff --git a/include/block/block.h b/include/block/block.h
1bdc94
index e1d5e47..716fb5b 100644
1bdc94
--- a/include/block/block.h
1bdc94
+++ b/include/block/block.h
1bdc94
@@ -50,6 +50,18 @@ typedef enum {
1bdc94
      * opened with BDRV_O_UNMAP.
1bdc94
      */
1bdc94
     BDRV_REQ_MAY_UNMAP          = 0x4,
1bdc94
+
1bdc94
+    /*
1bdc94
+     * The BDRV_REQ_NO_SERIALISING flag is only valid for reads and means that
1bdc94
+     * we don't want wait_serialising_requests() during the read operation.
1bdc94
+     *
1bdc94
+     * This flag is used for backup copy-on-write operations, when we need to
1bdc94
+     * read old data before write (write notifier triggered). It is okay since
1bdc94
+     * we already waited for other serializing requests in the initiating write
1bdc94
+     * (see bdrv_aligned_pwritev), and it is necessary if the initiating write
1bdc94
+     * is already serializing (without the flag, the read would deadlock
1bdc94
+     * waiting for the serialising write to complete).
1bdc94
+     */
1bdc94
     BDRV_REQ_NO_SERIALISING     = 0x8,
1bdc94
     BDRV_REQ_FUA                = 0x10,
1bdc94
     BDRV_REQ_WRITE_COMPRESSED   = 0x20,
1bdc94
-- 
1bdc94
1.8.3.1
1bdc94