26ba25
From b51c41bfbf422dd3e15b946faa1695571bcbe2df Mon Sep 17 00:00:00 2001
26ba25
From: Kevin Wolf <kwolf@redhat.com>
26ba25
Date: Wed, 10 Oct 2018 20:21:50 +0100
26ba25
Subject: [PATCH 24/49] block: Poll after drain on attaching a node
26ba25
26ba25
RH-Author: Kevin Wolf <kwolf@redhat.com>
26ba25
Message-id: <20181010202213.7372-12-kwolf@redhat.com>
26ba25
Patchwork-id: 82601
26ba25
O-Subject: [RHEL-8 qemu-kvm PATCH 21/44] block: Poll after drain on attaching a node
26ba25
Bugzilla: 1637976
26ba25
RH-Acked-by: Max Reitz <mreitz@redhat.com>
26ba25
RH-Acked-by: John Snow <jsnow@redhat.com>
26ba25
RH-Acked-by: Thomas Huth <thuth@redhat.com>
26ba25
26ba25
Commit dcf94a23b1 ('block: Don't poll in parent drain callbacks')
26ba25
removed polling in bdrv_child_cb_drained_begin() on the grounds that the
26ba25
original bdrv_drain() already will poll and BdrvChildRole.drained_begin
26ba25
calls must not cause graph changes (and therefore must not call
26ba25
aio_poll() or the recursion through the graph will break.
26ba25
26ba25
This reasoning is correct for calls through bdrv_do_drained_begin().
26ba25
However, BdrvChildRole.drained_begin is also called when a node that is
26ba25
already in a drained section (i.e. bdrv_do_drained_begin() has already
26ba25
returned and therefore can't poll any more) is attached to a new parent.
26ba25
In this case, we must explicitly poll to have all requests completed
26ba25
before the drained new child can be attached to the parent.
26ba25
26ba25
In bdrv_replace_child_noperm(), we know that we're not inside the
26ba25
recursion of bdrv_do_drained_begin() because graph changes are not
26ba25
allowed there, and bdrv_replace_child_noperm() is a graph change. The
26ba25
call of BdrvChildRole.drained_begin() must therefore be followed by a
26ba25
BDRV_POLL_WHILE() that waits for the completion of requests.
26ba25
26ba25
Reported-by: Max Reitz <mreitz@redhat.com>
26ba25
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
26ba25
(cherry picked from commit 4be6a6d118123340f16fb8b3bf45220d0f8ed6d4)
26ba25
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
26ba25
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
26ba25
---
26ba25
 block.c                   |  2 +-
26ba25
 block/io.c                | 26 ++++++++++++++++++++------
26ba25
 include/block/block.h     |  8 ++++++++
26ba25
 include/block/block_int.h |  3 +++
26ba25
 4 files changed, 32 insertions(+), 7 deletions(-)
26ba25
26ba25
diff --git a/block.c b/block.c
26ba25
index eea9c6f..e89b5e3 100644
26ba25
--- a/block.c
26ba25
+++ b/block.c
26ba25
@@ -2072,7 +2072,7 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
26ba25
             }
26ba25
             assert(num >= 0);
26ba25
             for (i = 0; i < num; i++) {
26ba25
-                child->role->drained_begin(child);
26ba25
+                bdrv_parent_drained_begin_single(child, true);
26ba25
             }
26ba25
         }
26ba25
 
26ba25
diff --git a/block/io.c b/block/io.c
26ba25
index 38ae299..d404088 100644
26ba25
--- a/block/io.c
26ba25
+++ b/block/io.c
26ba25
@@ -52,9 +52,7 @@ void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore,
26ba25
         if (c == ignore || (ignore_bds_parents && c->role->parent_is_bds)) {
26ba25
             continue;
26ba25
         }
26ba25
-        if (c->role->drained_begin) {
26ba25
-            c->role->drained_begin(c);
26ba25
-        }
26ba25
+        bdrv_parent_drained_begin_single(c, false);
26ba25
     }
26ba25
 }
26ba25
 
26ba25
@@ -73,6 +71,14 @@ void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore,
26ba25
     }
26ba25
 }
26ba25
 
26ba25
+static bool bdrv_parent_drained_poll_single(BdrvChild *c)
26ba25
+{
26ba25
+    if (c->role->drained_poll) {
26ba25
+        return c->role->drained_poll(c);
26ba25
+    }
26ba25
+    return false;
26ba25
+}
26ba25
+
26ba25
 static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore,
26ba25
                                      bool ignore_bds_parents)
26ba25
 {
26ba25
@@ -83,14 +89,22 @@ static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore,
26ba25
         if (c == ignore || (ignore_bds_parents && c->role->parent_is_bds)) {
26ba25
             continue;
26ba25
         }
26ba25
-        if (c->role->drained_poll) {
26ba25
-            busy |= c->role->drained_poll(c);
26ba25
-        }
26ba25
+        busy |= bdrv_parent_drained_poll_single(c);
26ba25
     }
26ba25
 
26ba25
     return busy;
26ba25
 }
26ba25
 
26ba25
+void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
26ba25
+{
26ba25
+    if (c->role->drained_begin) {
26ba25
+        c->role->drained_begin(c);
26ba25
+    }
26ba25
+    if (poll) {
26ba25
+        BDRV_POLL_WHILE(c->bs, bdrv_parent_drained_poll_single(c));
26ba25
+    }
26ba25
+}
26ba25
+
26ba25
 static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
26ba25
 {
26ba25
     dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer);
26ba25
diff --git a/include/block/block.h b/include/block/block.h
26ba25
index f9079ac..356712c 100644
26ba25
--- a/include/block/block.h
26ba25
+++ b/include/block/block.h
26ba25
@@ -590,6 +590,14 @@ void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore,
26ba25
                                bool ignore_bds_parents);
26ba25
 
26ba25
 /**
26ba25
+ * bdrv_parent_drained_begin_single:
26ba25
+ *
26ba25
+ * Begin a quiesced section for the parent of @c. If @poll is true, wait for
26ba25
+ * any pending activity to cease.
26ba25
+ */
26ba25
+void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll);
26ba25
+
26ba25
+/**
26ba25
  * bdrv_parent_drained_end:
26ba25
  *
26ba25
  * End a quiesced section of all users of @bs. This is part of
26ba25
diff --git a/include/block/block_int.h b/include/block/block_int.h
26ba25
index 9757d5e..b7806e3 100644
26ba25
--- a/include/block/block_int.h
26ba25
+++ b/include/block/block_int.h
26ba25
@@ -610,6 +610,9 @@ struct BdrvChildRole {
26ba25
      * requests after returning from .drained_begin() until .drained_end() is
26ba25
      * called.
26ba25
      *
26ba25
+     * These functions must not change the graph (and therefore also must not
26ba25
+     * call aio_poll(), which could change the graph indirectly).
26ba25
+     *
26ba25
      * Note that this can be nested. If drained_begin() was called twice, new
26ba25
      * I/O is allowed only after drained_end() was called twice, too.
26ba25
      */
26ba25
-- 
26ba25
1.8.3.1
26ba25