ae23c9
From b51c41bfbf422dd3e15b946faa1695571bcbe2df Mon Sep 17 00:00:00 2001
ae23c9
From: Kevin Wolf <kwolf@redhat.com>
ae23c9
Date: Wed, 10 Oct 2018 20:21:50 +0100
ae23c9
Subject: [PATCH 24/49] block: Poll after drain on attaching a node
ae23c9
ae23c9
RH-Author: Kevin Wolf <kwolf@redhat.com>
ae23c9
Message-id: <20181010202213.7372-12-kwolf@redhat.com>
ae23c9
Patchwork-id: 82601
ae23c9
O-Subject: [RHEL-8 qemu-kvm PATCH 21/44] block: Poll after drain on attaching a node
ae23c9
Bugzilla: 1637976
ae23c9
RH-Acked-by: Max Reitz <mreitz@redhat.com>
ae23c9
RH-Acked-by: John Snow <jsnow@redhat.com>
ae23c9
RH-Acked-by: Thomas Huth <thuth@redhat.com>
ae23c9
ae23c9
Commit dcf94a23b1 ('block: Don't poll in parent drain callbacks')
ae23c9
removed polling in bdrv_child_cb_drained_begin() on the grounds that the
ae23c9
original bdrv_drain() already will poll and BdrvChildRole.drained_begin
ae23c9
calls must not cause graph changes (and therefore must not call
ae23c9
aio_poll() or the recursion through the graph will break.
ae23c9
ae23c9
This reasoning is correct for calls through bdrv_do_drained_begin().
ae23c9
However, BdrvChildRole.drained_begin is also called when a node that is
ae23c9
already in a drained section (i.e. bdrv_do_drained_begin() has already
ae23c9
returned and therefore can't poll any more) is attached to a new parent.
ae23c9
In this case, we must explicitly poll to have all requests completed
ae23c9
before the drained new child can be attached to the parent.
ae23c9
ae23c9
In bdrv_replace_child_noperm(), we know that we're not inside the
ae23c9
recursion of bdrv_do_drained_begin() because graph changes are not
ae23c9
allowed there, and bdrv_replace_child_noperm() is a graph change. The
ae23c9
call of BdrvChildRole.drained_begin() must therefore be followed by a
ae23c9
BDRV_POLL_WHILE() that waits for the completion of requests.
ae23c9
ae23c9
Reported-by: Max Reitz <mreitz@redhat.com>
ae23c9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
ae23c9
(cherry picked from commit 4be6a6d118123340f16fb8b3bf45220d0f8ed6d4)
ae23c9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
ae23c9
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
ae23c9
---
ae23c9
 block.c                   |  2 +-
ae23c9
 block/io.c                | 26 ++++++++++++++++++++------
ae23c9
 include/block/block.h     |  8 ++++++++
ae23c9
 include/block/block_int.h |  3 +++
ae23c9
 4 files changed, 32 insertions(+), 7 deletions(-)
ae23c9
ae23c9
diff --git a/block.c b/block.c
ae23c9
index eea9c6f..e89b5e3 100644
ae23c9
--- a/block.c
ae23c9
+++ b/block.c
ae23c9
@@ -2072,7 +2072,7 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
ae23c9
             }
ae23c9
             assert(num >= 0);
ae23c9
             for (i = 0; i < num; i++) {
ae23c9
-                child->role->drained_begin(child);
ae23c9
+                bdrv_parent_drained_begin_single(child, true);
ae23c9
             }
ae23c9
         }
ae23c9
 
ae23c9
diff --git a/block/io.c b/block/io.c
ae23c9
index 38ae299..d404088 100644
ae23c9
--- a/block/io.c
ae23c9
+++ b/block/io.c
ae23c9
@@ -52,9 +52,7 @@ void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore,
ae23c9
         if (c == ignore || (ignore_bds_parents && c->role->parent_is_bds)) {
ae23c9
             continue;
ae23c9
         }
ae23c9
-        if (c->role->drained_begin) {
ae23c9
-            c->role->drained_begin(c);
ae23c9
-        }
ae23c9
+        bdrv_parent_drained_begin_single(c, false);
ae23c9
     }
ae23c9
 }
ae23c9
 
ae23c9
@@ -73,6 +71,14 @@ void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore,
ae23c9
     }
ae23c9
 }
ae23c9
 
ae23c9
+static bool bdrv_parent_drained_poll_single(BdrvChild *c)
ae23c9
+{
ae23c9
+    if (c->role->drained_poll) {
ae23c9
+        return c->role->drained_poll(c);
ae23c9
+    }
ae23c9
+    return false;
ae23c9
+}
ae23c9
+
ae23c9
 static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore,
ae23c9
                                      bool ignore_bds_parents)
ae23c9
 {
ae23c9
@@ -83,14 +89,22 @@ static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore,
ae23c9
         if (c == ignore || (ignore_bds_parents && c->role->parent_is_bds)) {
ae23c9
             continue;
ae23c9
         }
ae23c9
-        if (c->role->drained_poll) {
ae23c9
-            busy |= c->role->drained_poll(c);
ae23c9
-        }
ae23c9
+        busy |= bdrv_parent_drained_poll_single(c);
ae23c9
     }
ae23c9
 
ae23c9
     return busy;
ae23c9
 }
ae23c9
 
ae23c9
+void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
ae23c9
+{
ae23c9
+    if (c->role->drained_begin) {
ae23c9
+        c->role->drained_begin(c);
ae23c9
+    }
ae23c9
+    if (poll) {
ae23c9
+        BDRV_POLL_WHILE(c->bs, bdrv_parent_drained_poll_single(c));
ae23c9
+    }
ae23c9
+}
ae23c9
+
ae23c9
 static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
ae23c9
 {
ae23c9
     dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer);
ae23c9
diff --git a/include/block/block.h b/include/block/block.h
ae23c9
index f9079ac..356712c 100644
ae23c9
--- a/include/block/block.h
ae23c9
+++ b/include/block/block.h
ae23c9
@@ -590,6 +590,14 @@ void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore,
ae23c9
                                bool ignore_bds_parents);
ae23c9
 
ae23c9
 /**
ae23c9
+ * bdrv_parent_drained_begin_single:
ae23c9
+ *
ae23c9
+ * Begin a quiesced section for the parent of @c. If @poll is true, wait for
ae23c9
+ * any pending activity to cease.
ae23c9
+ */
ae23c9
+void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll);
ae23c9
+
ae23c9
+/**
ae23c9
  * bdrv_parent_drained_end:
ae23c9
  *
ae23c9
  * End a quiesced section of all users of @bs. This is part of
ae23c9
diff --git a/include/block/block_int.h b/include/block/block_int.h
ae23c9
index 9757d5e..b7806e3 100644
ae23c9
--- a/include/block/block_int.h
ae23c9
+++ b/include/block/block_int.h
ae23c9
@@ -610,6 +610,9 @@ struct BdrvChildRole {
ae23c9
      * requests after returning from .drained_begin() until .drained_end() is
ae23c9
      * called.
ae23c9
      *
ae23c9
+     * These functions must not change the graph (and therefore also must not
ae23c9
+     * call aio_poll(), which could change the graph indirectly).
ae23c9
+     *
ae23c9
      * Note that this can be nested. If drained_begin() was called twice, new
ae23c9
      * I/O is allowed only after drained_end() was called twice, too.
ae23c9
      */
ae23c9
-- 
ae23c9
1.8.3.1
ae23c9