SOURCES/kvm-block-Don-t-manually-poll-in-bdrv_drain_all.patch

From 9006222a2f826c5760f305bbd879f1b7ce3563b6 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 14 Sep 2018 10:55:02 +0200
Subject: [PATCH 11/49] block: Don't manually poll in bdrv_drain_all()

RH-Author: Kevin Wolf <kwolf@redhat.com>
Message-id: <20180914105540.18077-5-kwolf@redhat.com>
Patchwork-id: 82157
O-Subject: [RHV-7.6 qemu-kvm-rhev PATCH 04/42] block: Don't manually poll in bdrv_drain_all()
Bugzilla: 1601212
RH-Acked-by: John Snow <jsnow@redhat.com>
RH-Acked-by: Max Reitz <mreitz@redhat.com>
RH-Acked-by: Fam Zheng <famz@redhat.com>

All involved nodes are already idle; we called bdrv_do_drain_begin() on
them.

The comment in the code suggested that this was not correct because the
completion of a request on one node could spawn a new request on a
different node (which might have been drained before, so we wouldn't
drain the new request). In reality, new requests to different nodes
aren't spawned out of nothing, but only in the context of a parent
request, and they aren't submitted to random nodes, but only to child
nodes. As long as we still poll for the completion of the parent request
(which we do), draining each root node separately is good enough.

Remove the additional polling code from bdrv_drain_all_begin() and
replace it with an assertion that all nodes are already idle after we
drained them separately.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit c13ad59f012cbbccb866a10477458e69bc868dbb)
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
 block/io.c | 41 ++++++++++++-----------------------------
 1 file changed, 12 insertions(+), 29 deletions(-)

diff --git a/block/io.c b/block/io.c
index aa41f1e..e5fc42c 100644
--- a/block/io.c
+++ b/block/io.c
@@ -376,6 +376,16 @@ void bdrv_drain(BlockDriverState *bs)
     bdrv_drained_end(bs);
 }
 
+static void bdrv_drain_assert_idle(BlockDriverState *bs)
+{
+    BdrvChild *child, *next;
+
+    assert(atomic_read(&bs->in_flight) == 0);
+    QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
+        bdrv_drain_assert_idle(child->bs);
+    }
+}
+
 /*
  * Wait for pending requests to complete across all BlockDriverStates
  *
@@ -390,11 +400,8 @@ void bdrv_drain(BlockDriverState *bs)
  */
 void bdrv_drain_all_begin(void)
 {
-    /* Always run first iteration so any pending completion BHs run */
-    bool waited = true;
     BlockDriverState *bs;
     BdrvNextIterator it;
-    GSList *aio_ctxs = NULL, *ctx;
 
     /* BDRV_POLL_WHILE() for a node can only be called from its own I/O thread
      * or the main loop AioContext. We potentially use BDRV_POLL_WHILE() on
@@ -408,35 +415,11 @@ void bdrv_drain_all_begin(void)
         aio_context_acquire(aio_context);
         bdrv_do_drained_begin(bs, true, NULL);
         aio_context_release(aio_context);
-
-        if (!g_slist_find(aio_ctxs, aio_context)) {
-            aio_ctxs = g_slist_prepend(aio_ctxs, aio_context);
-        }
     }
 
-    /* Note that completion of an asynchronous I/O operation can trigger any
-     * number of other I/O operations on other devices---for example a
-     * coroutine can submit an I/O request to another device in response to
-     * request completion.  Therefore we must keep looping until there was no
-     * more activity rather than simply draining each device independently.
-     */
-    while (waited) {
-        waited = false;
-
-        for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) {
-            AioContext *aio_context = ctx->data;
-
-            aio_context_acquire(aio_context);
-            for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
-                if (aio_context == bdrv_get_aio_context(bs)) {
-                    waited |= bdrv_drain_recurse(bs);
-                }
-            }
-            aio_context_release(aio_context);
-        }
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+        bdrv_drain_assert_idle(bs);
     }
-
-    g_slist_free(aio_ctxs);
 }
 
 void bdrv_drain_all_end(void)
-- 
1.8.3.1
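
For reference, the sketch below (not part of the patch) shows roughly how
bdrv_drain_all_begin() reads once the hunks above are applied. The header of
the first loop and its local aio_context variable are not visible in the diff
context, so that part is an assumption based on the surrounding lines; the
rest follows directly from the hunks.

void bdrv_drain_all_begin(void)
{
    BlockDriverState *bs;
    BdrvNextIterator it;

    /* As the comment retained in the first hunk notes, BDRV_POLL_WHILE() for
     * a node may only be used from its own I/O thread or the main loop
     * AioContext, so this function is expected to run in the main context. */

    /* First pass: begin a recursive drain on every root node, inside that
     * node's own AioContext. */
    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
        AioContext *aio_context = bdrv_get_aio_context(bs); /* assumed; not shown in the diff */

        aio_context_acquire(aio_context);
        bdrv_do_drained_begin(bs, true, NULL);
        aio_context_release(aio_context);
    }

    /* Second pass: instead of the old polling loop, assert that every node
     * already has in_flight == 0, recursively via bs->children. */
    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
        bdrv_drain_assert_idle(bs);
    }
}

Because new requests are only ever issued to child nodes in the context of a
still-drained parent request, the per-root drain in the first pass is
sufficient, and the second pass can merely assert idleness rather than poll.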