Blame SOURCES/kvm-block-Allow-graph-changes-in-bdrv_drain_all_begin-en.patch

357786
From 78fde43fee8eaaa0fd1222844961614f576ebd7c Mon Sep 17 00:00:00 2001
357786
From: Kevin Wolf <kwolf@redhat.com>
357786
Date: Fri, 14 Sep 2018 10:55:17 +0200
357786
Subject: [PATCH 26/49] block: Allow graph changes in bdrv_drain_all_begin/end
357786
 sections
357786
357786
RH-Author: Kevin Wolf <kwolf@redhat.com>
357786
Message-id: <20180914105540.18077-20-kwolf@redhat.com>
357786
Patchwork-id: 82171
357786
O-Subject: [RHV-7.6 qemu-kvm-rhev PATCH 19/42] block: Allow graph changes in bdrv_drain_all_begin/end sections
357786
Bugzilla: 1601212
357786
RH-Acked-by: John Snow <jsnow@redhat.com>
357786
RH-Acked-by: Max Reitz <mreitz@redhat.com>
357786
RH-Acked-by: Fam Zheng <famz@redhat.com>
357786
357786
bdrv_drain_all_*() used bdrv_next() to iterate over all root nodes and
357786
did a subtree drain for each of them. This works fine as long as the
357786
graph is static, but sadly, reality looks different.
357786
357786
If the graph changes so that root nodes are added or removed, we would
357786
have to compensate for this. bdrv_next() returns each root node only
357786
once even if it's the root node for multiple BlockBackends or for a
357786
monitor-owned block driver tree, which would only complicate things.
357786
357786
The much easier and more obviously correct way is to fundamentally
357786
change the way the functions work: Iterate over all BlockDriverStates,
357786
no matter who owns them, and drain them individually. Compensation is
357786
only necessary when a new BDS is created inside a drain_all section.
357786
Removal of a BDS doesn't require any action because it's gone afterwards
357786
anyway.
357786
357786
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
357786
(cherry picked from commit 0f12264e7a41458179ad10276a7c33c72024861a)
357786
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
357786
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
357786
---
357786
 block.c                   | 34 ++++++++++++++++++++++++---
357786
 block/io.c                | 60 ++++++++++++++++++++++++++++++++++++-----------
357786
 include/block/block.h     |  1 +
357786
 include/block/block_int.h |  1 +
357786
 4 files changed, 79 insertions(+), 17 deletions(-)
357786
357786
diff --git a/block.c b/block.c
357786
index 519be6e..eea9c6f 100644
357786
--- a/block.c
357786
+++ b/block.c
357786
@@ -333,6 +333,10 @@ BlockDriverState *bdrv_new(void)
357786
 
357786
     qemu_co_queue_init(&bs->flush_queue);
357786
 
357786
+    for (i = 0; i < bdrv_drain_all_count; i++) {
357786
+        bdrv_drained_begin(bs);
357786
+    }
357786
+
357786
     QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
357786
 
357786
     return bs;
357786
@@ -1170,7 +1174,7 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
357786
                             int open_flags, Error **errp)
357786
 {
357786
     Error *local_err = NULL;
357786
-    int ret;
357786
+    int i, ret;
357786
 
357786
     bdrv_assign_node_name(bs, node_name, &local_err);
357786
     if (local_err) {
357786
@@ -1218,6 +1222,12 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
357786
     assert(bdrv_min_mem_align(bs) != 0);
357786
     assert(is_power_of_2(bs->bl.request_alignment));
357786
 
357786
+    for (i = 0; i < bs->quiesce_counter; i++) {
357786
+        if (drv->bdrv_co_drain_begin) {
357786
+            drv->bdrv_co_drain_begin(bs);
357786
+        }
357786
+    }
357786
+
357786
     return 0;
357786
 open_failed:
357786
     bs->drv = NULL;
357786
@@ -2039,7 +2049,12 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
357786
             child->role->detach(child);
357786
         }
357786
         if (old_bs->quiesce_counter && child->role->drained_end) {
357786
-            for (i = 0; i < old_bs->quiesce_counter; i++) {
357786
+            int num = old_bs->quiesce_counter;
357786
+            if (child->role->parent_is_bds) {
357786
+                num -= bdrv_drain_all_count;
357786
+            }
357786
+            assert(num >= 0);
357786
+            for (i = 0; i < num; i++) {
357786
                 child->role->drained_end(child);
357786
             }
357786
         }
357786
@@ -2051,7 +2066,12 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
357786
     if (new_bs) {
357786
         QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
357786
         if (new_bs->quiesce_counter && child->role->drained_begin) {
357786
-            for (i = 0; i < new_bs->quiesce_counter; i++) {
357786
+            int num = new_bs->quiesce_counter;
357786
+            if (child->role->parent_is_bds) {
357786
+                num -= bdrv_drain_all_count;
357786
+            }
357786
+            assert(num >= 0);
357786
+            for (i = 0; i < num; i++) {
357786
                 child->role->drained_begin(child);
357786
             }
357786
         }
357786
@@ -3993,6 +4013,14 @@ BlockDriverState *bdrv_next_node(BlockDriverState *bs)
357786
     return QTAILQ_NEXT(bs, node_list);
357786
 }
357786
 
357786
+BlockDriverState *bdrv_next_all_states(BlockDriverState *bs)
357786
+{
357786
+    if (!bs) {
357786
+        return QTAILQ_FIRST(&all_bdrv_states);
357786
+    }
357786
+    return QTAILQ_NEXT(bs, bs_list);
357786
+}
357786
+
357786
 const char *bdrv_get_node_name(const BlockDriverState *bs)
357786
 {
357786
     return bs->node_name;
357786
diff --git a/block/io.c b/block/io.c
357786
index 0021fefd..38ae299 100644
357786
--- a/block/io.c
357786
+++ b/block/io.c
357786
@@ -38,6 +38,8 @@
357786
 /* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */
357786
 #define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)
357786
 
357786
+static AioWait drain_all_aio_wait;
357786
+
357786
 static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
357786
     int64_t offset, int bytes, BdrvRequestFlags flags);
357786
 
357786
@@ -471,6 +473,29 @@ static void bdrv_drain_assert_idle(BlockDriverState *bs)
357786
     }
357786
 }
357786
 
357786
+unsigned int bdrv_drain_all_count = 0;
357786
+
357786
+static bool bdrv_drain_all_poll(void)
357786
+{
357786
+    BlockDriverState *bs = NULL;
357786
+    bool result = false;
357786
+
357786
+    /* Execute pending BHs first (may modify the graph) and check everything
357786
+     * else only after the BHs have executed. */
357786
+    while (aio_poll(qemu_get_aio_context(), false));
357786
+
357786
+    /* bdrv_drain_poll() can't make changes to the graph and we are holding the
357786
+     * main AioContext lock, so iterating bdrv_next_all_states() is safe. */
357786
+    while ((bs = bdrv_next_all_states(bs))) {
357786
+        AioContext *aio_context = bdrv_get_aio_context(bs);
357786
+        aio_context_acquire(aio_context);
357786
+        result |= bdrv_drain_poll(bs, false, NULL, true);
357786
+        aio_context_release(aio_context);
357786
+    }
357786
+
357786
+    return result;
357786
+}
357786
+
357786
 /*
357786
  * Wait for pending requests to complete across all BlockDriverStates
357786
  *
357786
@@ -485,45 +510,51 @@ static void bdrv_drain_assert_idle(BlockDriverState *bs)
357786
  */
357786
 void bdrv_drain_all_begin(void)
357786
 {
357786
-    BlockDriverState *bs;
357786
-    BdrvNextIterator it;
357786
+    BlockDriverState *bs = NULL;
357786
 
357786
     if (qemu_in_coroutine()) {
357786
-        bdrv_co_yield_to_drain(NULL, true, false, NULL, false, true);
357786
+        bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true);
357786
         return;
357786
     }
357786
 
357786
-    /* BDRV_POLL_WHILE() for a node can only be called from its own I/O thread
357786
-     * or the main loop AioContext. We potentially use BDRV_POLL_WHILE() on
357786
-     * nodes in several different AioContexts, so make sure we're in the main
357786
-     * context. */
357786
+    /* AIO_WAIT_WHILE() with a NULL context can only be called from the main
357786
+     * loop AioContext, so make sure we're in the main context. */
357786
     assert(qemu_get_current_aio_context() == qemu_get_aio_context());
357786
+    assert(bdrv_drain_all_count < INT_MAX);
357786
+    bdrv_drain_all_count++;
357786
 
357786
-    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
357786
+    /* Quiesce all nodes, without polling in-flight requests yet. The graph
357786
+     * cannot change during this loop. */
357786
+    while ((bs = bdrv_next_all_states(bs))) {
357786
         AioContext *aio_context = bdrv_get_aio_context(bs);
357786
 
357786
         aio_context_acquire(aio_context);
357786
-        bdrv_do_drained_begin(bs, true, NULL, false, true);
357786
+        bdrv_do_drained_begin(bs, false, NULL, true, false);
357786
         aio_context_release(aio_context);
357786
     }
357786
 
357786
-    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
357786
+    /* Now poll the in-flight requests */
357786
+    AIO_WAIT_WHILE(&drain_all_aio_wait, NULL, bdrv_drain_all_poll());
357786
+
357786
+    while ((bs = bdrv_next_all_states(bs))) {
357786
         bdrv_drain_assert_idle(bs);
357786
     }
357786
 }
357786
 
357786
 void bdrv_drain_all_end(void)
357786
 {
357786
-    BlockDriverState *bs;
357786
-    BdrvNextIterator it;
357786
+    BlockDriverState *bs = NULL;
357786
 
357786
-    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
357786
+    while ((bs = bdrv_next_all_states(bs))) {
357786
         AioContext *aio_context = bdrv_get_aio_context(bs);
357786
 
357786
         aio_context_acquire(aio_context);
357786
-        bdrv_do_drained_end(bs, true, NULL, false);
357786
+        bdrv_do_drained_end(bs, false, NULL, true);
357786
         aio_context_release(aio_context);
357786
     }
357786
+
357786
+    assert(bdrv_drain_all_count > 0);
357786
+    bdrv_drain_all_count--;
357786
 }
357786
 
357786
 void bdrv_drain_all(void)
357786
@@ -658,6 +689,7 @@ void bdrv_inc_in_flight(BlockDriverState *bs)
357786
 void bdrv_wakeup(BlockDriverState *bs)
357786
 {
357786
     aio_wait_kick(bdrv_get_aio_wait(bs));
357786
+    aio_wait_kick(&drain_all_aio_wait);
357786
 }
357786
 
357786
 void bdrv_dec_in_flight(BlockDriverState *bs)
357786
diff --git a/include/block/block.h b/include/block/block.h
357786
index 6e91803..f9079ac 100644
357786
--- a/include/block/block.h
357786
+++ b/include/block/block.h
357786
@@ -449,6 +449,7 @@ BlockDriverState *bdrv_lookup_bs(const char *device,
357786
                                  Error **errp);
357786
 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base);
357786
 BlockDriverState *bdrv_next_node(BlockDriverState *bs);
357786
+BlockDriverState *bdrv_next_all_states(BlockDriverState *bs);
357786
 
357786
 typedef struct BdrvNextIterator {
357786
     enum {
357786
diff --git a/include/block/block_int.h b/include/block/block_int.h
357786
index 0ad8a76..9757d5e 100644
357786
--- a/include/block/block_int.h
357786
+++ b/include/block/block_int.h
357786
@@ -845,6 +845,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
357786
     int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
357786
     BdrvRequestFlags flags);
357786
 
357786
+extern unsigned int bdrv_drain_all_count;
357786
 void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent);
357786
 void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent);
357786
 
357786
-- 
357786
1.8.3.1
357786