Blame SOURCES/kvm-block-Don-t-poll-in-bdrv_replace_child_noperm.patch

ed5979
From 150ef3356cc6732fede7ca059168fc0565ed0b76 Mon Sep 17 00:00:00 2001
ed5979
From: Kevin Wolf <kwolf@redhat.com>
ed5979
Date: Fri, 18 Nov 2022 18:41:09 +0100
ed5979
Subject: [PATCH 27/31] block: Don't poll in bdrv_replace_child_noperm()
ed5979
ed5979
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
ed5979
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
ed5979
RH-Bugzilla: 2155112
ed5979
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
ed5979
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
ed5979
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
ed5979
RH-Commit: [15/16] 5fc7d6b703a2d6c1118d875056f0afbd6ba5cca9 (sgarzarella/qemu-kvm-c-9-s)
ed5979
ed5979
In order to make sure that bdrv_replace_child_noperm() doesn't have to
ed5979
poll any more, get rid of the bdrv_parent_drained_begin_single() call.
ed5979
ed5979
This is possible now because we can require that the parent is already
ed5979
drained through the child in question when the function is called and we
ed5979
don't call the parent drain callbacks more than once.
ed5979
ed5979
The additional drain calls needed in callers cause the test case to run
ed5979
its code in the drain handler too early (bdrv_attach_child() drains
ed5979
now), so modify it to only enable the code after the test setup has
ed5979
completed.
ed5979
ed5979
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
ed5979
Message-Id: <20221118174110.55183-15-kwolf@redhat.com>
ed5979
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
ed5979
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
ed5979
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
ed5979
(cherry picked from commit 23987471285a26397e3152a9244b652445fd36c4)
ed5979
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
ed5979
---
ed5979
 block.c                      | 103 ++++++++++++++++++++++++++++++-----
ed5979
 block/io.c                   |   2 +-
ed5979
 include/block/block-io.h     |   8 +++
ed5979
 tests/unit/test-bdrv-drain.c |  10 ++++
ed5979
 4 files changed, 108 insertions(+), 15 deletions(-)
ed5979
ed5979
diff --git a/block.c b/block.c
ed5979
index af31a94863..65588d313a 100644
ed5979
--- a/block.c
ed5979
+++ b/block.c
ed5979
@@ -2407,6 +2407,20 @@ static void bdrv_replace_child_abort(void *opaque)
ed5979
 
ed5979
     GLOBAL_STATE_CODE();
ed5979
     /* old_bs reference is transparently moved from @s to @s->child */
ed5979
+    if (!s->child->bs) {
ed5979
+        /*
ed5979
+         * The parents were undrained when removing old_bs from the child. New
ed5979
+         * requests can't have been made, though, because the child was empty.
ed5979
+         *
ed5979
+         * TODO Make bdrv_replace_child_noperm() transactionable to avoid
ed5979
+         * undraining the parent in the first place. Once this is done, having
ed5979
+         * new_bs drained when calling bdrv_replace_child_tran() is not a
ed5979
+         * requirement any more.
ed5979
+         */
ed5979
+        bdrv_parent_drained_begin_single(s->child, false);
ed5979
+        assert(!bdrv_parent_drained_poll_single(s->child));
ed5979
+    }
ed5979
+    assert(s->child->quiesced_parent);
ed5979
     bdrv_replace_child_noperm(s->child, s->old_bs);
ed5979
     bdrv_unref(new_bs);
ed5979
 }
ed5979
@@ -2422,12 +2436,19 @@ static TransactionActionDrv bdrv_replace_child_drv = {
ed5979
  *
ed5979
  * Note: real unref of old_bs is done only on commit.
ed5979
  *
ed5979
+ * Both @child->bs and @new_bs (if non-NULL) must be drained. @new_bs must be
ed5979
+ * kept drained until the transaction is completed.
ed5979
+ *
ed5979
  * The function doesn't update permissions, caller is responsible for this.
ed5979
  */
ed5979
 static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs,
ed5979
                                     Transaction *tran)
ed5979
 {
ed5979
     BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1);
ed5979
+
ed5979
+    assert(child->quiesced_parent);
ed5979
+    assert(!new_bs || new_bs->quiesce_counter);
ed5979
+
ed5979
     *s = (BdrvReplaceChildState) {
ed5979
         .child = child,
ed5979
         .old_bs = child->bs,
ed5979
@@ -2819,6 +2840,14 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm)
ed5979
     return permissions[qapi_perm];
ed5979
 }
ed5979
 
ed5979
+/*
ed5979
+ * Replaces the node that a BdrvChild points to without updating permissions.
ed5979
+ *
ed5979
+ * If @new_bs is non-NULL, the parent of @child must already be drained through
ed5979
+ * @child.
ed5979
+ *
ed5979
+ * This function does not poll.
ed5979
+ */
ed5979
 static void bdrv_replace_child_noperm(BdrvChild *child,
ed5979
                                       BlockDriverState *new_bs)
ed5979
 {
ed5979
@@ -2826,6 +2855,28 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
ed5979
     int new_bs_quiesce_counter;
ed5979
 
ed5979
     assert(!child->frozen);
ed5979
+
ed5979
+    /*
ed5979
+     * If we want to change the BdrvChild to point to a drained node as its new
ed5979
+     * child->bs, we need to make sure that its new parent is drained, too. In
ed5979
+     * other words, either child->quiesce_parent must already be true or we must
ed5979
+     * be able to set it and keep the parent's quiesce_counter consistent with
ed5979
+     * that, but without polling or starting new requests (this function
ed5979
+     * guarantees that it doesn't poll, and starting new requests would be
ed5979
+     * against the invariants of drain sections).
ed5979
+     *
ed5979
+     * To keep things simple, we pick the first option (child->quiesce_parent
ed5979
+     * must already be true). We also generalise the rule a bit to make it
ed5979
+     * easier to verify in callers and more likely to be covered in test cases:
ed5979
+     * The parent must be quiesced through this child even if new_bs isn't
ed5979
+     * currently drained.
ed5979
+     *
ed5979
+     * The only exception is for callers that always pass new_bs == NULL. In
ed5979
+     * this case, we obviously never need to consider the case of a drained
ed5979
+     * new_bs, so we can keep the callers simpler by allowing them not to drain
ed5979
+     * the parent.
ed5979
+     */
ed5979
+    assert(!new_bs || child->quiesced_parent);
ed5979
     assert(old_bs != new_bs);
ed5979
     GLOBAL_STATE_CODE();
ed5979
 
ed5979
@@ -2833,15 +2884,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
ed5979
         assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
ed5979
     }
ed5979
 
ed5979
-    /*
ed5979
-     * If the new child node is drained but the old one was not, flush
ed5979
-     * all outstanding requests to the old child node.
ed5979
-     */
ed5979
-    new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
ed5979
-    if (new_bs_quiesce_counter && !child->quiesced_parent) {
ed5979
-        bdrv_parent_drained_begin_single(child, true);
ed5979
-    }
ed5979
-
ed5979
     if (old_bs) {
ed5979
         if (child->klass->detach) {
ed5979
             child->klass->detach(child);
ed5979
@@ -2861,11 +2903,9 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
ed5979
     }
ed5979
 
ed5979
     /*
ed5979
-     * If the old child node was drained but the new one is not, allow
ed5979
-     * requests to come in only after the new node has been attached.
ed5979
-     *
ed5979
-     * Update new_bs_quiesce_counter because bdrv_parent_drained_begin_single()
ed5979
-     * polls, which could have changed the value.
ed5979
+     * If the parent was drained through this BdrvChild previously, but new_bs
ed5979
+     * is not drained, allow requests to come in only after the new node has
ed5979
+     * been attached.
ed5979
      */
ed5979
     new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
ed5979
     if (!new_bs_quiesce_counter && child->quiesced_parent) {
ed5979
@@ -3002,6 +3042,24 @@ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs,
ed5979
     }
ed5979
 
ed5979
     bdrv_ref(child_bs);
ed5979
+    /*
ed5979
+     * Let every new BdrvChild start with a drained parent. Inserting the child
ed5979
+     * in the graph with bdrv_replace_child_noperm() will undrain it if
ed5979
+     * @child_bs is not drained.
ed5979
+     *
ed5979
+     * The child was only just created and is not yet visible in global state
ed5979
+     * until bdrv_replace_child_noperm() inserts it into the graph, so nobody
ed5979
+     * could have sent requests and polling is not necessary.
ed5979
+     *
ed5979
+     * Note that this means that the parent isn't fully drained yet, we only
ed5979
+     * stop new requests from coming in. This is fine, we don't care about the
ed5979
+     * old requests here, they are not for this child. If another place enters a
ed5979
+     * drain section for the same parent, but wants it to be fully quiesced, it
ed5979
+     * will not run most of the the code in .drained_begin() again (which is not
ed5979
+     * a problem, we already did this), but it will still poll until the parent
ed5979
+     * is fully quiesced, so it will not be negatively affected either.
ed5979
+     */
ed5979
+    bdrv_parent_drained_begin_single(new_child, false);
ed5979
     bdrv_replace_child_noperm(new_child, child_bs);
ed5979
 
ed5979
     BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1);
ed5979
@@ -5059,12 +5117,24 @@ static void bdrv_remove_child(BdrvChild *child, Transaction *tran)
ed5979
     }
ed5979
 
ed5979
     if (child->bs) {
ed5979
+        BlockDriverState *bs = child->bs;
ed5979
+        bdrv_drained_begin(bs);
ed5979
         bdrv_replace_child_tran(child, NULL, tran);
ed5979
+        bdrv_drained_end(bs);
ed5979
     }
ed5979
 
ed5979
     tran_add(tran, &bdrv_remove_child_drv, child);
ed5979
 }
ed5979
 
ed5979
+static void undrain_on_clean_cb(void *opaque)
ed5979
+{
ed5979
+    bdrv_drained_end(opaque);
ed5979
+}
ed5979
+
ed5979
+static TransactionActionDrv undrain_on_clean = {
ed5979
+    .clean = undrain_on_clean_cb,
ed5979
+};
ed5979
+
ed5979
 static int bdrv_replace_node_noperm(BlockDriverState *from,
ed5979
                                     BlockDriverState *to,
ed5979
                                     bool auto_skip, Transaction *tran,
ed5979
@@ -5074,6 +5144,11 @@ static int bdrv_replace_node_noperm(BlockDriverState *from,
ed5979
 
ed5979
     GLOBAL_STATE_CODE();
ed5979
 
ed5979
+    bdrv_drained_begin(from);
ed5979
+    bdrv_drained_begin(to);
ed5979
+    tran_add(tran, &undrain_on_clean, from);
ed5979
+    tran_add(tran, &undrain_on_clean, to);
ed5979
+
ed5979
     QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
ed5979
         assert(c->bs == from);
ed5979
         if (!should_update_child(c, to)) {
ed5979
diff --git a/block/io.c b/block/io.c
ed5979
index 5e9150d92c..ae64830eac 100644
ed5979
--- a/block/io.c
ed5979
+++ b/block/io.c
ed5979
@@ -81,7 +81,7 @@ static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore)
ed5979
     }
ed5979
 }
ed5979
 
ed5979
-static bool bdrv_parent_drained_poll_single(BdrvChild *c)
ed5979
+bool bdrv_parent_drained_poll_single(BdrvChild *c)
ed5979
 {
ed5979
     if (c->klass->drained_poll) {
ed5979
         return c->klass->drained_poll(c);
ed5979
diff --git a/include/block/block-io.h b/include/block/block-io.h
ed5979
index 8f5e75756a..65e6d2569b 100644
ed5979
--- a/include/block/block-io.h
ed5979
+++ b/include/block/block-io.h
ed5979
@@ -292,6 +292,14 @@ bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
ed5979
  */
ed5979
 void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll);
ed5979
 
ed5979
+/**
ed5979
+ * bdrv_parent_drained_poll_single:
ed5979
+ *
ed5979
+ * Returns true if there is any pending activity to cease before @c can be
ed5979
+ * called quiesced, false otherwise.
ed5979
+ */
ed5979
+bool bdrv_parent_drained_poll_single(BdrvChild *c);
ed5979
+
ed5979
 /**
ed5979
  * bdrv_parent_drained_end_single:
ed5979
  *
ed5979
diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
ed5979
index 172bc6debc..2686a8acee 100644
ed5979
--- a/tests/unit/test-bdrv-drain.c
ed5979
+++ b/tests/unit/test-bdrv-drain.c
ed5979
@@ -1654,6 +1654,7 @@ static void test_drop_intermediate_poll(void)
ed5979
 
ed5979
 
ed5979
 typedef struct BDRVReplaceTestState {
ed5979
+    bool setup_completed;
ed5979
     bool was_drained;
ed5979
     bool was_undrained;
ed5979
     bool has_read;
ed5979
@@ -1738,6 +1739,10 @@ static void bdrv_replace_test_drain_begin(BlockDriverState *bs)
ed5979
 {
ed5979
     BDRVReplaceTestState *s = bs->opaque;
ed5979
 
ed5979
+    if (!s->setup_completed) {
ed5979
+        return;
ed5979
+    }
ed5979
+
ed5979
     if (!s->drain_count) {
ed5979
         s->drain_co = qemu_coroutine_create(bdrv_replace_test_drain_co, bs);
ed5979
         bdrv_inc_in_flight(bs);
ed5979
@@ -1769,6 +1774,10 @@ static void bdrv_replace_test_drain_end(BlockDriverState *bs)
ed5979
 {
ed5979
     BDRVReplaceTestState *s = bs->opaque;
ed5979
 
ed5979
+    if (!s->setup_completed) {
ed5979
+        return;
ed5979
+    }
ed5979
+
ed5979
     g_assert(s->drain_count > 0);
ed5979
     if (!--s->drain_count) {
ed5979
         s->was_undrained = true;
ed5979
@@ -1867,6 +1876,7 @@ static void do_test_replace_child_mid_drain(int old_drain_count,
ed5979
     bdrv_ref(old_child_bs);
ed5979
     bdrv_attach_child(parent_bs, old_child_bs, "child", &child_of_bds,
ed5979
                       BDRV_CHILD_COW, &error_abort);
ed5979
+    parent_s->setup_completed = true;
ed5979
 
ed5979
     for (i = 0; i < old_drain_count; i++) {
ed5979
         bdrv_drained_begin(old_child_bs);
ed5979
-- 
ed5979
2.31.1
ed5979