7f1c5b
From 9bb9cafd736057fd2a8ebfa6f5769668f125fbe6 Mon Sep 17 00:00:00 2001
7f1c5b
From: Kevin Wolf <kwolf@redhat.com>
7f1c5b
Date: Fri, 18 Nov 2022 18:41:06 +0100
7f1c5b
Subject: [PATCH 24/31] block: Call drain callbacks only once
7f1c5b
7f1c5b
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
7f1c5b
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
7f1c5b
RH-Bugzilla: 2155112
7f1c5b
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
7f1c5b
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
7f1c5b
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
7f1c5b
RH-Commit: [12/16] ea9a433dc01d1b8539a2d4ea12887f2a3ce830ea (sgarzarella/qemu-kvm-c-9-s)
7f1c5b
7f1c5b
We only need to call both the BlockDriver's callback and the parent
7f1c5b
callbacks when going from undrained to drained or vice versa. A second
7f1c5b
drain section doesn't make a difference for the driver or the parent,
7f1c5b
they weren't supposed to send new requests before and after the second
7f1c5b
drain.
7f1c5b
7f1c5b
One thing that gets in the way is the 'ignore_bds_parents' parameter in
7f1c5b
bdrv_do_drained_begin_quiesce() and bdrv_do_drained_end(): It means that
7f1c5b
bdrv_drain_all_begin() increases bs->quiesce_counter, but does not
7f1c5b
quiesce the parent through BdrvChildClass callbacks. If an additional
7f1c5b
drain section is started now, bs->quiesce_counter will be non-zero, but
7f1c5b
we would still need to quiesce the parent through BdrvChildClass in
7f1c5b
order to keep things consistent (and unquiesce it on the matching
7f1c5b
bdrv_drained_end(), even though the counter would not reach 0 yet as
7f1c5b
long as the bdrv_drain_all() section is still active).
7f1c5b
7f1c5b
Instead of keeping track of this, let's just get rid of the parameter.
7f1c5b
It was introduced in commit 6cd5c9d7b2d as an optimisation so that
7f1c5b
during bdrv_drain_all(), we wouldn't recursively drain all parents up to
7f1c5b
the root for each node, resulting in quadratic complexity. As it happens,
7f1c5b
calling the callbacks only once solves the same problem, so as of this
7f1c5b
patch, we'll still have O(n) complexity and ignore_bds_parents is not
7f1c5b
needed any more.
7f1c5b
7f1c5b
This patch only ignores the 'ignore_bds_parents' parameter. It will be
7f1c5b
removed in a separate patch.
7f1c5b
7f1c5b
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
7f1c5b
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
7f1c5b
Message-Id: <20221118174110.55183-12-kwolf@redhat.com>
7f1c5b
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
7f1c5b
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
7f1c5b
(cherry picked from commit 57e05be343f33f4e5899a8d8946a8596d68424a1)
7f1c5b
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
7f1c5b
---
7f1c5b
 block.c                          | 25 +++++++------------------
7f1c5b
 block/io.c                       | 30 ++++++++++++++++++------------
7f1c5b
 include/block/block_int-common.h |  8 ++++----
7f1c5b
 tests/unit/test-bdrv-drain.c     | 16 ++++++++++------
7f1c5b
 4 files changed, 39 insertions(+), 40 deletions(-)
7f1c5b
7f1c5b
diff --git a/block.c b/block.c
7f1c5b
index e0e3b21790..5a583e260d 100644
7f1c5b
--- a/block.c
7f1c5b
+++ b/block.c
7f1c5b
@@ -2824,7 +2824,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
7f1c5b
 {
7f1c5b
     BlockDriverState *old_bs = child->bs;
7f1c5b
     int new_bs_quiesce_counter;
7f1c5b
-    int drain_saldo;
7f1c5b
 
7f1c5b
     assert(!child->frozen);
7f1c5b
     assert(old_bs != new_bs);
7f1c5b
@@ -2834,16 +2833,13 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
7f1c5b
         assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
7f1c5b
     }
7f1c5b
 
7f1c5b
-    new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
7f1c5b
-    drain_saldo = new_bs_quiesce_counter - child->parent_quiesce_counter;
7f1c5b
-
7f1c5b
     /*
7f1c5b
      * If the new child node is drained but the old one was not, flush
7f1c5b
      * all outstanding requests to the old child node.
7f1c5b
      */
7f1c5b
-    while (drain_saldo > 0 && child->klass->drained_begin) {
7f1c5b
+    new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
7f1c5b
+    if (new_bs_quiesce_counter && !child->quiesced_parent) {
7f1c5b
         bdrv_parent_drained_begin_single(child, true);
7f1c5b
-        drain_saldo--;
7f1c5b
     }
7f1c5b
 
7f1c5b
     if (old_bs) {
7f1c5b
@@ -2859,16 +2855,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
7f1c5b
     if (new_bs) {
7f1c5b
         assert_bdrv_graph_writable(new_bs);
7f1c5b
         QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
7f1c5b
-
7f1c5b
-        /*
7f1c5b
-         * Polling in bdrv_parent_drained_begin_single() may have led to the new
7f1c5b
-         * node's quiesce_counter having been decreased.  Not a problem, we just
7f1c5b
-         * need to recognize this here and then invoke drained_end appropriately
7f1c5b
-         * more often.
7f1c5b
-         */
7f1c5b
-        assert(new_bs->quiesce_counter <= new_bs_quiesce_counter);
7f1c5b
-        drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter;
7f1c5b
-
7f1c5b
         if (child->klass->attach) {
7f1c5b
             child->klass->attach(child);
7f1c5b
         }
7f1c5b
@@ -2877,10 +2863,13 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
7f1c5b
     /*
7f1c5b
      * If the old child node was drained but the new one is not, allow
7f1c5b
      * requests to come in only after the new node has been attached.
7f1c5b
+     *
7f1c5b
+     * Update new_bs_quiesce_counter because bdrv_parent_drained_begin_single()
7f1c5b
+     * polls, which could have changed the value.
7f1c5b
      */
7f1c5b
-    while (drain_saldo < 0 && child->klass->drained_end) {
7f1c5b
+    new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
7f1c5b
+    if (!new_bs_quiesce_counter && child->quiesced_parent) {
7f1c5b
         bdrv_parent_drained_end_single(child);
7f1c5b
-        drain_saldo++;
7f1c5b
     }
7f1c5b
 }
7f1c5b
 
7f1c5b
diff --git a/block/io.c b/block/io.c
7f1c5b
index 75224480d0..87d6f22ec4 100644
7f1c5b
--- a/block/io.c
7f1c5b
+++ b/block/io.c
7f1c5b
@@ -62,8 +62,9 @@ void bdrv_parent_drained_end_single(BdrvChild *c)
7f1c5b
 {
7f1c5b
     IO_OR_GS_CODE();
7f1c5b
 
7f1c5b
-    assert(c->parent_quiesce_counter > 0);
7f1c5b
-    c->parent_quiesce_counter--;
7f1c5b
+    assert(c->quiesced_parent);
7f1c5b
+    c->quiesced_parent = false;
7f1c5b
+
7f1c5b
     if (c->klass->drained_end) {
7f1c5b
         c->klass->drained_end(c);
7f1c5b
     }
7f1c5b
@@ -110,7 +111,10 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
7f1c5b
 {
7f1c5b
     AioContext *ctx = bdrv_child_get_parent_aio_context(c);
7f1c5b
     IO_OR_GS_CODE();
7f1c5b
-    c->parent_quiesce_counter++;
7f1c5b
+
7f1c5b
+    assert(!c->quiesced_parent);
7f1c5b
+    c->quiesced_parent = true;
7f1c5b
+
7f1c5b
     if (c->klass->drained_begin) {
7f1c5b
         c->klass->drained_begin(c);
7f1c5b
     }
7f1c5b
@@ -358,11 +362,12 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
7f1c5b
     /* Stop things in parent-to-child order */
7f1c5b
     if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
7f1c5b
         aio_disable_external(bdrv_get_aio_context(bs));
7f1c5b
-    }
7f1c5b
 
7f1c5b
-    bdrv_parent_drained_begin(bs, parent, ignore_bds_parents);
7f1c5b
-    if (bs->drv && bs->drv->bdrv_drain_begin) {
7f1c5b
-        bs->drv->bdrv_drain_begin(bs);
7f1c5b
+        /* TODO Remove ignore_bds_parents, we don't consider it any more */
7f1c5b
+        bdrv_parent_drained_begin(bs, parent, false);
7f1c5b
+        if (bs->drv && bs->drv->bdrv_drain_begin) {
7f1c5b
+            bs->drv->bdrv_drain_begin(bs);
7f1c5b
+        }
7f1c5b
     }
7f1c5b
 }
7f1c5b
 
7f1c5b
@@ -413,13 +418,14 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
7f1c5b
     assert(bs->quiesce_counter > 0);
7f1c5b
 
7f1c5b
     /* Re-enable things in child-to-parent order */
7f1c5b
-    if (bs->drv && bs->drv->bdrv_drain_end) {
7f1c5b
-        bs->drv->bdrv_drain_end(bs);
7f1c5b
-    }
7f1c5b
-    bdrv_parent_drained_end(bs, parent, ignore_bds_parents);
7f1c5b
-
7f1c5b
     old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter);
7f1c5b
     if (old_quiesce_counter == 1) {
7f1c5b
+        if (bs->drv && bs->drv->bdrv_drain_end) {
7f1c5b
+            bs->drv->bdrv_drain_end(bs);
7f1c5b
+        }
7f1c5b
+        /* TODO Remove ignore_bds_parents, we don't consider it any more */
7f1c5b
+        bdrv_parent_drained_end(bs, parent, false);
7f1c5b
+
7f1c5b
         aio_enable_external(bdrv_get_aio_context(bs));
7f1c5b
     }
7f1c5b
 }
7f1c5b
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
7f1c5b
index 791dddfd7d..a6bc6b7fe9 100644
7f1c5b
--- a/include/block/block_int-common.h
7f1c5b
+++ b/include/block/block_int-common.h
7f1c5b
@@ -980,13 +980,13 @@ struct BdrvChild {
7f1c5b
     bool frozen;
7f1c5b
 
7f1c5b
     /*
7f1c5b
-     * How many times the parent of this child has been drained
7f1c5b
+     * True if the parent of this child has been drained by this BdrvChild
7f1c5b
      * (through klass->drained_*).
7f1c5b
-     * Usually, this is equal to bs->quiesce_counter (potentially
7f1c5b
-     * reduced by bdrv_drain_all_count).  It may differ while the
7f1c5b
+     *
7f1c5b
+     * It is generally true if bs->quiesce_counter > 0. It may differ while the
7f1c5b
      * child is entering or leaving a drained section.
7f1c5b
      */
7f1c5b
-    int parent_quiesce_counter;
7f1c5b
+    bool quiesced_parent;
7f1c5b
 
7f1c5b
     QLIST_ENTRY(BdrvChild) next;
7f1c5b
     QLIST_ENTRY(BdrvChild) next_parent;
7f1c5b
diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
7f1c5b
index dda08de8db..172bc6debc 100644
7f1c5b
--- a/tests/unit/test-bdrv-drain.c
7f1c5b
+++ b/tests/unit/test-bdrv-drain.c
7f1c5b
@@ -296,7 +296,11 @@ static void test_quiesce_common(enum drain_type drain_type, bool recursive)
7f1c5b
 
7f1c5b
     do_drain_begin(drain_type, bs);
7f1c5b
 
7f1c5b
-    g_assert_cmpint(bs->quiesce_counter, ==, 1);
7f1c5b
+    if (drain_type == BDRV_DRAIN_ALL) {
7f1c5b
+        g_assert_cmpint(bs->quiesce_counter, ==, 2);
7f1c5b
+    } else {
7f1c5b
+        g_assert_cmpint(bs->quiesce_counter, ==, 1);
7f1c5b
+    }
7f1c5b
     g_assert_cmpint(backing->quiesce_counter, ==, !!recursive);
7f1c5b
 
7f1c5b
     do_drain_end(drain_type, bs);
7f1c5b
@@ -348,8 +352,8 @@ static void test_nested(void)
7f1c5b
 
7f1c5b
     for (outer = 0; outer < DRAIN_TYPE_MAX; outer++) {
7f1c5b
         for (inner = 0; inner < DRAIN_TYPE_MAX; inner++) {
7f1c5b
-            int backing_quiesce = (outer != BDRV_DRAIN) +
7f1c5b
-                                  (inner != BDRV_DRAIN);
7f1c5b
+            int backing_quiesce = (outer == BDRV_DRAIN_ALL) +
7f1c5b
+                                  (inner == BDRV_DRAIN_ALL);
7f1c5b
 
7f1c5b
             g_assert_cmpint(bs->quiesce_counter, ==, 0);
7f1c5b
             g_assert_cmpint(backing->quiesce_counter, ==, 0);
7f1c5b
@@ -359,10 +363,10 @@ static void test_nested(void)
7f1c5b
             do_drain_begin(outer, bs);
7f1c5b
             do_drain_begin(inner, bs);
7f1c5b
 
7f1c5b
-            g_assert_cmpint(bs->quiesce_counter, ==, 2);
7f1c5b
+            g_assert_cmpint(bs->quiesce_counter, ==, 2 + !!backing_quiesce);
7f1c5b
             g_assert_cmpint(backing->quiesce_counter, ==, backing_quiesce);
7f1c5b
-            g_assert_cmpint(s->drain_count, ==, 2);
7f1c5b
-            g_assert_cmpint(backing_s->drain_count, ==, backing_quiesce);
7f1c5b
+            g_assert_cmpint(s->drain_count, ==, 1);
7f1c5b
+            g_assert_cmpint(backing_s->drain_count, ==, !!backing_quiesce);
7f1c5b
 
7f1c5b
             do_drain_end(inner, bs);
7f1c5b
             do_drain_end(outer, bs);
7f1c5b
-- 
7f1c5b
2.31.1
7f1c5b