thebeanogamer / rpms / qemu-kvm

Forked from rpms/qemu-kvm 5 months ago
Clone
ed5979
From 9bb9cafd736057fd2a8ebfa6f5769668f125fbe6 Mon Sep 17 00:00:00 2001
ed5979
From: Kevin Wolf <kwolf@redhat.com>
ed5979
Date: Fri, 18 Nov 2022 18:41:06 +0100
ed5979
Subject: [PATCH 24/31] block: Call drain callbacks only once
ed5979
ed5979
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
ed5979
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
ed5979
RH-Bugzilla: 2155112
ed5979
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
ed5979
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
ed5979
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
ed5979
RH-Commit: [12/16] ea9a433dc01d1b8539a2d4ea12887f2a3ce830ea (sgarzarella/qemu-kvm-c-9-s)
ed5979
ed5979
We only need to call both the BlockDriver's callback and the parent
ed5979
callbacks when going from undrained to drained or vice versa. A second
ed5979
drain section doesn't make a difference for the driver or the parent,
ed5979
they weren't supposed to send new requests before and after the second
ed5979
drain.
ed5979
ed5979
One thing that gets in the way is the 'ignore_bds_parents' parameter in
ed5979
bdrv_do_drained_begin_quiesce() and bdrv_do_drained_end(): It means that
ed5979
bdrv_drain_all_begin() increases bs->quiesce_counter, but does not
ed5979
quiesce the parent through BdrvChildClass callbacks. If an additional
ed5979
drain section is started now, bs->quiesce_counter will be non-zero, but
ed5979
we would still need to quiesce the parent through BdrvChildClass in
ed5979
order to keep things consistent (and unquiesce it on the matching
ed5979
bdrv_drained_end(), even though the counter would not reach 0 yet as
ed5979
long as the bdrv_drain_all() section is still active).
ed5979
ed5979
Instead of keeping track of this, let's just get rid of the parameter.
ed5979
It was introduced in commit 6cd5c9d7b2d as an optimisation so that
ed5979
during bdrv_drain_all(), we wouldn't recursively drain all parents up to
ed5979
the root for each node, resulting in quadratic complexity. As it happens,
ed5979
calling the callbacks only once solves the same problem, so as of this
ed5979
patch, we'll still have O(n) complexity and ignore_bds_parents is not
ed5979
needed any more.
ed5979
ed5979
This patch only ignores the 'ignore_bds_parents' parameter. It will be
ed5979
removed in a separate patch.
ed5979
ed5979
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
ed5979
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
ed5979
Message-Id: <20221118174110.55183-12-kwolf@redhat.com>
ed5979
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
ed5979
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
ed5979
(cherry picked from commit 57e05be343f33f4e5899a8d8946a8596d68424a1)
ed5979
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
ed5979
---
ed5979
 block.c                          | 25 +++++++------------------
ed5979
 block/io.c                       | 30 ++++++++++++++++++------------
ed5979
 include/block/block_int-common.h |  8 ++++----
ed5979
 tests/unit/test-bdrv-drain.c     | 16 ++++++++++------
ed5979
 4 files changed, 39 insertions(+), 40 deletions(-)
ed5979
ed5979
diff --git a/block.c b/block.c
ed5979
index e0e3b21790..5a583e260d 100644
ed5979
--- a/block.c
ed5979
+++ b/block.c
ed5979
@@ -2824,7 +2824,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
ed5979
 {
ed5979
     BlockDriverState *old_bs = child->bs;
ed5979
     int new_bs_quiesce_counter;
ed5979
-    int drain_saldo;
ed5979
 
ed5979
     assert(!child->frozen);
ed5979
     assert(old_bs != new_bs);
ed5979
@@ -2834,16 +2833,13 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
ed5979
         assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
ed5979
     }
ed5979
 
ed5979
-    new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
ed5979
-    drain_saldo = new_bs_quiesce_counter - child->parent_quiesce_counter;
ed5979
-
ed5979
     /*
ed5979
      * If the new child node is drained but the old one was not, flush
ed5979
      * all outstanding requests to the old child node.
ed5979
      */
ed5979
-    while (drain_saldo > 0 && child->klass->drained_begin) {
ed5979
+    new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
ed5979
+    if (new_bs_quiesce_counter && !child->quiesced_parent) {
ed5979
         bdrv_parent_drained_begin_single(child, true);
ed5979
-        drain_saldo--;
ed5979
     }
ed5979
 
ed5979
     if (old_bs) {
ed5979
@@ -2859,16 +2855,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
ed5979
     if (new_bs) {
ed5979
         assert_bdrv_graph_writable(new_bs);
ed5979
         QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
ed5979
-
ed5979
-        /*
ed5979
-         * Polling in bdrv_parent_drained_begin_single() may have led to the new
ed5979
-         * node's quiesce_counter having been decreased.  Not a problem, we just
ed5979
-         * need to recognize this here and then invoke drained_end appropriately
ed5979
-         * more often.
ed5979
-         */
ed5979
-        assert(new_bs->quiesce_counter <= new_bs_quiesce_counter);
ed5979
-        drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter;
ed5979
-
ed5979
         if (child->klass->attach) {
ed5979
             child->klass->attach(child);
ed5979
         }
ed5979
@@ -2877,10 +2863,13 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
ed5979
     /*
ed5979
      * If the old child node was drained but the new one is not, allow
ed5979
      * requests to come in only after the new node has been attached.
ed5979
+     *
ed5979
+     * Update new_bs_quiesce_counter because bdrv_parent_drained_begin_single()
ed5979
+     * polls, which could have changed the value.
ed5979
      */
ed5979
-    while (drain_saldo < 0 && child->klass->drained_end) {
ed5979
+    new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
ed5979
+    if (!new_bs_quiesce_counter && child->quiesced_parent) {
ed5979
         bdrv_parent_drained_end_single(child);
ed5979
-        drain_saldo++;
ed5979
     }
ed5979
 }
ed5979
 
ed5979
diff --git a/block/io.c b/block/io.c
ed5979
index 75224480d0..87d6f22ec4 100644
ed5979
--- a/block/io.c
ed5979
+++ b/block/io.c
ed5979
@@ -62,8 +62,9 @@ void bdrv_parent_drained_end_single(BdrvChild *c)
ed5979
 {
ed5979
     IO_OR_GS_CODE();
ed5979
 
ed5979
-    assert(c->parent_quiesce_counter > 0);
ed5979
-    c->parent_quiesce_counter--;
ed5979
+    assert(c->quiesced_parent);
ed5979
+    c->quiesced_parent = false;
ed5979
+
ed5979
     if (c->klass->drained_end) {
ed5979
         c->klass->drained_end(c);
ed5979
     }
ed5979
@@ -110,7 +111,10 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
ed5979
 {
ed5979
     AioContext *ctx = bdrv_child_get_parent_aio_context(c);
ed5979
     IO_OR_GS_CODE();
ed5979
-    c->parent_quiesce_counter++;
ed5979
+
ed5979
+    assert(!c->quiesced_parent);
ed5979
+    c->quiesced_parent = true;
ed5979
+
ed5979
     if (c->klass->drained_begin) {
ed5979
         c->klass->drained_begin(c);
ed5979
     }
ed5979
@@ -358,11 +362,12 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
ed5979
     /* Stop things in parent-to-child order */
ed5979
     if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
ed5979
         aio_disable_external(bdrv_get_aio_context(bs));
ed5979
-    }
ed5979
 
ed5979
-    bdrv_parent_drained_begin(bs, parent, ignore_bds_parents);
ed5979
-    if (bs->drv && bs->drv->bdrv_drain_begin) {
ed5979
-        bs->drv->bdrv_drain_begin(bs);
ed5979
+        /* TODO Remove ignore_bds_parents, we don't consider it any more */
ed5979
+        bdrv_parent_drained_begin(bs, parent, false);
ed5979
+        if (bs->drv && bs->drv->bdrv_drain_begin) {
ed5979
+            bs->drv->bdrv_drain_begin(bs);
ed5979
+        }
ed5979
     }
ed5979
 }
ed5979
 
ed5979
@@ -413,13 +418,14 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
ed5979
     assert(bs->quiesce_counter > 0);
ed5979
 
ed5979
     /* Re-enable things in child-to-parent order */
ed5979
-    if (bs->drv && bs->drv->bdrv_drain_end) {
ed5979
-        bs->drv->bdrv_drain_end(bs);
ed5979
-    }
ed5979
-    bdrv_parent_drained_end(bs, parent, ignore_bds_parents);
ed5979
-
ed5979
     old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter);
ed5979
     if (old_quiesce_counter == 1) {
ed5979
+        if (bs->drv && bs->drv->bdrv_drain_end) {
ed5979
+            bs->drv->bdrv_drain_end(bs);
ed5979
+        }
ed5979
+        /* TODO Remove ignore_bds_parents, we don't consider it any more */
ed5979
+        bdrv_parent_drained_end(bs, parent, false);
ed5979
+
ed5979
         aio_enable_external(bdrv_get_aio_context(bs));
ed5979
     }
ed5979
 }
ed5979
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
ed5979
index 791dddfd7d..a6bc6b7fe9 100644
ed5979
--- a/include/block/block_int-common.h
ed5979
+++ b/include/block/block_int-common.h
ed5979
@@ -980,13 +980,13 @@ struct BdrvChild {
ed5979
     bool frozen;
ed5979
 
ed5979
     /*
ed5979
-     * How many times the parent of this child has been drained
ed5979
+     * True if the parent of this child has been drained by this BdrvChild
ed5979
      * (through klass->drained_*).
ed5979
-     * Usually, this is equal to bs->quiesce_counter (potentially
ed5979
-     * reduced by bdrv_drain_all_count).  It may differ while the
ed5979
+     *
ed5979
+     * It is generally true if bs->quiesce_counter > 0. It may differ while the
ed5979
      * child is entering or leaving a drained section.
ed5979
      */
ed5979
-    int parent_quiesce_counter;
ed5979
+    bool quiesced_parent;
ed5979
 
ed5979
     QLIST_ENTRY(BdrvChild) next;
ed5979
     QLIST_ENTRY(BdrvChild) next_parent;
ed5979
diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
ed5979
index dda08de8db..172bc6debc 100644
ed5979
--- a/tests/unit/test-bdrv-drain.c
ed5979
+++ b/tests/unit/test-bdrv-drain.c
ed5979
@@ -296,7 +296,11 @@ static void test_quiesce_common(enum drain_type drain_type, bool recursive)
ed5979
 
ed5979
     do_drain_begin(drain_type, bs);
ed5979
 
ed5979
-    g_assert_cmpint(bs->quiesce_counter, ==, 1);
ed5979
+    if (drain_type == BDRV_DRAIN_ALL) {
ed5979
+        g_assert_cmpint(bs->quiesce_counter, ==, 2);
ed5979
+    } else {
ed5979
+        g_assert_cmpint(bs->quiesce_counter, ==, 1);
ed5979
+    }
ed5979
     g_assert_cmpint(backing->quiesce_counter, ==, !!recursive);
ed5979
 
ed5979
     do_drain_end(drain_type, bs);
ed5979
@@ -348,8 +352,8 @@ static void test_nested(void)
ed5979
 
ed5979
     for (outer = 0; outer < DRAIN_TYPE_MAX; outer++) {
ed5979
         for (inner = 0; inner < DRAIN_TYPE_MAX; inner++) {
ed5979
-            int backing_quiesce = (outer != BDRV_DRAIN) +
ed5979
-                                  (inner != BDRV_DRAIN);
ed5979
+            int backing_quiesce = (outer == BDRV_DRAIN_ALL) +
ed5979
+                                  (inner == BDRV_DRAIN_ALL);
ed5979
 
ed5979
             g_assert_cmpint(bs->quiesce_counter, ==, 0);
ed5979
             g_assert_cmpint(backing->quiesce_counter, ==, 0);
ed5979
@@ -359,10 +363,10 @@ static void test_nested(void)
ed5979
             do_drain_begin(outer, bs);
ed5979
             do_drain_begin(inner, bs);
ed5979
 
ed5979
-            g_assert_cmpint(bs->quiesce_counter, ==, 2);
ed5979
+            g_assert_cmpint(bs->quiesce_counter, ==, 2 + !!backing_quiesce);
ed5979
             g_assert_cmpint(backing->quiesce_counter, ==, backing_quiesce);
ed5979
-            g_assert_cmpint(s->drain_count, ==, 2);
ed5979
-            g_assert_cmpint(backing_s->drain_count, ==, backing_quiesce);
ed5979
+            g_assert_cmpint(s->drain_count, ==, 1);
ed5979
+            g_assert_cmpint(backing_s->drain_count, ==, !!backing_quiesce);
ed5979
 
ed5979
             do_drain_end(inner, bs);
ed5979
             do_drain_end(outer, bs);
ed5979
-- 
ed5979
2.31.1
ed5979