yeahuh / rpms / qemu-kvm

Forked from rpms/qemu-kvm 2 years ago
Clone

Blame SOURCES/kvm-block-Don-t-inactivate-children-before-parents.patch

ae23c9
From ff19fad07e21bac2534edd867120645fa2a0d7d3 Mon Sep 17 00:00:00 2001
ae23c9
From: Kevin Wolf <kwolf@redhat.com>
ae23c9
Date: Fri, 14 Dec 2018 09:49:45 +0000
ae23c9
Subject: [PATCH 1/2] block: Don't inactivate children before parents
ae23c9
ae23c9
RH-Author: Kevin Wolf <kwolf@redhat.com>
ae23c9
Message-id: <20181214094946.26226-2-kwolf@redhat.com>
ae23c9
Patchwork-id: 83511
ae23c9
O-Subject: [RHEL-8.0 qemu-kvm PATCH 1/2] block: Don't inactivate children before parents
ae23c9
Bugzilla: 1659395
ae23c9
RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
ae23c9
RH-Acked-by: Max Reitz <mreitz@redhat.com>
ae23c9
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
ae23c9
ae23c9
bdrv_child_cb_inactivate() asserts that parents are already inactive
ae23c9
when children get inactivated. This precondition is necessary because
ae23c9
parents could still issue requests in their inactivation code.
ae23c9
ae23c9
When block nodes are created individually with -blockdev, all of them
ae23c9
are monitor owned and will be returned by bdrv_next() in an undefined
ae23c9
order (in practice, in the order of their creation, which is usually
ae23c9
children before parents), which obviously fails the assertion:
ae23c9
ae23c9
qemu: block.c:899: bdrv_child_cb_inactivate: Assertion `bs->open_flags & BDRV_O_INACTIVE' failed.
ae23c9
ae23c9
This patch fixes the ordering by skipping nodes with still active
ae23c9
parents in bdrv_inactivate_recurse() because we know that they will be
ae23c9
covered by recursion when the last active parent becomes inactive.
ae23c9
ae23c9
With the correct parents-before-children ordering, we also got rid of
ae23c9
the reason why commit aad0b7a0bfb introduced two passes, so we can go
ae23c9
back to a single-pass recursion. This is necessary so we can rely on the
ae23c9
BDRV_O_INACTIVE flag to skip nodes with active parents (the flag used
ae23c9
to be set only in pass 2, so we would always skip non-root nodes in
ae23c9
pass 1 because all parents would still be considered active; setting the
ae23c9
flag in pass 1 would mean, that we never skip anything in pass 2 because
ae23c9
all parents are already considered inactive).
ae23c9
ae23c9
Because of the change to single pass, this patch is best reviewed with
ae23c9
whitespace changes ignored.
ae23c9
ae23c9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
ae23c9
Reviewed-by: Max Reitz <mreitz@redhat.com>
ae23c9
(cherry picked from commit 9e37271f50ec2e95f299dc297ac08f9be0096b48)
ae23c9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
ae23c9
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
ae23c9
---
ae23c9
 block.c | 84 +++++++++++++++++++++++++++++++++++++++++------------------------
ae23c9
 1 file changed, 53 insertions(+), 31 deletions(-)
ae23c9
ae23c9
diff --git a/block.c b/block.c
ae23c9
index 18ae591..73f55a1 100644
ae23c9
--- a/block.c
ae23c9
+++ b/block.c
ae23c9
@@ -4442,45 +4442,68 @@ void bdrv_invalidate_cache_all(Error **errp)
ae23c9
     }
ae23c9
 }
ae23c9
 
ae23c9
-static int bdrv_inactivate_recurse(BlockDriverState *bs,
ae23c9
-                                   bool setting_flag)
ae23c9
+static bool bdrv_has_bds_parent(BlockDriverState *bs, bool only_active)
ae23c9
+{
ae23c9
+    BdrvChild *parent;
ae23c9
+
ae23c9
+    QLIST_FOREACH(parent, &bs->parents, next_parent) {
ae23c9
+        if (parent->role->parent_is_bds) {
ae23c9
+            BlockDriverState *parent_bs = parent->opaque;
ae23c9
+            if (!only_active || !(parent_bs->open_flags & BDRV_O_INACTIVE)) {
ae23c9
+                return true;
ae23c9
+            }
ae23c9
+        }
ae23c9
+    }
ae23c9
+
ae23c9
+    return false;
ae23c9
+}
ae23c9
+
ae23c9
+static int bdrv_inactivate_recurse(BlockDriverState *bs)
ae23c9
 {
ae23c9
     BdrvChild *child, *parent;
ae23c9
+    uint64_t perm, shared_perm;
ae23c9
     int ret;
ae23c9
 
ae23c9
     if (!bs->drv) {
ae23c9
         return -ENOMEDIUM;
ae23c9
     }
ae23c9
 
ae23c9
-    if (!setting_flag && bs->drv->bdrv_inactivate) {
ae23c9
+    /* Make sure that we don't inactivate a child before its parent.
ae23c9
+     * It will be covered by recursion from the yet active parent. */
ae23c9
+    if (bdrv_has_bds_parent(bs, true)) {
ae23c9
+        return 0;
ae23c9
+    }
ae23c9
+
ae23c9
+    assert(!(bs->open_flags & BDRV_O_INACTIVE));
ae23c9
+
ae23c9
+    /* Inactivate this node */
ae23c9
+    if (bs->drv->bdrv_inactivate) {
ae23c9
         ret = bs->drv->bdrv_inactivate(bs);
ae23c9
         if (ret < 0) {
ae23c9
             return ret;
ae23c9
         }
ae23c9
     }
ae23c9
 
ae23c9
-    if (setting_flag && !(bs->open_flags & BDRV_O_INACTIVE)) {
ae23c9
-        uint64_t perm, shared_perm;
ae23c9
-
ae23c9
-        QLIST_FOREACH(parent, &bs->parents, next_parent) {
ae23c9
-            if (parent->role->inactivate) {
ae23c9
-                ret = parent->role->inactivate(parent);
ae23c9
-                if (ret < 0) {
ae23c9
-                    return ret;
ae23c9
-                }
ae23c9
+    QLIST_FOREACH(parent, &bs->parents, next_parent) {
ae23c9
+        if (parent->role->inactivate) {
ae23c9
+            ret = parent->role->inactivate(parent);
ae23c9
+            if (ret < 0) {
ae23c9
+                return ret;
ae23c9
             }
ae23c9
         }
ae23c9
+    }
ae23c9
 
ae23c9
-        bs->open_flags |= BDRV_O_INACTIVE;
ae23c9
+    bs->open_flags |= BDRV_O_INACTIVE;
ae23c9
+
ae23c9
+    /* Update permissions, they may differ for inactive nodes */
ae23c9
+    bdrv_get_cumulative_perm(bs, &perm, &shared_perm);
ae23c9
+    bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, &error_abort);
ae23c9
+    bdrv_set_perm(bs, perm, shared_perm);
ae23c9
 
ae23c9
-        /* Update permissions, they may differ for inactive nodes */
ae23c9
-        bdrv_get_cumulative_perm(bs, &perm, &shared_perm);
ae23c9
-        bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, &error_abort);
ae23c9
-        bdrv_set_perm(bs, perm, shared_perm);
ae23c9
-    }
ae23c9
 
ae23c9
+    /* Recursively inactivate children */
ae23c9
     QLIST_FOREACH(child, &bs->children, next) {
ae23c9
-        ret = bdrv_inactivate_recurse(child->bs, setting_flag);
ae23c9
+        ret = bdrv_inactivate_recurse(child->bs);
ae23c9
         if (ret < 0) {
ae23c9
             return ret;
ae23c9
         }
ae23c9
@@ -4494,7 +4517,6 @@ int bdrv_inactivate_all(void)
ae23c9
     BlockDriverState *bs = NULL;
ae23c9
     BdrvNextIterator it;
ae23c9
     int ret = 0;
ae23c9
-    int pass;
ae23c9
     GSList *aio_ctxs = NULL, *ctx;
ae23c9
 
ae23c9
     for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
ae23c9
@@ -4506,17 +4528,17 @@ int bdrv_inactivate_all(void)
ae23c9
         }
ae23c9
     }
ae23c9
 
ae23c9
-    /* We do two passes of inactivation. The first pass calls to drivers'
ae23c9
-     * .bdrv_inactivate callbacks recursively so all cache is flushed to disk;
ae23c9
-     * the second pass sets the BDRV_O_INACTIVE flag so that no further write
ae23c9
-     * is allowed. */
ae23c9
-    for (pass = 0; pass < 2; pass++) {
ae23c9
-        for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
ae23c9
-            ret = bdrv_inactivate_recurse(bs, pass);
ae23c9
-            if (ret < 0) {
ae23c9
-                bdrv_next_cleanup(&it);
ae23c9
-                goto out;
ae23c9
-            }
ae23c9
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
ae23c9
+        /* Nodes with BDS parents are covered by recursion from the last
ae23c9
+         * parent that gets inactivated. Don't inactivate them a second
ae23c9
+         * time if that has already happened. */
ae23c9
+        if (bdrv_has_bds_parent(bs, false)) {
ae23c9
+            continue;
ae23c9
+        }
ae23c9
+        ret = bdrv_inactivate_recurse(bs);
ae23c9
+        if (ret < 0) {
ae23c9
+            bdrv_next_cleanup(&it);
ae23c9
+            goto out;
ae23c9
         }
ae23c9
     }
ae23c9
 
ae23c9
-- 
ae23c9
1.8.3.1
ae23c9