b38b0f
From 808d2c94f53b9b29f44c2c5d9baea66d63ceddfc Mon Sep 17 00:00:00 2001
b38b0f
From: Kevin Wolf <kwolf@redhat.com>
b38b0f
Date: Wed, 14 Aug 2019 09:20:43 +0100
b38b0f
Subject: [PATCH 07/10] block: Fix AioContext switch for bs->drv == NULL
b38b0f
b38b0f
RH-Author: Kevin Wolf <kwolf@redhat.com>
b38b0f
Message-id: <20190814092043.15421-2-kwolf@redhat.com>
b38b0f
Patchwork-id: 89976
b38b0f
O-Subject: [RHEL-8.1.0 qemu-kvm PATCH 1/1] block: Fix AioContext switch for bs->drv == NULL
b38b0f
Bugzilla: 1716347
b38b0f
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
b38b0f
RH-Acked-by: Max Reitz <mreitz@redhat.com>
b38b0f
RH-Acked-by: John Snow <jsnow@redhat.com>
b38b0f
b38b0f
Even for block nodes with bs->drv == NULL, we can't just ignore a
b38b0f
bdrv_set_aio_context() call. Leaving the node in its old context can
b38b0f
mean that it's still in an iothread context in bdrv_close_all() during
b38b0f
shutdown, resulting in an attempted unlock of the AioContext lock which
b38b0f
we don't hold.
b38b0f
b38b0f
This is an example stack trace of a related crash:
b38b0f
b38b0f
 #0  0x00007ffff59da57f in raise () at /lib64/libc.so.6
b38b0f
 #1  0x00007ffff59c4895 in abort () at /lib64/libc.so.6
b38b0f
 #2  0x0000555555b97b1e in error_exit (err=<optimized out>, msg=msg@entry=0x555555d386d0 <__func__.19059> "qemu_mutex_unlock_impl") at util/qemu-thread-posix.c:36
b38b0f
 #3  0x0000555555b97f7f in qemu_mutex_unlock_impl (mutex=mutex@entry=0x5555568002f0, file=file@entry=0x555555d378df "util/async.c", line=line@entry=507) at util/qemu-thread-posix.c:97
b38b0f
 #4  0x0000555555b92f55 in aio_context_release (ctx=ctx@entry=0x555556800290) at util/async.c:507
b38b0f
 #5  0x0000555555b05cf8 in bdrv_prwv_co (child=child@entry=0x7fffc80012f0, offset=offset@entry=131072, qiov=qiov@entry=0x7fffffffd4f0, is_write=is_write@entry=true, flags=flags@entry=0)
b38b0f
         at block/io.c:833
b38b0f
 #6  0x0000555555b060a9 in bdrv_pwritev (qiov=0x7fffffffd4f0, offset=131072, child=0x7fffc80012f0) at block/io.c:990
b38b0f
 #7  0x0000555555b060a9 in bdrv_pwrite (child=0x7fffc80012f0, offset=131072, buf=<optimized out>, bytes=<optimized out>) at block/io.c:990
b38b0f
 #8  0x0000555555ae172b in qcow2_cache_entry_flush (bs=bs@entry=0x555556810680, c=c@entry=0x5555568cc740, i=i@entry=0) at block/qcow2-cache.c:51
b38b0f
 #9  0x0000555555ae18dd in qcow2_cache_write (bs=bs@entry=0x555556810680, c=0x5555568cc740) at block/qcow2-cache.c:248
b38b0f
 #10 0x0000555555ae15de in qcow2_cache_flush (bs=0x555556810680, c=<optimized out>) at block/qcow2-cache.c:259
b38b0f
 #11 0x0000555555ae16b1 in qcow2_cache_flush_dependency (c=0x5555568a1700, c=0x5555568a1700, bs=0x555556810680) at block/qcow2-cache.c:194
b38b0f
 #12 0x0000555555ae16b1 in qcow2_cache_entry_flush (bs=bs@entry=0x555556810680, c=c@entry=0x5555568a1700, i=i@entry=0) at block/qcow2-cache.c:194
b38b0f
 #13 0x0000555555ae18dd in qcow2_cache_write (bs=bs@entry=0x555556810680, c=0x5555568a1700) at block/qcow2-cache.c:248
b38b0f
 #14 0x0000555555ae15de in qcow2_cache_flush (bs=bs@entry=0x555556810680, c=<optimized out>) at block/qcow2-cache.c:259
b38b0f
 #15 0x0000555555ad242c in qcow2_inactivate (bs=bs@entry=0x555556810680) at block/qcow2.c:2124
b38b0f
 #16 0x0000555555ad2590 in qcow2_close (bs=0x555556810680) at block/qcow2.c:2153
b38b0f
 #17 0x0000555555ab0c62 in bdrv_close (bs=0x555556810680) at block.c:3358
b38b0f
 #18 0x0000555555ab0c62 in bdrv_delete (bs=0x555556810680) at block.c:3542
b38b0f
 #19 0x0000555555ab0c62 in bdrv_unref (bs=0x555556810680) at block.c:4598
b38b0f
 #20 0x0000555555af4d72 in blk_remove_bs (blk=blk@entry=0x5555568103d0) at block/block-backend.c:785
b38b0f
 #21 0x0000555555af4dbb in blk_remove_all_bs () at block/block-backend.c:483
b38b0f
 #22 0x0000555555aae02f in bdrv_close_all () at block.c:3412
b38b0f
 #23 0x00005555557f9796 in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at vl.c:4776
b38b0f
b38b0f
The reproducer I used is a qcow2 image on gluster volume, where the
b38b0f
virtual disk size (4 GB) is larger than the gluster volume size (64M),
b38b0f
so we can easily trigger an ENOSPC. This backend is assigned to a
b38b0f
virtio-blk device using an iothread, and then from the guest a
b38b0f
'dd if=/dev/zero of=/dev/vda bs=1G count=1' causes the VM to stop
b38b0f
because of an I/O error. qemu_gluster_co_flush_to_disk() sets
b38b0f
bs->drv = NULL on error, so when virtio-blk stops the dataplane, the
b38b0f
block nodes stay in the iothread AioContext. A 'quit' monitor command
b38b0f
issued from this paused state crashes the process.
b38b0f
b38b0f
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1631227
b38b0f
Cc: qemu-stable@nongnu.org
b38b0f
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
b38b0f
Reviewed-by: Eric Blake <eblake@redhat.com>
b38b0f
Reviewed-by: Max Reitz <mreitz@redhat.com>
b38b0f
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
b38b0f
(cherry picked from commit 1bffe1ae7a7b707c3a14ea2ccd00d3609d3ce4d8)
b38b0f
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
b38b0f
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
b38b0f
---
b38b0f
 block.c | 12 ++----------
b38b0f
 1 file changed, 2 insertions(+), 10 deletions(-)
b38b0f
b38b0f
diff --git a/block.c b/block.c
b38b0f
index 8f3ceea..37af100 100644
b38b0f
--- a/block.c
b38b0f
+++ b/block.c
b38b0f
@@ -4923,10 +4923,6 @@ void bdrv_detach_aio_context(BlockDriverState *bs)
b38b0f
     BdrvAioNotifier *baf, *baf_tmp;
b38b0f
     BdrvChild *child;
b38b0f
 
b38b0f
-    if (!bs->drv) {
b38b0f
-        return;
b38b0f
-    }
b38b0f
-
b38b0f
     assert(!bs->walking_aio_notifiers);
b38b0f
     bs->walking_aio_notifiers = true;
b38b0f
     QLIST_FOREACH_SAFE(baf, &bs->aio_notifiers, list, baf_tmp) {
b38b0f
@@ -4941,7 +4937,7 @@ void bdrv_detach_aio_context(BlockDriverState *bs)
b38b0f
      */
b38b0f
     bs->walking_aio_notifiers = false;
b38b0f
 
b38b0f
-    if (bs->drv->bdrv_detach_aio_context) {
b38b0f
+    if (bs->drv && bs->drv->bdrv_detach_aio_context) {
b38b0f
         bs->drv->bdrv_detach_aio_context(bs);
b38b0f
     }
b38b0f
     QLIST_FOREACH(child, &bs->children, next) {
b38b0f
@@ -4960,10 +4956,6 @@ void bdrv_attach_aio_context(BlockDriverState *bs,
b38b0f
     BdrvAioNotifier *ban, *ban_tmp;
b38b0f
     BdrvChild *child;
b38b0f
 
b38b0f
-    if (!bs->drv) {
b38b0f
-        return;
b38b0f
-    }
b38b0f
-
b38b0f
     if (bs->quiesce_counter) {
b38b0f
         aio_disable_external(new_context);
b38b0f
     }
b38b0f
@@ -4973,7 +4965,7 @@ void bdrv_attach_aio_context(BlockDriverState *bs,
b38b0f
     QLIST_FOREACH(child, &bs->children, next) {
b38b0f
         bdrv_attach_aio_context(child->bs, new_context);
b38b0f
     }
b38b0f
-    if (bs->drv->bdrv_attach_aio_context) {
b38b0f
+    if (bs->drv && bs->drv->bdrv_attach_aio_context) {
b38b0f
         bs->drv->bdrv_attach_aio_context(bs, new_context);
b38b0f
     }
b38b0f
 
b38b0f
-- 
b38b0f
1.8.3.1
b38b0f