| From 808d2c94f53b9b29f44c2c5d9baea66d63ceddfc Mon Sep 17 00:00:00 2001 |
| From: Kevin Wolf <kwolf@redhat.com> |
| Date: Wed, 14 Aug 2019 09:20:43 +0100 |
| Subject: [PATCH 07/10] block: Fix AioContext switch for bs->drv == NULL |
| |
| RH-Author: Kevin Wolf <kwolf@redhat.com> |
| Message-id: <20190814092043.15421-2-kwolf@redhat.com> |
| Patchwork-id: 89976 |
| O-Subject: [RHEL-8.1.0 qemu-kvm PATCH 1/1] block: Fix AioContext switch for bs->drv == NULL |
| Bugzilla: 1716347 |
| RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com> |
| RH-Acked-by: Max Reitz <mreitz@redhat.com> |
| RH-Acked-by: John Snow <jsnow@redhat.com> |
| |
| Even for block nodes with bs->drv == NULL, we can't just ignore a |
| bdrv_set_aio_context() call. Leaving the node in its old context can |
| mean that it's still in an iothread context in bdrv_close_all() during |
| shutdown, resulting in an attempted unlock of the AioContext lock which |
| we don't hold. |
| |
| This is an example stack trace of a related crash: |
| |
| #0 0x00007ffff59da57f in raise () at /lib64/libc.so.6 |
| #1 0x00007ffff59c4895 in abort () at /lib64/libc.so.6 |
| #2 0x0000555555b97b1e in error_exit (err=<optimized out>, msg=msg@entry=0x555555d386d0 <__func__.19059> "qemu_mutex_unlock_impl") at util/qemu-thread-posix.c:36 |
| #3 0x0000555555b97f7f in qemu_mutex_unlock_impl (mutex=mutex@entry=0x5555568002f0, file=file@entry=0x555555d378df "util/async.c", line=line@entry=507) at util/qemu-thread-posix.c:97 |
| #4 0x0000555555b92f55 in aio_context_release (ctx=ctx@entry=0x555556800290) at util/async.c:507 |
| #5 0x0000555555b05cf8 in bdrv_prwv_co (child=child@entry=0x7fffc80012f0, offset=offset@entry=131072, qiov=qiov@entry=0x7fffffffd4f0, is_write=is_write@entry=true, flags=flags@entry=0) |
| at block/io.c:833 |
| #6 0x0000555555b060a9 in bdrv_pwritev (qiov=0x7fffffffd4f0, offset=131072, child=0x7fffc80012f0) at block/io.c:990 |
| #7 0x0000555555b060a9 in bdrv_pwrite (child=0x7fffc80012f0, offset=131072, buf=<optimized out>, bytes=<optimized out>) at block/io.c:990 |
| #8 0x0000555555ae172b in qcow2_cache_entry_flush (bs=bs@entry=0x555556810680, c=c@entry=0x5555568cc740, i=i@entry=0) at block/qcow2-cache.c:51 |
| #9 0x0000555555ae18dd in qcow2_cache_write (bs=bs@entry=0x555556810680, c=0x5555568cc740) at block/qcow2-cache.c:248 |
| #10 0x0000555555ae15de in qcow2_cache_flush (bs=0x555556810680, c=<optimized out>) at block/qcow2-cache.c:259 |
| #11 0x0000555555ae16b1 in qcow2_cache_flush_dependency (c=0x5555568a1700, c=0x5555568a1700, bs=0x555556810680) at block/qcow2-cache.c:194 |
| #12 0x0000555555ae16b1 in qcow2_cache_entry_flush (bs=bs@entry=0x555556810680, c=c@entry=0x5555568a1700, i=i@entry=0) at block/qcow2-cache.c:194 |
| #13 0x0000555555ae18dd in qcow2_cache_write (bs=bs@entry=0x555556810680, c=0x5555568a1700) at block/qcow2-cache.c:248 |
| #14 0x0000555555ae15de in qcow2_cache_flush (bs=bs@entry=0x555556810680, c=<optimized out>) at block/qcow2-cache.c:259 |
| #15 0x0000555555ad242c in qcow2_inactivate (bs=bs@entry=0x555556810680) at block/qcow2.c:2124 |
| #16 0x0000555555ad2590 in qcow2_close (bs=0x555556810680) at block/qcow2.c:2153 |
| #17 0x0000555555ab0c62 in bdrv_close (bs=0x555556810680) at block.c:3358 |
| #18 0x0000555555ab0c62 in bdrv_delete (bs=0x555556810680) at block.c:3542 |
| #19 0x0000555555ab0c62 in bdrv_unref (bs=0x555556810680) at block.c:4598 |
| #20 0x0000555555af4d72 in blk_remove_bs (blk=blk@entry=0x5555568103d0) at block/block-backend.c:785 |
| #21 0x0000555555af4dbb in blk_remove_all_bs () at block/block-backend.c:483 |
| #22 0x0000555555aae02f in bdrv_close_all () at block.c:3412 |
| #23 0x00005555557f9796 in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at vl.c:4776 |
| |
| The reproducer I used is a qcow2 image on gluster volume, where the |
| virtual disk size (4 GB) is larger than the gluster volume size (64M), |
| so we can easily trigger an ENOSPC. This backend is assigned to a |
| virtio-blk device using an iothread, and then from the guest a |
| 'dd if=/dev/zero of=/dev/vda bs=1G count=1' causes the VM to stop |
| because of an I/O error. qemu_gluster_co_flush_to_disk() sets |
| bs->drv = NULL on error, so when virtio-blk stops the dataplane, the |
| block nodes stay in the iothread AioContext. A 'quit' monitor command |
| issued from this paused state crashes the process. |
| |
| Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1631227 |
| Cc: qemu-stable@nongnu.org |
| Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
| Reviewed-by: Eric Blake <eblake@redhat.com> |
| Reviewed-by: Max Reitz <mreitz@redhat.com> |
| Reviewed-by: Stefano Garzarella <sgarzare@redhat.com> |
| (cherry picked from commit 1bffe1ae7a7b707c3a14ea2ccd00d3609d3ce4d8) |
| Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
| Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com> |
| |
| block.c | 12 ++---------- |
| 1 file changed, 2 insertions(+), 10 deletions(-) |
| |
| diff --git a/block.c b/block.c |
| index 8f3ceea..37af100 100644 |
| |
| |
| @@ -4923,10 +4923,6 @@ void bdrv_detach_aio_context(BlockDriverState *bs) |
| BdrvAioNotifier *baf, *baf_tmp; |
| BdrvChild *child; |
| |
| - if (!bs->drv) { |
| - return; |
| - } |
| - |
| assert(!bs->walking_aio_notifiers); |
| bs->walking_aio_notifiers = true; |
| QLIST_FOREACH_SAFE(baf, &bs->aio_notifiers, list, baf_tmp) { |
| @@ -4941,7 +4937,7 @@ void bdrv_detach_aio_context(BlockDriverState *bs) |
| */ |
| bs->walking_aio_notifiers = false; |
| |
| - if (bs->drv->bdrv_detach_aio_context) { |
| + if (bs->drv && bs->drv->bdrv_detach_aio_context) { |
| bs->drv->bdrv_detach_aio_context(bs); |
| } |
| QLIST_FOREACH(child, &bs->children, next) { |
| @@ -4960,10 +4956,6 @@ void bdrv_attach_aio_context(BlockDriverState *bs, |
| BdrvAioNotifier *ban, *ban_tmp; |
| BdrvChild *child; |
| |
| - if (!bs->drv) { |
| - return; |
| - } |
| - |
| if (bs->quiesce_counter) { |
| aio_disable_external(new_context); |
| } |
| @@ -4973,7 +4965,7 @@ void bdrv_attach_aio_context(BlockDriverState *bs, |
| QLIST_FOREACH(child, &bs->children, next) { |
| bdrv_attach_aio_context(child->bs, new_context); |
| } |
| - if (bs->drv->bdrv_attach_aio_context) { |
| + if (bs->drv && bs->drv->bdrv_attach_aio_context) { |
| bs->drv->bdrv_attach_aio_context(bs, new_context); |
| } |
| |
| -- |
| 1.8.3.1 |
| |