|
|
1bdc94 |
From b700e58ee749512368c40a5f84b01c11d24903b9 Mon Sep 17 00:00:00 2001
|
|
|
1bdc94 |
From: Kevin Wolf <kwolf@redhat.com>
|
|
|
1bdc94 |
Date: Fri, 14 Sep 2018 10:55:22 +0200
|
|
|
1bdc94 |
Subject: [PATCH 31/49] util/async: use qemu_aio_coroutine_enter in
|
|
|
1bdc94 |
co_schedule_bh_cb
|
|
|
1bdc94 |
|
|
|
1bdc94 |
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
|
|
1bdc94 |
Message-id: <20180914105540.18077-25-kwolf@redhat.com>
|
|
|
1bdc94 |
Patchwork-id: 82176
|
|
|
1bdc94 |
O-Subject: [RHV-7.6 qemu-kvm-rhev PATCH 24/42] util/async: use qemu_aio_coroutine_enter in co_schedule_bh_cb
|
|
|
1bdc94 |
Bugzilla: 1601212
|
|
|
1bdc94 |
RH-Acked-by: John Snow <jsnow@redhat.com>
|
|
|
1bdc94 |
RH-Acked-by: Max Reitz <mreitz@redhat.com>
|
|
|
1bdc94 |
RH-Acked-by: Fam Zheng <famz@redhat.com>
|
|
|
1bdc94 |
|
|
|
1bdc94 |
From: Sergio Lopez <slp@redhat.com>
|
|
|
1bdc94 |
|
|
|
1bdc94 |
AIO Coroutines shouldn't be managed by an AioContext different than the
|
|
|
1bdc94 |
one assigned when they are created. aio_co_enter avoids entering a
|
|
|
1bdc94 |
coroutine from a different AioContext, calling aio_co_schedule instead.
|
|
|
1bdc94 |
|
|
|
1bdc94 |
Scheduled coroutines are then entered by co_schedule_bh_cb using
|
|
|
1bdc94 |
qemu_coroutine_enter, which just calls qemu_aio_coroutine_enter with the
|
|
|
1bdc94 |
current AioContext obtained with qemu_get_current_aio_context.
|
|
|
1bdc94 |
Eventually, co->ctx will be set to the AioContext passed as an argument
|
|
|
1bdc94 |
to qemu_aio_coroutine_enter.
|
|
|
1bdc94 |
|
|
|
1bdc94 |
This means that, if an IO Thread's AioContext is being processed by the
|
|
|
1bdc94 |
Main Thread (due to aio_poll being called with a BDS AioContext, as it
|
|
|
1bdc94 |
happens in AIO_WAIT_WHILE among other places), the AioContext from some
|
|
|
1bdc94 |
coroutines may be wrongly replaced with the one from the Main Thread.
|
|
|
1bdc94 |
|
|
|
1bdc94 |
This is the root cause behind some crashes, mainly triggered by the
|
|
|
1bdc94 |
drain code in block/io.c. The most common are the following abort and failed
|
|
|
1bdc94 |
assertion:
|
|
|
1bdc94 |
|
|
|
1bdc94 |
util/async.c:aio_co_schedule
|
|
|
1bdc94 |
456 if (scheduled) {
|
|
|
1bdc94 |
457 fprintf(stderr,
|
|
|
1bdc94 |
458 "%s: Co-routine was already scheduled in '%s'\n",
|
|
|
1bdc94 |
459 __func__, scheduled);
|
|
|
1bdc94 |
460 abort();
|
|
|
1bdc94 |
461 }
|
|
|
1bdc94 |
|
|
|
1bdc94 |
util/qemu-coroutine-lock.c:
|
|
|
1bdc94 |
286 assert(mutex->holder == self);
|
|
|
1bdc94 |
|
|
|
1bdc94 |
But it's also known to cause random errors at different locations, and
|
|
|
1bdc94 |
even SIGSEGV with broken coroutine backtraces.
|
|
|
1bdc94 |
|
|
|
1bdc94 |
By using qemu_aio_coroutine_enter directly in co_schedule_bh_cb, we can
|
|
|
1bdc94 |
pass the correct AioContext as an argument, making sure co->ctx is not
|
|
|
1bdc94 |
wrongly altered.
|
|
|
1bdc94 |
|
|
|
1bdc94 |
Signed-off-by: Sergio Lopez <slp@redhat.com>
|
|
|
1bdc94 |
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
|
|
1bdc94 |
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
|
|
1bdc94 |
---
|
|
|
1bdc94 |
util/async.c | 2 +-
|
|
|
1bdc94 |
1 file changed, 1 insertion(+), 1 deletion(-)
|
|
|
1bdc94 |
|
|
|
1bdc94 |
diff --git a/util/async.c b/util/async.c
|
|
|
1bdc94 |
index 4dd9d95..5693191 100644
|
|
|
1bdc94 |
--- a/util/async.c
|
|
|
1bdc94 |
+++ b/util/async.c
|
|
|
1bdc94 |
@@ -391,7 +391,7 @@ static void co_schedule_bh_cb(void *opaque)
|
|
|
1bdc94 |
|
|
|
1bdc94 |
/* Protected by write barrier in qemu_aio_coroutine_enter */
|
|
|
1bdc94 |
atomic_set(&co->scheduled, NULL);
|
|
|
1bdc94 |
- qemu_coroutine_enter(co);
|
|
|
1bdc94 |
+ qemu_aio_coroutine_enter(ctx, co);
|
|
|
1bdc94 |
aio_context_release(ctx);
|
|
|
1bdc94 |
}
|
|
|
1bdc94 |
}
|
|
|
1bdc94 |
--
|
|
|
1bdc94 |
1.8.3.1
|
|
|
1bdc94 |
|