|
|
357786 |
From a174500971a42552a30d238bf9ec065d27fd4b32 Mon Sep 17 00:00:00 2001
|
|
|
357786 |
From: Kevin Wolf <kwolf@redhat.com>
|
|
|
357786 |
Date: Fri, 14 Sep 2018 10:55:36 +0200
|
|
|
357786 |
Subject: [PATCH 45/49] blockjob: Lie better in child_job_drained_poll()
|
|
|
357786 |
|
|
|
357786 |
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
|
|
357786 |
Message-id: <20180914105540.18077-39-kwolf@redhat.com>
|
|
|
357786 |
Patchwork-id: 82192
|
|
|
357786 |
O-Subject: [RHV-7.6 qemu-kvm-rhev PATCH 38/42] blockjob: Lie better in child_job_drained_poll()
|
|
|
357786 |
Bugzilla: 1601212
|
|
|
357786 |
RH-Acked-by: John Snow <jsnow@redhat.com>
|
|
|
357786 |
RH-Acked-by: Max Reitz <mreitz@redhat.com>
|
|
|
357786 |
RH-Acked-by: Fam Zheng <famz@redhat.com>
|
|
|
357786 |
|
|
|
357786 |
Block jobs claim in .drained_poll() that they are in a quiescent state
|
|
|
357786 |
as soon as job->deferred_to_main_loop is true. This is obviously wrong:
|
|
|
357786 |
they still have a completion bottom half (BH) to run. We only get away with this
|
|
|
357786 |
because commit 91af091f923 added an unconditional aio_poll(false) to the
|
|
|
357786 |
drain functions, but this is bypassing the regular drain mechanisms.
|
|
|
357786 |
|
|
|
357786 |
However, just removing this and reporting that the job is still active
|
|
|
357786 |
doesn't work either: The completion callbacks themselves call drain
|
|
|
357786 |
functions (directly, or indirectly with bdrv_reopen), so they would
|
|
|
357786 |
deadlock then.
|
|
|
357786 |
|
|
|
357786 |
As a better lie, report that the job is active as long as the BH is
|
|
|
357786 |
pending, but falsely call it quiescent from the point in the BH when the
|
|
|
357786 |
completion callback is called. At this point, nested drain calls won't
|
|
|
357786 |
deadlock because they ignore the job, and outer drains will wait for the
|
|
|
357786 |
job to really reach a quiescent state because the callback is already
|
|
|
357786 |
running.
|
|
|
357786 |
|
|
|
357786 |
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
|
|
357786 |
Reviewed-by: Max Reitz <mreitz@redhat.com>
|
|
|
357786 |
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
|
|
357786 |
---
|
|
|
357786 |
blockjob.c | 2 +-
|
|
|
357786 |
include/qemu/job.h | 3 +++
|
|
|
357786 |
job.c | 11 ++++++++++-
|
|
|
357786 |
3 files changed, 14 insertions(+), 2 deletions(-)
|
|
|
357786 |
|
|
|
357786 |
diff --git a/blockjob.c b/blockjob.c
|
|
|
357786 |
index 8d27e8e..617d86f 100644
|
|
|
357786 |
--- a/blockjob.c
|
|
|
357786 |
+++ b/blockjob.c
|
|
|
357786 |
@@ -164,7 +164,7 @@ static bool child_job_drained_poll(BdrvChild *c)
|
|
|
357786 |
/* An inactive or completed job doesn't have any pending requests. Jobs
|
|
|
357786 |
* with !job->busy are either already paused or have a pause point after
|
|
|
357786 |
* being reentered, so no job driver code will run before they pause. */
|
|
|
357786 |
- if (!job->busy || job_is_completed(job) || job->deferred_to_main_loop) {
|
|
|
357786 |
+ if (!job->busy || job_is_completed(job)) {
|
|
|
357786 |
return false;
|
|
|
357786 |
}
|
|
|
357786 |
|
|
|
357786 |
diff --git a/include/qemu/job.h b/include/qemu/job.h
|
|
|
357786 |
index 35ac7a9..d1710f3 100644
|
|
|
357786 |
--- a/include/qemu/job.h
|
|
|
357786 |
+++ b/include/qemu/job.h
|
|
|
357786 |
@@ -76,6 +76,9 @@ typedef struct Job {
|
|
|
357786 |
* Set to false by the job while the coroutine has yielded and may be
|
|
|
357786 |
* re-entered by job_enter(). There may still be I/O or event loop activity
|
|
|
357786 |
* pending. Accessed under block_job_mutex (in blockjob.c).
|
|
|
357786 |
+ *
|
|
|
357786 |
+ * When the job is deferred to the main loop, busy is true as long as the
|
|
|
357786 |
+ * bottom half is still pending.
|
|
|
357786 |
*/
|
|
|
357786 |
bool busy;
|
|
|
357786 |
|
|
|
357786 |
diff --git a/job.c b/job.c
|
|
|
357786 |
index 47b5a11..42af9e2 100644
|
|
|
357786 |
--- a/job.c
|
|
|
357786 |
+++ b/job.c
|
|
|
357786 |
@@ -852,7 +852,16 @@ static void job_exit(void *opaque)
|
|
|
357786 |
AioContext *ctx = job->aio_context;
|
|
|
357786 |
|
|
|
357786 |
aio_context_acquire(ctx);
|
|
|
357786 |
+
|
|
|
357786 |
+ /* This is a lie, we're not quiescent, but still doing the completion
|
|
|
357786 |
+ * callbacks. However, completion callbacks tend to involve operations that
|
|
|
357786 |
+ * drain block nodes, and if .drained_poll still returned true, we would
|
|
|
357786 |
+ * deadlock. */
|
|
|
357786 |
+ job->busy = false;
|
|
|
357786 |
+ job_event_idle(job);
|
|
|
357786 |
+
|
|
|
357786 |
job_completed(job);
|
|
|
357786 |
+
|
|
|
357786 |
aio_context_release(ctx);
|
|
|
357786 |
}
|
|
|
357786 |
|
|
|
357786 |
@@ -867,8 +876,8 @@ static void coroutine_fn job_co_entry(void *opaque)
|
|
|
357786 |
assert(job && job->driver && job->driver->run);
|
|
|
357786 |
job_pause_point(job);
|
|
|
357786 |
job->ret = job->driver->run(job, &job->err);
|
|
|
357786 |
- job_event_idle(job);
|
|
|
357786 |
job->deferred_to_main_loop = true;
|
|
|
357786 |
+ job->busy = true;
|
|
|
357786 |
aio_bh_schedule_oneshot(qemu_get_aio_context(), job_exit, job);
|
|
|
357786 |
}
|
|
|
357786 |
|
|
|
357786 |
--
|
|
|
357786 |
1.8.3.1
|
|
|
357786 |
|