|
|
8b1478 |
From ba6d773a79eef6f59687771bc5664814d96b6b03 Mon Sep 17 00:00:00 2001
|
|
|
8b1478 |
From: Sergio Lopez Pascual <slp@redhat.com>
|
|
|
8b1478 |
Date: Mon, 9 Sep 2019 12:46:40 +0200
|
|
|
8b1478 |
Subject: [PATCH 4/4] mirror: Confirm we're quiesced only if the job is paused
|
|
|
8b1478 |
or cancelled
|
|
|
8b1478 |
|
|
|
8b1478 |
RH-Author: Sergio Lopez Pascual <slp@redhat.com>
|
|
|
8b1478 |
Message-id: <20190909124640.53625-2-slp@redhat.com>
|
|
|
8b1478 |
Patchwork-id: 90339
|
|
|
8b1478 |
O-Subject: [RHEL-7.7.z qemu-kvm-rhev PATCH 1/1] mirror: Confirm we're quiesced only if the job is paused or cancelled
|
|
|
8b1478 |
Bugzilla: 1665256
|
|
|
8b1478 |
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
|
|
8b1478 |
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
|
|
8b1478 |
RH-Acked-by: John Snow <jsnow@redhat.com>
|
|
|
8b1478 |
|
|
|
8b1478 |
While child_job_drained_begin() calls to job_pause(), the job doesn't
|
|
|
8b1478 |
actually transition between states until it runs again and reaches a
|
|
|
8b1478 |
pause point. This means bdrv_drained_begin() may return with some jobs
|
|
|
8b1478 |
using the node still having 'busy == true'.
|
|
|
8b1478 |
|
|
|
8b1478 |
As a consequence, block_job_detach_aio_context() may get into a
|
|
|
8b1478 |
deadlock, waiting for the job to be actually paused, while the coroutine
|
|
|
8b1478 |
servicing the job is yielding and doesn't get the opportunity to get
|
|
|
8b1478 |
scheduled again. This situation can be reproduced by issuing a
|
|
|
8b1478 |
'block-commit' immediately followed by a 'device_del'.
|
|
|
8b1478 |
|
|
|
8b1478 |
To ensure bdrv_drained_begin() only returns when the jobs have been
|
|
|
8b1478 |
paused, we change mirror_drained_poll() to only confirm it's quiesced
|
|
|
8b1478 |
when job->paused == true and there aren't any in-flight requests, except
|
|
|
8b1478 |
if we reached that point by a drained section initiated by the
|
|
|
8b1478 |
mirror/commit job itself.
|
|
|
8b1478 |
|
|
|
8b1478 |
The other block jobs shouldn't need any changes, as the default
|
|
|
8b1478 |
drained_poll() behavior is to only confirm it's quiesced if the job is
|
|
|
8b1478 |
not busy or completed.
|
|
|
8b1478 |
|
|
|
8b1478 |
Signed-off-by: Sergio Lopez <slp@redhat.com>
|
|
|
8b1478 |
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
|
|
8b1478 |
(cherry picked from commit 5e771752a1ffba3a99d7d75b6d492b4a86b59e1b)
|
|
|
8b1478 |
Signed-off-by: Sergio Lopez <slp@redhat.com>
|
|
|
8b1478 |
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
|
|
8b1478 |
---
|
|
|
8b1478 |
block/mirror.c | 16 ++++++++++++++++
|
|
|
8b1478 |
1 file changed, 16 insertions(+)
|
|
|
8b1478 |
|
|
|
8b1478 |
diff --git a/block/mirror.c b/block/mirror.c
|
|
|
8b1478 |
index 55dc94f..48c907f 100644
|
|
|
8b1478 |
--- a/block/mirror.c
|
|
|
8b1478 |
+++ b/block/mirror.c
|
|
|
8b1478 |
@@ -72,6 +72,7 @@ typedef struct MirrorBlockJob {
|
|
|
8b1478 |
int max_iov;
|
|
|
8b1478 |
bool initial_zeroing_ongoing;
|
|
|
8b1478 |
bool prepared;
|
|
|
8b1478 |
+ bool in_drain;
|
|
|
8b1478 |
} MirrorBlockJob;
|
|
|
8b1478 |
|
|
|
8b1478 |
typedef struct MirrorOp {
|
|
|
8b1478 |
@@ -551,6 +552,7 @@ static int mirror_exit_common(Job *job)
|
|
|
8b1478 |
|
|
|
8b1478 |
/* The mirror job has no requests in flight any more, but we need to
|
|
|
8b1478 |
* drain potential other users of the BDS before changing the graph. */
|
|
|
8b1478 |
+ assert(s->in_drain);
|
|
|
8b1478 |
bdrv_drained_begin(target_bs);
|
|
|
8b1478 |
bdrv_replace_node(to_replace, target_bs, &local_err);
|
|
|
8b1478 |
bdrv_drained_end(target_bs);
|
|
|
8b1478 |
@@ -587,6 +589,7 @@ static int mirror_exit_common(Job *job)
|
|
|
8b1478 |
blk_insert_bs(bjob->blk, mirror_top_bs, &error_abort);
|
|
|
8b1478 |
|
|
|
8b1478 |
bdrv_drained_end(src);
|
|
|
8b1478 |
+ s->in_drain = false;
|
|
|
8b1478 |
bdrv_unref(mirror_top_bs);
|
|
|
8b1478 |
bdrv_unref(src);
|
|
|
8b1478 |
|
|
|
8b1478 |
@@ -860,10 +863,12 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
|
|
|
8b1478 |
*/
|
|
|
8b1478 |
trace_mirror_before_drain(s, cnt);
|
|
|
8b1478 |
|
|
|
8b1478 |
+ s->in_drain = true;
|
|
|
8b1478 |
bdrv_drained_begin(bs);
|
|
|
8b1478 |
cnt = bdrv_get_dirty_count(s->dirty_bitmap);
|
|
|
8b1478 |
if (cnt > 0 || mirror_flush(s) < 0) {
|
|
|
8b1478 |
bdrv_drained_end(bs);
|
|
|
8b1478 |
+ s->in_drain = false;
|
|
|
8b1478 |
continue;
|
|
|
8b1478 |
}
|
|
|
8b1478 |
|
|
|
8b1478 |
@@ -911,6 +916,7 @@ immediate_exit:
|
|
|
8b1478 |
bdrv_dirty_iter_free(s->dbi);
|
|
|
8b1478 |
|
|
|
8b1478 |
if (need_drain) {
|
|
|
8b1478 |
+ s->in_drain = true;
|
|
|
8b1478 |
bdrv_drained_begin(bs);
|
|
|
8b1478 |
}
|
|
|
8b1478 |
|
|
|
8b1478 |
@@ -979,6 +985,16 @@ static void mirror_pause(Job *job)
|
|
|
8b1478 |
static bool mirror_drained_poll(BlockJob *job)
|
|
|
8b1478 |
{
|
|
|
8b1478 |
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
|
|
|
8b1478 |
+
|
|
|
8b1478 |
+ /* If the job isn't paused nor cancelled, we can't be sure that it won't
|
|
|
8b1478 |
+ * issue more requests. We make an exception if we've reached this point
|
|
|
8b1478 |
+ * from one of our own drain sections, to avoid a deadlock waiting for
|
|
|
8b1478 |
+ * ourselves.
|
|
|
8b1478 |
+ */
|
|
|
8b1478 |
+ if (!s->common.job.paused && !s->common.job.cancelled && !s->in_drain) {
|
|
|
8b1478 |
+ return true;
|
|
|
8b1478 |
+ }
|
|
|
8b1478 |
+
|
|
|
8b1478 |
return !!s->in_flight;
|
|
|
8b1478 |
}
|
|
|
8b1478 |
|
|
|
8b1478 |
--
|
|
|
8b1478 |
1.8.3.1
|
|
|
8b1478 |
|