Blame SOURCES/kvm-mirror-Confirm-we-re-quiesced-only-if-the-job-is-pau.patch

8b1478
From ba6d773a79eef6f59687771bc5664814d96b6b03 Mon Sep 17 00:00:00 2001
8b1478
From: Sergio Lopez Pascual <slp@redhat.com>
8b1478
Date: Mon, 9 Sep 2019 12:46:40 +0200
8b1478
Subject: [PATCH 4/4] mirror: Confirm we're quiesced only if the job is paused
8b1478
 or cancelled
8b1478
8b1478
RH-Author: Sergio Lopez Pascual <slp@redhat.com>
8b1478
Message-id: <20190909124640.53625-2-slp@redhat.com>
8b1478
Patchwork-id: 90339
8b1478
O-Subject: [RHEL-7.7.z qemu-kvm-rhev PATCH 1/1] mirror: Confirm we're quiesced only if the job is paused or cancelled
8b1478
Bugzilla: 1665256
8b1478
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
8b1478
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
8b1478
RH-Acked-by: John Snow <jsnow@redhat.com>
8b1478
8b1478
While child_job_drained_begin() calls to job_pause(), the job doesn't
8b1478
actually transition between states until it runs again and reaches a
8b1478
pause point. This means bdrv_drained_begin() may return with some jobs
8b1478
using the node still having 'busy == true'.
8b1478
8b1478
As a consequence, block_job_detach_aio_context() may get into a
8b1478
deadlock, waiting for the job to be actually paused, while the coroutine
8b1478
servicing the job is yielding and doesn't get the opportunity to get
8b1478
scheduled again. This situation can be reproduced by issuing a
8b1478
'block-commit' immediately followed by a 'device_del'.
8b1478
8b1478
To ensure bdrv_drained_begin() only returns when the jobs have been
8b1478
paused, we change mirror_drained_poll() to only confirm it's quiesced
8b1478
when job->paused == true and there aren't any in-flight requests, except
8b1478
if we reached that point by a drained section initiated by the
8b1478
mirror/commit job itself.
8b1478
8b1478
The other block jobs shouldn't need any changes, as the default
8b1478
drained_poll() behavior is to only confirm it's quiesced if the job is
8b1478
not busy or completed.
8b1478
8b1478
Signed-off-by: Sergio Lopez <slp@redhat.com>
8b1478
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
8b1478
(cherry picked from commit 5e771752a1ffba3a99d7d75b6d492b4a86b59e1b)
8b1478
Signed-off-by: Sergio Lopez <slp@redhat.com>
8b1478
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
8b1478
---
8b1478
 block/mirror.c | 16 ++++++++++++++++
8b1478
 1 file changed, 16 insertions(+)
8b1478
8b1478
diff --git a/block/mirror.c b/block/mirror.c
8b1478
index 55dc94f..48c907f 100644
8b1478
--- a/block/mirror.c
8b1478
+++ b/block/mirror.c
8b1478
@@ -72,6 +72,7 @@ typedef struct MirrorBlockJob {
8b1478
     int max_iov;
8b1478
     bool initial_zeroing_ongoing;
8b1478
     bool prepared;
8b1478
+    bool in_drain;
8b1478
 } MirrorBlockJob;
8b1478
 
8b1478
 typedef struct MirrorOp {
8b1478
@@ -551,6 +552,7 @@ static int mirror_exit_common(Job *job)
8b1478
 
8b1478
         /* The mirror job has no requests in flight any more, but we need to
8b1478
          * drain potential other users of the BDS before changing the graph. */
8b1478
+        assert(s->in_drain);
8b1478
         bdrv_drained_begin(target_bs);
8b1478
         bdrv_replace_node(to_replace, target_bs, &local_err);
8b1478
         bdrv_drained_end(target_bs);
8b1478
@@ -587,6 +589,7 @@ static int mirror_exit_common(Job *job)
8b1478
     blk_insert_bs(bjob->blk, mirror_top_bs, &error_abort);
8b1478
 
8b1478
     bdrv_drained_end(src);
8b1478
+    s->in_drain = false;
8b1478
     bdrv_unref(mirror_top_bs);
8b1478
     bdrv_unref(src);
8b1478
 
8b1478
@@ -860,10 +863,12 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
8b1478
              */
8b1478
             trace_mirror_before_drain(s, cnt);
8b1478
 
8b1478
+            s->in_drain = true;
8b1478
             bdrv_drained_begin(bs);
8b1478
             cnt = bdrv_get_dirty_count(s->dirty_bitmap);
8b1478
             if (cnt > 0 || mirror_flush(s) < 0) {
8b1478
                 bdrv_drained_end(bs);
8b1478
+                s->in_drain = false;
8b1478
                 continue;
8b1478
             }
8b1478
 
8b1478
@@ -911,6 +916,7 @@ immediate_exit:
8b1478
     bdrv_dirty_iter_free(s->dbi);
8b1478
 
8b1478
     if (need_drain) {
8b1478
+        s->in_drain = true;
8b1478
         bdrv_drained_begin(bs);
8b1478
     }
8b1478
 
8b1478
@@ -979,6 +985,16 @@ static void mirror_pause(Job *job)
8b1478
 static bool mirror_drained_poll(BlockJob *job)
8b1478
 {
8b1478
     MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
8b1478
+
8b1478
+    /* If the job isn't paused nor cancelled, we can't be sure that it won't
8b1478
+     * issue more requests. We make an exception if we've reached this point
8b1478
+     * from one of our own drain sections, to avoid a deadlock waiting for
8b1478
+     * ourselves.
8b1478
+     */
8b1478
+    if (!s->common.job.paused && !s->common.job.cancelled && !s->in_drain) {
8b1478
+        return true;
8b1478
+    }
8b1478
+
8b1478
     return !!s->in_flight;
8b1478
 }
8b1478
 
8b1478
-- 
8b1478
1.8.3.1
8b1478