Blame SOURCES/kvm-job-Avoid-deadlocks-in-job_completed_txn_abort.patch

ae23c9
From 42e244782774dc971c83c4fabc16f46c57d86f21 Mon Sep 17 00:00:00 2001
ae23c9
From: Kevin Wolf <kwolf@redhat.com>
ae23c9
Date: Wed, 10 Oct 2018 20:22:09 +0100
ae23c9
Subject: [PATCH 43/49] job: Avoid deadlocks in job_completed_txn_abort()
ae23c9
ae23c9
RH-Author: Kevin Wolf <kwolf@redhat.com>
ae23c9
Message-id: <20181010202213.7372-31-kwolf@redhat.com>
ae23c9
Patchwork-id: 82622
ae23c9
O-Subject: [RHEL-8 qemu-kvm PATCH 40/44] job: Avoid deadlocks in job_completed_txn_abort()
ae23c9
Bugzilla: 1637976
ae23c9
RH-Acked-by: Max Reitz <mreitz@redhat.com>
ae23c9
RH-Acked-by: John Snow <jsnow@redhat.com>
ae23c9
RH-Acked-by: Thomas Huth <thuth@redhat.com>
ae23c9
ae23c9
Amongst others, job_finalize_single() calls the .prepare/.commit/.abort
ae23c9
callbacks of the individual job driver. Recently, their use was adapted
ae23c9
for all block jobs so that they involve code calling AIO_WAIT_WHILE()
ae23c9
now. Such code must be called under the AioContext lock for the
ae23c9
respective job, but without holding any other AioContext lock.
ae23c9
ae23c9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
ae23c9
Reviewed-by: Max Reitz <mreitz@redhat.com>
ae23c9
(cherry picked from commit 644f3a29bd4974aefd46d2adb5062d86063c8a50)
ae23c9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
ae23c9
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
ae23c9
---
ae23c9
 job.c | 16 +++++++++++-----
ae23c9
 1 file changed, 11 insertions(+), 5 deletions(-)
ae23c9
ae23c9
diff --git a/job.c b/job.c
ae23c9
index 42af9e2..5b53e43 100644
ae23c9
--- a/job.c
ae23c9
+++ b/job.c
ae23c9
@@ -713,6 +713,7 @@ static void job_cancel_async(Job *job, bool force)
ae23c9
 
ae23c9
 static void job_completed_txn_abort(Job *job)
ae23c9
 {
ae23c9
+    AioContext *outer_ctx = job->aio_context;
ae23c9
     AioContext *ctx;
ae23c9
     JobTxn *txn = job->txn;
ae23c9
     Job *other_job;
ae23c9
@@ -726,23 +727,26 @@ static void job_completed_txn_abort(Job *job)
ae23c9
     txn->aborting = true;
ae23c9
     job_txn_ref(txn);
ae23c9
 
ae23c9
-    /* We are the first failed job. Cancel other jobs. */
ae23c9
-    QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
ae23c9
-        ctx = other_job->aio_context;
ae23c9
-        aio_context_acquire(ctx);
ae23c9
-    }
ae23c9
+    /* We can only hold the single job's AioContext lock while calling
ae23c9
+     * job_finalize_single() because the finalization callbacks can involve
ae23c9
+     * calls of AIO_WAIT_WHILE(), which could deadlock otherwise. */
ae23c9
+    aio_context_release(outer_ctx);
ae23c9
 
ae23c9
     /* Other jobs are effectively cancelled by us, set the status for
ae23c9
      * them; this job, however, may or may not be cancelled, depending
ae23c9
      * on the caller, so leave it. */
ae23c9
     QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
ae23c9
         if (other_job != job) {
ae23c9
+            ctx = other_job->aio_context;
ae23c9
+            aio_context_acquire(ctx);
ae23c9
             job_cancel_async(other_job, false);
ae23c9
+            aio_context_release(ctx);
ae23c9
         }
ae23c9
     }
ae23c9
     while (!QLIST_EMPTY(&txn->jobs)) {
ae23c9
         other_job = QLIST_FIRST(&txn->jobs);
ae23c9
         ctx = other_job->aio_context;
ae23c9
+        aio_context_acquire(ctx);
ae23c9
         if (!job_is_completed(other_job)) {
ae23c9
             assert(job_is_cancelled(other_job));
ae23c9
             job_finish_sync(other_job, NULL, NULL);
ae23c9
@@ -751,6 +755,8 @@ static void job_completed_txn_abort(Job *job)
ae23c9
         aio_context_release(ctx);
ae23c9
     }
ae23c9
 
ae23c9
+    aio_context_acquire(outer_ctx);
ae23c9
+
ae23c9
     job_txn_unref(txn);
ae23c9
 }
ae23c9
 
ae23c9
-- 
ae23c9
1.8.3.1
ae23c9