Blame SOURCES/kvm-job-take-each-job-s-lock-individually-in-job_txn_app.patch

22c213
From 3f16b8a33bd7503cbe857fbeb45fff7301b6bb5f Mon Sep 17 00:00:00 2001
22c213
From: Kevin Wolf <kwolf@redhat.com>
22c213
Date: Wed, 8 Apr 2020 17:29:12 +0100
22c213
Subject: [PATCH 1/6] job: take each job's lock individually in job_txn_apply
22c213
22c213
RH-Author: Kevin Wolf <kwolf@redhat.com>
22c213
Message-id: <20200408172917.18712-2-kwolf@redhat.com>
22c213
Patchwork-id: 94597
22c213
O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/6] job: take each job's lock individually in job_txn_apply
22c213
Bugzilla: 1817621
22c213
RH-Acked-by: Eric Blake <eblake@redhat.com>
22c213
RH-Acked-by: Danilo de Paula <ddepaula@redhat.com>
22c213
RH-Acked-by: Max Reitz <mreitz@redhat.com>
22c213
22c213
From: Stefan Reiter <s.reiter@proxmox.com>
22c213
22c213
All callers of job_txn_apply hold a single job's lock, but different
22c213
jobs within a transaction can have different contexts, thus we need to
22c213
lock each one individually before applying the callback function.
22c213
22c213
Similar to job_completed_txn_abort this also requires releasing the
22c213
caller's context before and reacquiring it after to avoid recursive
22c213
locks which might break AIO_WAIT_WHILE in the callback. This is safe, since
22c213
existing code would already have to take this into account, lest
22c213
job_completed_txn_abort might have broken.
22c213
22c213
This also brings to light a different issue: When a callback function in
22c213
job_txn_apply moves it's job to a different AIO context, callers will
22c213
try to release the wrong lock (now that we re-acquire the lock
22c213
correctly, previously it would just continue with the old lock, leaving
22c213
the job unlocked for the rest of the return path). Fix this by not caching
22c213
the job's context.
22c213
22c213
This is only necessary for qmp_block_job_finalize, qmp_job_finalize and
22c213
job_exit, since everyone else calls through job_exit.
22c213
22c213
One test needed adapting, since it calls job_finalize directly, so it
22c213
manually needs to acquire the correct context.
22c213
22c213
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
22c213
Message-Id: <20200407115651.69472-2-s.reiter@proxmox.com>
22c213
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
22c213
(cherry picked from commit b660a84bbb0eb1a76b505648d31d5e82594fb75e)
22c213
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
22c213
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
22c213
---
22c213
 blockdev.c            |  9 +++++++++
22c213
 job-qmp.c             |  9 +++++++++
22c213
 job.c                 | 50 ++++++++++++++++++++++++++++++++++++++++----------
22c213
 tests/test-blockjob.c |  2 ++
22c213
 4 files changed, 60 insertions(+), 10 deletions(-)
22c213
22c213
diff --git a/blockdev.c b/blockdev.c
22c213
index c8d4b51..86eb115 100644
22c213
--- a/blockdev.c
22c213
+++ b/blockdev.c
22c213
@@ -4215,7 +4215,16 @@ void qmp_block_job_finalize(const char *id, Error **errp)
22c213
     }
22c213
 
22c213
     trace_qmp_block_job_finalize(job);
22c213
+    job_ref(&job->job);
22c213
     job_finalize(&job->job, errp);
22c213
+
22c213
+    /*
22c213
+     * Job's context might have changed via job_finalize (and job_txn_apply
22c213
+     * automatically acquires the new one), so make sure we release the correct
22c213
+     * one.
22c213
+     */
22c213
+    aio_context = blk_get_aio_context(job->blk);
22c213
+    job_unref(&job->job);
22c213
     aio_context_release(aio_context);
22c213
 }
22c213
 
22c213
diff --git a/job-qmp.c b/job-qmp.c
22c213
index fbfed25..a201220 100644
22c213
--- a/job-qmp.c
22c213
+++ b/job-qmp.c
22c213
@@ -114,7 +114,16 @@ void qmp_job_finalize(const char *id, Error **errp)
22c213
     }
22c213
 
22c213
     trace_qmp_job_finalize(job);
22c213
+    job_ref(job);
22c213
     job_finalize(job, errp);
22c213
+
22c213
+    /*
22c213
+     * Job's context might have changed via job_finalize (and job_txn_apply
22c213
+     * automatically acquires the new one), so make sure we release the correct
22c213
+     * one.
22c213
+     */
22c213
+    aio_context = job->aio_context;
22c213
+    job_unref(job);
22c213
     aio_context_release(aio_context);
22c213
 }
22c213
 
22c213
diff --git a/job.c b/job.c
22c213
index 04409b4..48fc4ad 100644
22c213
--- a/job.c
22c213
+++ b/job.c
22c213
@@ -136,17 +136,38 @@ static void job_txn_del_job(Job *job)
22c213
     }
22c213
 }
22c213
 
22c213
-static int job_txn_apply(JobTxn *txn, int fn(Job *))
22c213
+static int job_txn_apply(Job *job, int fn(Job *))
22c213
 {
22c213
-    Job *job, *next;
22c213
+    AioContext *inner_ctx;
22c213
+    Job *other_job, *next;
22c213
+    JobTxn *txn = job->txn;
22c213
     int rc = 0;
22c213
 
22c213
-    QLIST_FOREACH_SAFE(job, &txn->jobs, txn_list, next) {
22c213
-        rc = fn(job);
22c213
+    /*
22c213
+     * Similar to job_completed_txn_abort, we take each job's lock before
22c213
+     * applying fn, but since we assume that outer_ctx is held by the caller,
22c213
+     * we need to release it here to avoid holding the lock twice - which would
22c213
+     * break AIO_WAIT_WHILE from within fn.
22c213
+     */
22c213
+    job_ref(job);
22c213
+    aio_context_release(job->aio_context);
22c213
+
22c213
+    QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) {
22c213
+        inner_ctx = other_job->aio_context;
22c213
+        aio_context_acquire(inner_ctx);
22c213
+        rc = fn(other_job);
22c213
+        aio_context_release(inner_ctx);
22c213
         if (rc) {
22c213
             break;
22c213
         }
22c213
     }
22c213
+
22c213
+    /*
22c213
+     * Note that job->aio_context might have been changed by calling fn, so we
22c213
+     * can't use a local variable to cache it.
22c213
+     */
22c213
+    aio_context_acquire(job->aio_context);
22c213
+    job_unref(job);
22c213
     return rc;
22c213
 }
22c213
 
22c213
@@ -774,11 +795,11 @@ static void job_do_finalize(Job *job)
22c213
     assert(job && job->txn);
22c213
 
22c213
     /* prepare the transaction to complete */
22c213
-    rc = job_txn_apply(job->txn, job_prepare);
22c213
+    rc = job_txn_apply(job, job_prepare);
22c213
     if (rc) {
22c213
         job_completed_txn_abort(job);
22c213
     } else {
22c213
-        job_txn_apply(job->txn, job_finalize_single);
22c213
+        job_txn_apply(job, job_finalize_single);
22c213
     }
22c213
 }
22c213
 
22c213
@@ -824,10 +845,10 @@ static void job_completed_txn_success(Job *job)
22c213
         assert(other_job->ret == 0);
22c213
     }
22c213
 
22c213
-    job_txn_apply(txn, job_transition_to_pending);
22c213
+    job_txn_apply(job, job_transition_to_pending);
22c213
 
22c213
     /* If no jobs need manual finalization, automatically do so */
22c213
-    if (job_txn_apply(txn, job_needs_finalize) == 0) {
22c213
+    if (job_txn_apply(job, job_needs_finalize) == 0) {
22c213
         job_do_finalize(job);
22c213
     }
22c213
 }
22c213
@@ -849,9 +870,10 @@ static void job_completed(Job *job)
22c213
 static void job_exit(void *opaque)
22c213
 {
22c213
     Job *job = (Job *)opaque;
22c213
-    AioContext *ctx = job->aio_context;
22c213
+    AioContext *ctx;
22c213
 
22c213
-    aio_context_acquire(ctx);
22c213
+    job_ref(job);
22c213
+    aio_context_acquire(job->aio_context);
22c213
 
22c213
     /* This is a lie, we're not quiescent, but still doing the completion
22c213
      * callbacks. However, completion callbacks tend to involve operations that
22c213
@@ -862,6 +884,14 @@ static void job_exit(void *opaque)
22c213
 
22c213
     job_completed(job);
22c213
 
22c213
+    /*
22c213
+     * Note that calling job_completed can move the job to a different
22c213
+     * aio_context, so we cannot cache from above. job_txn_apply takes care of
22c213
+     * acquiring the new lock, and we ref/unref to avoid job_completed freeing
22c213
+     * the job underneath us.
22c213
+     */
22c213
+    ctx = job->aio_context;
22c213
+    job_unref(job);
22c213
     aio_context_release(ctx);
22c213
 }
22c213
 
22c213
diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c
22c213
index 7844c9f..6d857fd 100644
22c213
--- a/tests/test-blockjob.c
22c213
+++ b/tests/test-blockjob.c
22c213
@@ -368,7 +368,9 @@ static void test_cancel_concluded(void)
22c213
     aio_poll(qemu_get_aio_context(), true);
22c213
     assert(job->status == JOB_STATUS_PENDING);
22c213
 
22c213
+    aio_context_acquire(job->aio_context);
22c213
     job_finalize(job, &error_abort);
22c213
+    aio_context_release(job->aio_context);
22c213
     assert(job->status == JOB_STATUS_CONCLUDED);
22c213
 
22c213
     cancel_common(s);
22c213
-- 
22c213
1.8.3.1
22c213