|
|
902636 |
From 3f16b8a33bd7503cbe857fbeb45fff7301b6bb5f Mon Sep 17 00:00:00 2001
|
|
|
902636 |
From: Kevin Wolf <kwolf@redhat.com>
|
|
|
902636 |
Date: Wed, 8 Apr 2020 17:29:12 +0100
|
|
|
902636 |
Subject: [PATCH 1/6] job: take each job's lock individually in job_txn_apply
|
|
|
902636 |
|
|
|
902636 |
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
|
|
902636 |
Message-id: <20200408172917.18712-2-kwolf@redhat.com>
|
|
|
902636 |
Patchwork-id: 94597
|
|
|
902636 |
O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/6] job: take each job's lock individually in job_txn_apply
|
|
|
902636 |
Bugzilla: 1817621
|
|
|
902636 |
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
|
|
902636 |
RH-Acked-by: Danilo de Paula <ddepaula@redhat.com>
|
|
|
902636 |
RH-Acked-by: Max Reitz <mreitz@redhat.com>
|
|
|
902636 |
|
|
|
902636 |
From: Stefan Reiter <s.reiter@proxmox.com>
|
|
|
902636 |
|
|
|
902636 |
All callers of job_txn_apply hold a single job's lock, but different
|
|
|
902636 |
jobs within a transaction can have different contexts, thus we need to
|
|
|
902636 |
lock each one individually before applying the callback function.
|
|
|
902636 |
|
|
|
902636 |
Similar to job_completed_txn_abort this also requires releasing the
|
|
|
902636 |
caller's context before and reacquiring it after to avoid recursive
|
|
|
902636 |
locks which might break AIO_WAIT_WHILE in the callback. This is safe, since
|
|
|
902636 |
existing code would already have to take this into account, lest
|
|
|
902636 |
job_completed_txn_abort might have broken.
|
|
|
902636 |
|
|
|
902636 |
This also brings to light a different issue: When a callback function in
|
|
|
902636 |
job_txn_apply moves it's job to a different AIO context, callers will
|
|
|
902636 |
try to release the wrong lock (now that we re-acquire the lock
|
|
|
902636 |
correctly, previously it would just continue with the old lock, leaving
|
|
|
902636 |
the job unlocked for the rest of the return path). Fix this by not caching
|
|
|
902636 |
the job's context.
|
|
|
902636 |
|
|
|
902636 |
This is only necessary for qmp_block_job_finalize, qmp_job_finalize and
|
|
|
902636 |
job_exit, since everyone else calls through job_exit.
|
|
|
902636 |
|
|
|
902636 |
One test needed adapting, since it calls job_finalize directly, so it
|
|
|
902636 |
manually needs to acquire the correct context.
|
|
|
902636 |
|
|
|
902636 |
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
|
|
|
902636 |
Message-Id: <20200407115651.69472-2-s.reiter@proxmox.com>
|
|
|
902636 |
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
|
|
902636 |
(cherry picked from commit b660a84bbb0eb1a76b505648d31d5e82594fb75e)
|
|
|
902636 |
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
|
|
902636 |
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
|
|
|
902636 |
---
|
|
|
902636 |
blockdev.c | 9 +++++++++
|
|
|
902636 |
job-qmp.c | 9 +++++++++
|
|
|
902636 |
job.c | 50 ++++++++++++++++++++++++++++++++++++++++----------
|
|
|
902636 |
tests/test-blockjob.c | 2 ++
|
|
|
902636 |
4 files changed, 60 insertions(+), 10 deletions(-)
|
|
|
902636 |
|
|
|
902636 |
diff --git a/blockdev.c b/blockdev.c
|
|
|
902636 |
index c8d4b51..86eb115 100644
|
|
|
902636 |
--- a/blockdev.c
|
|
|
902636 |
+++ b/blockdev.c
|
|
|
902636 |
@@ -4215,7 +4215,16 @@ void qmp_block_job_finalize(const char *id, Error **errp)
|
|
|
902636 |
}
|
|
|
902636 |
|
|
|
902636 |
trace_qmp_block_job_finalize(job);
|
|
|
902636 |
+ job_ref(&job->job);
|
|
|
902636 |
job_finalize(&job->job, errp);
|
|
|
902636 |
+
|
|
|
902636 |
+ /*
|
|
|
902636 |
+ * Job's context might have changed via job_finalize (and job_txn_apply
|
|
|
902636 |
+ * automatically acquires the new one), so make sure we release the correct
|
|
|
902636 |
+ * one.
|
|
|
902636 |
+ */
|
|
|
902636 |
+ aio_context = blk_get_aio_context(job->blk);
|
|
|
902636 |
+ job_unref(&job->job);
|
|
|
902636 |
aio_context_release(aio_context);
|
|
|
902636 |
}
|
|
|
902636 |
|
|
|
902636 |
diff --git a/job-qmp.c b/job-qmp.c
|
|
|
902636 |
index fbfed25..a201220 100644
|
|
|
902636 |
--- a/job-qmp.c
|
|
|
902636 |
+++ b/job-qmp.c
|
|
|
902636 |
@@ -114,7 +114,16 @@ void qmp_job_finalize(const char *id, Error **errp)
|
|
|
902636 |
}
|
|
|
902636 |
|
|
|
902636 |
trace_qmp_job_finalize(job);
|
|
|
902636 |
+ job_ref(job);
|
|
|
902636 |
job_finalize(job, errp);
|
|
|
902636 |
+
|
|
|
902636 |
+ /*
|
|
|
902636 |
+ * Job's context might have changed via job_finalize (and job_txn_apply
|
|
|
902636 |
+ * automatically acquires the new one), so make sure we release the correct
|
|
|
902636 |
+ * one.
|
|
|
902636 |
+ */
|
|
|
902636 |
+ aio_context = job->aio_context;
|
|
|
902636 |
+ job_unref(job);
|
|
|
902636 |
aio_context_release(aio_context);
|
|
|
902636 |
}
|
|
|
902636 |
|
|
|
902636 |
diff --git a/job.c b/job.c
|
|
|
902636 |
index 04409b4..48fc4ad 100644
|
|
|
902636 |
--- a/job.c
|
|
|
902636 |
+++ b/job.c
|
|
|
902636 |
@@ -136,17 +136,38 @@ static void job_txn_del_job(Job *job)
|
|
|
902636 |
}
|
|
|
902636 |
}
|
|
|
902636 |
|
|
|
902636 |
-static int job_txn_apply(JobTxn *txn, int fn(Job *))
|
|
|
902636 |
+static int job_txn_apply(Job *job, int fn(Job *))
|
|
|
902636 |
{
|
|
|
902636 |
- Job *job, *next;
|
|
|
902636 |
+ AioContext *inner_ctx;
|
|
|
902636 |
+ Job *other_job, *next;
|
|
|
902636 |
+ JobTxn *txn = job->txn;
|
|
|
902636 |
int rc = 0;
|
|
|
902636 |
|
|
|
902636 |
- QLIST_FOREACH_SAFE(job, &txn->jobs, txn_list, next) {
|
|
|
902636 |
- rc = fn(job);
|
|
|
902636 |
+ /*
|
|
|
902636 |
+ * Similar to job_completed_txn_abort, we take each job's lock before
|
|
|
902636 |
+ * applying fn, but since we assume that outer_ctx is held by the caller,
|
|
|
902636 |
+ * we need to release it here to avoid holding the lock twice - which would
|
|
|
902636 |
+ * break AIO_WAIT_WHILE from within fn.
|
|
|
902636 |
+ */
|
|
|
902636 |
+ job_ref(job);
|
|
|
902636 |
+ aio_context_release(job->aio_context);
|
|
|
902636 |
+
|
|
|
902636 |
+ QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) {
|
|
|
902636 |
+ inner_ctx = other_job->aio_context;
|
|
|
902636 |
+ aio_context_acquire(inner_ctx);
|
|
|
902636 |
+ rc = fn(other_job);
|
|
|
902636 |
+ aio_context_release(inner_ctx);
|
|
|
902636 |
if (rc) {
|
|
|
902636 |
break;
|
|
|
902636 |
}
|
|
|
902636 |
}
|
|
|
902636 |
+
|
|
|
902636 |
+ /*
|
|
|
902636 |
+ * Note that job->aio_context might have been changed by calling fn, so we
|
|
|
902636 |
+ * can't use a local variable to cache it.
|
|
|
902636 |
+ */
|
|
|
902636 |
+ aio_context_acquire(job->aio_context);
|
|
|
902636 |
+ job_unref(job);
|
|
|
902636 |
return rc;
|
|
|
902636 |
}
|
|
|
902636 |
|
|
|
902636 |
@@ -774,11 +795,11 @@ static void job_do_finalize(Job *job)
|
|
|
902636 |
assert(job && job->txn);
|
|
|
902636 |
|
|
|
902636 |
/* prepare the transaction to complete */
|
|
|
902636 |
- rc = job_txn_apply(job->txn, job_prepare);
|
|
|
902636 |
+ rc = job_txn_apply(job, job_prepare);
|
|
|
902636 |
if (rc) {
|
|
|
902636 |
job_completed_txn_abort(job);
|
|
|
902636 |
} else {
|
|
|
902636 |
- job_txn_apply(job->txn, job_finalize_single);
|
|
|
902636 |
+ job_txn_apply(job, job_finalize_single);
|
|
|
902636 |
}
|
|
|
902636 |
}
|
|
|
902636 |
|
|
|
902636 |
@@ -824,10 +845,10 @@ static void job_completed_txn_success(Job *job)
|
|
|
902636 |
assert(other_job->ret == 0);
|
|
|
902636 |
}
|
|
|
902636 |
|
|
|
902636 |
- job_txn_apply(txn, job_transition_to_pending);
|
|
|
902636 |
+ job_txn_apply(job, job_transition_to_pending);
|
|
|
902636 |
|
|
|
902636 |
/* If no jobs need manual finalization, automatically do so */
|
|
|
902636 |
- if (job_txn_apply(txn, job_needs_finalize) == 0) {
|
|
|
902636 |
+ if (job_txn_apply(job, job_needs_finalize) == 0) {
|
|
|
902636 |
job_do_finalize(job);
|
|
|
902636 |
}
|
|
|
902636 |
}
|
|
|
902636 |
@@ -849,9 +870,10 @@ static void job_completed(Job *job)
|
|
|
902636 |
static void job_exit(void *opaque)
|
|
|
902636 |
{
|
|
|
902636 |
Job *job = (Job *)opaque;
|
|
|
902636 |
- AioContext *ctx = job->aio_context;
|
|
|
902636 |
+ AioContext *ctx;
|
|
|
902636 |
|
|
|
902636 |
- aio_context_acquire(ctx);
|
|
|
902636 |
+ job_ref(job);
|
|
|
902636 |
+ aio_context_acquire(job->aio_context);
|
|
|
902636 |
|
|
|
902636 |
/* This is a lie, we're not quiescent, but still doing the completion
|
|
|
902636 |
* callbacks. However, completion callbacks tend to involve operations that
|
|
|
902636 |
@@ -862,6 +884,14 @@ static void job_exit(void *opaque)
|
|
|
902636 |
|
|
|
902636 |
job_completed(job);
|
|
|
902636 |
|
|
|
902636 |
+ /*
|
|
|
902636 |
+ * Note that calling job_completed can move the job to a different
|
|
|
902636 |
+ * aio_context, so we cannot cache from above. job_txn_apply takes care of
|
|
|
902636 |
+ * acquiring the new lock, and we ref/unref to avoid job_completed freeing
|
|
|
902636 |
+ * the job underneath us.
|
|
|
902636 |
+ */
|
|
|
902636 |
+ ctx = job->aio_context;
|
|
|
902636 |
+ job_unref(job);
|
|
|
902636 |
aio_context_release(ctx);
|
|
|
902636 |
}
|
|
|
902636 |
|
|
|
902636 |
diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c
|
|
|
902636 |
index 7844c9f..6d857fd 100644
|
|
|
902636 |
--- a/tests/test-blockjob.c
|
|
|
902636 |
+++ b/tests/test-blockjob.c
|
|
|
902636 |
@@ -368,7 +368,9 @@ static void test_cancel_concluded(void)
|
|
|
902636 |
aio_poll(qemu_get_aio_context(), true);
|
|
|
902636 |
assert(job->status == JOB_STATUS_PENDING);
|
|
|
902636 |
|
|
|
902636 |
+ aio_context_acquire(job->aio_context);
|
|
|
902636 |
job_finalize(job, &error_abort);
|
|
|
902636 |
+ aio_context_release(job->aio_context);
|
|
|
902636 |
assert(job->status == JOB_STATUS_CONCLUDED);
|
|
|
902636 |
|
|
|
902636 |
cancel_common(s);
|
|
|
902636 |
--
|
|
|
902636 |
1.8.3.1
|
|
|
902636 |
|