|
|
a9339c |
From 13bcf85ffab4b4e67039599246604a3f5b503975 Mon Sep 17 00:00:00 2001
|
|
|
a9339c |
From: David Tardon <dtardon@redhat.com>
|
|
|
a9339c |
Date: Tue, 24 Apr 2018 15:19:38 +0200
|
|
|
a9339c |
Subject: [PATCH] fix race between daemon-reload and other commands
|
|
|
a9339c |
|
|
|
a9339c |
When "systemctl daemon-reload" is run at the same time as "systemctl
|
|
|
a9339c |
start foo", the latter might hang. That's because commands like start
|
|
|
a9339c |
wait for JobRemoved signal to know when the job is finished. But if the
|
|
|
a9339c |
job is finished during reloading, the signal is never sent.
|
|
|
a9339c |
|
|
|
a9339c |
The hang can be easily reproduced by running
|
|
|
a9339c |
|
|
|
a9339c |
# for ((N=1; N>0; N++)) ; do echo $N ; systemctl daemon-reload ; done
|
|
|
a9339c |
# for ((N=1; N>0; N++)) ; do echo $N ; systemctl start systemd-coredump.socket ; done
|
|
|
a9339c |
|
|
|
a9339c |
in two different terminals. The start command will hang after 1-2
|
|
|
a9339c |
iterations.
|
|
|
a9339c |
|
|
|
a9339c |
This keeps track of jobs that were started before reload and finished
|
|
|
a9339c |
during it and sends JobRemoved after the reload has finished.
|
|
|
a9339c |
|
|
|
a9339c |
(cherry picked from commit a7a7163df7fc8a9f794f6803b2f6c9c9b0745a1f)
|
|
|
a9339c |
---
|
|
|
a9339c |
src/core/job.c | 45 ++++++++++++++++++++++++++++++++++++++++-----
|
|
|
a9339c |
src/core/job.h | 2 ++
|
|
|
a9339c |
src/core/manager.c | 15 +++++++++++++++
|
|
|
a9339c |
src/core/manager.h | 3 +++
|
|
|
a9339c |
4 files changed, 60 insertions(+), 5 deletions(-)
|
|
|
a9339c |
|
|
|
a9339c |
diff --git a/src/core/job.c b/src/core/job.c
|
|
|
a9339c |
index 1861c8a63..275503169 100644
|
|
|
a9339c |
--- a/src/core/job.c
|
|
|
a9339c |
+++ b/src/core/job.c
|
|
|
a9339c |
@@ -53,6 +53,7 @@ Job* job_new_raw(Unit *unit) {
|
|
|
a9339c |
j->manager = unit->manager;
|
|
|
a9339c |
j->unit = unit;
|
|
|
a9339c |
j->type = _JOB_TYPE_INVALID;
|
|
|
a9339c |
+ j->reloaded = false;
|
|
|
a9339c |
|
|
|
a9339c |
return j;
|
|
|
a9339c |
}
|
|
|
a9339c |
@@ -74,7 +75,7 @@ Job* job_new(Unit *unit, JobType type) {
|
|
|
a9339c |
return j;
|
|
|
a9339c |
}
|
|
|
a9339c |
|
|
|
a9339c |
-void job_free(Job *j) {
|
|
|
a9339c |
+void job_unlink(Job *j) {
|
|
|
a9339c |
assert(j);
|
|
|
a9339c |
assert(!j->installed);
|
|
|
a9339c |
assert(!j->transaction_prev);
|
|
|
a9339c |
@@ -82,13 +83,28 @@ void job_free(Job *j) {
|
|
|
a9339c |
assert(!j->subject_list);
|
|
|
a9339c |
assert(!j->object_list);
|
|
|
a9339c |
|
|
|
a9339c |
- if (j->in_run_queue)
|
|
|
a9339c |
+ if (j->in_run_queue) {
|
|
|
a9339c |
LIST_REMOVE(run_queue, j->manager->run_queue, j);
|
|
|
a9339c |
+ j->in_run_queue = false;
|
|
|
a9339c |
+ }
|
|
|
a9339c |
|
|
|
a9339c |
- if (j->in_dbus_queue)
|
|
|
a9339c |
+ if (j->in_dbus_queue) {
|
|
|
a9339c |
LIST_REMOVE(dbus_queue, j->manager->dbus_job_queue, j);
|
|
|
a9339c |
+ j->in_dbus_queue = false;
|
|
|
a9339c |
+ }
|
|
|
a9339c |
+
|
|
|
a9339c |
+ j->timer_event_source = sd_event_source_unref(j->timer_event_source);
|
|
|
a9339c |
+}
|
|
|
a9339c |
+
|
|
|
a9339c |
+void job_free(Job *j) {
|
|
|
a9339c |
+ assert(j);
|
|
|
a9339c |
+ assert(!j->installed);
|
|
|
a9339c |
+ assert(!j->transaction_prev);
|
|
|
a9339c |
+ assert(!j->transaction_next);
|
|
|
a9339c |
+ assert(!j->subject_list);
|
|
|
a9339c |
+ assert(!j->object_list);
|
|
|
a9339c |
|
|
|
a9339c |
- sd_event_source_unref(j->timer_event_source);
|
|
|
a9339c |
+ job_unlink(j);
|
|
|
a9339c |
|
|
|
a9339c |
sd_bus_track_unref(j->clients);
|
|
|
a9339c |
strv_free(j->deserialized_clients);
|
|
|
a9339c |
@@ -246,6 +262,7 @@ int job_install_deserialized(Job *j) {
|
|
|
a9339c |
|
|
|
a9339c |
*pj = j;
|
|
|
a9339c |
j->installed = true;
|
|
|
a9339c |
+ j->reloaded = true;
|
|
|
a9339c |
|
|
|
a9339c |
if (j->state == JOB_RUNNING)
|
|
|
a9339c |
j->unit->manager->n_running_jobs++;
|
|
|
a9339c |
@@ -790,6 +807,19 @@ static void job_emit_status_message(Unit *u, JobType t, JobResult result) {
|
|
|
a9339c |
job_print_status_message(u, t, result);
|
|
|
a9339c |
}
|
|
|
a9339c |
|
|
|
a9339c |
+static int job_save_pending_finished_job(Job *j) {
|
|
|
a9339c |
+ int r;
|
|
|
a9339c |
+
|
|
|
a9339c |
+ assert(j);
|
|
|
a9339c |
+
|
|
|
a9339c |
+ r = set_ensure_allocated(&j->manager->pending_finished_jobs, NULL);
|
|
|
a9339c |
+ if (r < 0)
|
|
|
a9339c |
+ return r;
|
|
|
a9339c |
+
|
|
|
a9339c |
+ job_unlink(j);
|
|
|
a9339c |
+ return set_put(j->manager->pending_finished_jobs, j);
|
|
|
a9339c |
+}
|
|
|
a9339c |
+
|
|
|
a9339c |
int job_finish_and_invalidate(Job *j, JobResult result, bool recursive, bool already) {
|
|
|
a9339c |
Unit *u;
|
|
|
a9339c |
Unit *other;
|
|
|
a9339c |
@@ -829,7 +859,12 @@ int job_finish_and_invalidate(Job *j, JobResult result, bool recursive, bool alr
|
|
|
a9339c |
j->manager->n_failed_jobs ++;
|
|
|
a9339c |
|
|
|
a9339c |
job_uninstall(j);
|
|
|
a9339c |
- job_free(j);
|
|
|
a9339c |
+ /* Remember jobs started before the reload */
|
|
|
a9339c |
+ if (j->manager->n_reloading > 0 && j->reloaded) {
|
|
|
a9339c |
+ if (job_save_pending_finished_job(j) < 0)
|
|
|
a9339c |
+ job_free(j);
|
|
|
a9339c |
+ } else
|
|
|
a9339c |
+ job_free(j);
|
|
|
a9339c |
|
|
|
a9339c |
/* Fail depending jobs on failure */
|
|
|
a9339c |
if (result != JOB_DONE && recursive) {
|
|
|
a9339c |
diff --git a/src/core/job.h b/src/core/job.h
|
|
|
a9339c |
index 535052b48..4ae6f2802 100644
|
|
|
a9339c |
--- a/src/core/job.h
|
|
|
a9339c |
+++ b/src/core/job.h
|
|
|
a9339c |
@@ -172,10 +172,12 @@ struct Job {
|
|
|
a9339c |
bool sent_dbus_new_signal:1;
|
|
|
a9339c |
bool ignore_order:1;
|
|
|
a9339c |
bool irreversible:1;
|
|
|
a9339c |
+ bool reloaded:1;
|
|
|
a9339c |
};
|
|
|
a9339c |
|
|
|
a9339c |
Job* job_new(Unit *unit, JobType type);
|
|
|
a9339c |
Job* job_new_raw(Unit *unit);
|
|
|
a9339c |
+void job_unlink(Job *job);
|
|
|
a9339c |
void job_free(Job *job);
|
|
|
a9339c |
Job* job_install(Job *j);
|
|
|
a9339c |
int job_install_deserialized(Job *j);
|
|
|
a9339c |
diff --git a/src/core/manager.c b/src/core/manager.c
|
|
|
a9339c |
index 47b09e1e9..9c406bb5b 100644
|
|
|
a9339c |
--- a/src/core/manager.c
|
|
|
a9339c |
+++ b/src/core/manager.c
|
|
|
a9339c |
@@ -2702,6 +2702,18 @@ finish:
|
|
|
a9339c |
return r;
|
|
|
a9339c |
}
|
|
|
a9339c |
|
|
|
a9339c |
+static void manager_flush_finished_jobs(Manager *m) {
|
|
|
a9339c |
+ Job *j;
|
|
|
a9339c |
+
|
|
|
a9339c |
+ while ((j = set_steal_first(m->pending_finished_jobs))) {
|
|
|
a9339c |
+ bus_job_send_removed_signal(j);
|
|
|
a9339c |
+ job_free(j);
|
|
|
a9339c |
+ }
|
|
|
a9339c |
+
|
|
|
a9339c |
+ set_free(m->pending_finished_jobs);
|
|
|
a9339c |
+ m->pending_finished_jobs = NULL;
|
|
|
a9339c |
+}
|
|
|
a9339c |
+
|
|
|
a9339c |
int manager_reload(Manager *m) {
|
|
|
a9339c |
int r, q;
|
|
|
a9339c |
_cleanup_fclose_ FILE *f = NULL;
|
|
|
a9339c |
@@ -2784,6 +2796,9 @@ int manager_reload(Manager *m) {
|
|
|
a9339c |
assert(m->n_reloading > 0);
|
|
|
a9339c |
m->n_reloading--;
|
|
|
a9339c |
|
|
|
a9339c |
+ if (m->n_reloading <= 0)
|
|
|
a9339c |
+ manager_flush_finished_jobs(m);
|
|
|
a9339c |
+
|
|
|
a9339c |
m->send_reloading_done = true;
|
|
|
a9339c |
|
|
|
a9339c |
return r;
|
|
|
a9339c |
diff --git a/src/core/manager.h b/src/core/manager.h
|
|
|
a9339c |
index e91e7bd8b..90d2d982e 100644
|
|
|
a9339c |
--- a/src/core/manager.h
|
|
|
a9339c |
+++ b/src/core/manager.h
|
|
|
a9339c |
@@ -270,6 +270,9 @@ struct Manager {
|
|
|
a9339c |
|
|
|
a9339c |
/* non-zero if we are reloading or reexecuting, */
|
|
|
a9339c |
int n_reloading;
|
|
|
a9339c |
+ /* A set which contains all jobs that started before reload and finished
|
|
|
a9339c |
+ * during it */
|
|
|
a9339c |
+ Set *pending_finished_jobs;
|
|
|
a9339c |
|
|
|
a9339c |
unsigned n_installed_jobs;
|
|
|
a9339c |
unsigned n_failed_jobs;
|