valeriyvdovin / rpms / systemd

Forked from rpms/systemd 4 years ago
Clone
923a60
From 13bcf85ffab4b4e67039599246604a3f5b503975 Mon Sep 17 00:00:00 2001
923a60
From: David Tardon <dtardon@redhat.com>
923a60
Date: Tue, 24 Apr 2018 15:19:38 +0200
923a60
Subject: [PATCH] fix race between daemon-reload and other commands
923a60
923a60
When "systemctl daemon-reload" is run at the same time as "systemctl
923a60
start foo", the latter might hang. That's because commands like start
923a60
wait for JobRemoved signal to know when the job is finished. But if the
923a60
job is finished during reloading, the signal is never sent.
923a60
923a60
The hang can be easily reproduced by running
923a60
923a60
    # for ((N=1; N>0; N++)) ; do echo $N ; systemctl daemon-reload ; done
923a60
    # for ((N=1; N>0; N++)) ; do echo $N ; systemctl start systemd-coredump.socket ; done
923a60
923a60
in two different terminals. The start command will hang after 1-2
923a60
iterations.
923a60
923a60
This keeps track of jobs that were started before reload and finished
923a60
during it and sends JobRemoved after the reload has finished.
923a60
923a60
(cherry picked from commit a7a7163df7fc8a9f794f6803b2f6c9c9b0745a1f)
923a60
---
923a60
 src/core/job.c     | 45 ++++++++++++++++++++++++++++++++++++++++-----
923a60
 src/core/job.h     |  2 ++
923a60
 src/core/manager.c | 15 +++++++++++++++
923a60
 src/core/manager.h |  3 +++
923a60
 4 files changed, 60 insertions(+), 5 deletions(-)
923a60
923a60
diff --git a/src/core/job.c b/src/core/job.c
923a60
index 1861c8a633..275503169b 100644
923a60
--- a/src/core/job.c
923a60
+++ b/src/core/job.c
923a60
@@ -53,6 +53,7 @@ Job* job_new_raw(Unit *unit) {
923a60
         j->manager = unit->manager;
923a60
         j->unit = unit;
923a60
         j->type = _JOB_TYPE_INVALID;
923a60
+        j->reloaded = false;
923a60
 
923a60
         return j;
923a60
 }
923a60
@@ -74,7 +75,7 @@ Job* job_new(Unit *unit, JobType type) {
923a60
         return j;
923a60
 }
923a60
 
923a60
-void job_free(Job *j) {
923a60
+void job_unlink(Job *j) {
923a60
         assert(j);
923a60
         assert(!j->installed);
923a60
         assert(!j->transaction_prev);
923a60
@@ -82,13 +83,28 @@ void job_free(Job *j) {
923a60
         assert(!j->subject_list);
923a60
         assert(!j->object_list);
923a60
 
923a60
-        if (j->in_run_queue)
923a60
+        if (j->in_run_queue) {
923a60
                 LIST_REMOVE(run_queue, j->manager->run_queue, j);
923a60
+                j->in_run_queue = false;
923a60
+        }
923a60
 
923a60
-        if (j->in_dbus_queue)
923a60
+        if (j->in_dbus_queue) {
923a60
                 LIST_REMOVE(dbus_queue, j->manager->dbus_job_queue, j);
923a60
+                j->in_dbus_queue = false;
923a60
+        }
923a60
+
923a60
+        j->timer_event_source = sd_event_source_unref(j->timer_event_source);
923a60
+}
923a60
+
923a60
+void job_free(Job *j) {
923a60
+        assert(j);
923a60
+        assert(!j->installed);
923a60
+        assert(!j->transaction_prev);
923a60
+        assert(!j->transaction_next);
923a60
+        assert(!j->subject_list);
923a60
+        assert(!j->object_list);
923a60
 
923a60
-        sd_event_source_unref(j->timer_event_source);
923a60
+        job_unlink(j);
923a60
 
923a60
         sd_bus_track_unref(j->clients);
923a60
         strv_free(j->deserialized_clients);
923a60
@@ -246,6 +262,7 @@ int job_install_deserialized(Job *j) {
923a60
 
923a60
         *pj = j;
923a60
         j->installed = true;
923a60
+        j->reloaded = true;
923a60
 
923a60
         if (j->state == JOB_RUNNING)
923a60
                 j->unit->manager->n_running_jobs++;
923a60
@@ -790,6 +807,19 @@ static void job_emit_status_message(Unit *u, JobType t, JobResult result) {
923a60
                 job_print_status_message(u, t, result);
923a60
 }
923a60
 
923a60
+static int job_save_pending_finished_job(Job *j) {
923a60
+        int r;
923a60
+
923a60
+        assert(j);
923a60
+
923a60
+        r = set_ensure_allocated(&j->manager->pending_finished_jobs, NULL);
923a60
+        if (r < 0)
923a60
+                return r;
923a60
+
923a60
+        job_unlink(j);
923a60
+        return set_put(j->manager->pending_finished_jobs, j);
923a60
+}
923a60
+
923a60
 int job_finish_and_invalidate(Job *j, JobResult result, bool recursive, bool already) {
923a60
         Unit *u;
923a60
         Unit *other;
923a60
@@ -829,7 +859,12 @@ int job_finish_and_invalidate(Job *j, JobResult result, bool recursive, bool alr
923a60
                 j->manager->n_failed_jobs ++;
923a60
 
923a60
         job_uninstall(j);
923a60
-        job_free(j);
923a60
+        /* Remember jobs started before the reload */
923a60
+        if (j->manager->n_reloading > 0 && j->reloaded) {
923a60
+                if (job_save_pending_finished_job(j) < 0)
923a60
+                        job_free(j);
923a60
+        } else
923a60
+                job_free(j);
923a60
 
923a60
         /* Fail depending jobs on failure */
923a60
         if (result != JOB_DONE && recursive) {
923a60
diff --git a/src/core/job.h b/src/core/job.h
923a60
index 535052b48f..4ae6f28020 100644
923a60
--- a/src/core/job.h
923a60
+++ b/src/core/job.h
923a60
@@ -172,10 +172,12 @@ struct Job {
923a60
         bool sent_dbus_new_signal:1;
923a60
         bool ignore_order:1;
923a60
         bool irreversible:1;
923a60
+        bool reloaded:1;
923a60
 };
923a60
 
923a60
 Job* job_new(Unit *unit, JobType type);
923a60
 Job* job_new_raw(Unit *unit);
923a60
+void job_unlink(Job *job);
923a60
 void job_free(Job *job);
923a60
 Job* job_install(Job *j);
923a60
 int job_install_deserialized(Job *j);
923a60
diff --git a/src/core/manager.c b/src/core/manager.c
923a60
index 47b09e1e93..9c406bb5bf 100644
923a60
--- a/src/core/manager.c
923a60
+++ b/src/core/manager.c
923a60
@@ -2702,6 +2702,18 @@ finish:
923a60
         return r;
923a60
 }
923a60
 
923a60
+static void manager_flush_finished_jobs(Manager *m) {
923a60
+        Job *j;
923a60
+
923a60
+        while ((j = set_steal_first(m->pending_finished_jobs))) {
923a60
+                bus_job_send_removed_signal(j);
923a60
+                job_free(j);
923a60
+        }
923a60
+
923a60
+        set_free(m->pending_finished_jobs);
923a60
+        m->pending_finished_jobs = NULL;
923a60
+}
923a60
+
923a60
 int manager_reload(Manager *m) {
923a60
         int r, q;
923a60
         _cleanup_fclose_ FILE *f = NULL;
923a60
@@ -2784,6 +2796,9 @@ int manager_reload(Manager *m) {
923a60
         assert(m->n_reloading > 0);
923a60
         m->n_reloading--;
923a60
 
923a60
+        if (m->n_reloading <= 0)
923a60
+                manager_flush_finished_jobs(m);
923a60
+
923a60
         m->send_reloading_done = true;
923a60
 
923a60
         return r;
923a60
diff --git a/src/core/manager.h b/src/core/manager.h
923a60
index e91e7bd8b3..90d2d982e1 100644
923a60
--- a/src/core/manager.h
923a60
+++ b/src/core/manager.h
923a60
@@ -270,6 +270,9 @@ struct Manager {
923a60
 
923a60
         /* non-zero if we are reloading or reexecuting, */
923a60
         int n_reloading;
923a60
+        /* A set which contains all jobs that started before reload and finished
923a60
+         * during it */
923a60
+        Set *pending_finished_jobs;
923a60
 
923a60
         unsigned n_installed_jobs;
923a60
         unsigned n_failed_jobs;