ff2b41
From c7c8a7648623a25c00dc585bd42b9037a20396c8 Mon Sep 17 00:00:00 2001
ff2b41
From: Lennart Poettering <lennart@poettering.net>
ff2b41
Date: Thu, 9 Aug 2018 16:26:27 +0200
ff2b41
Subject: [PATCH] core: rework StopWhenUnneeded= logic
ff2b41
ff2b41
Previously, we'd act immediately on StopWhenUnneeded= when a unit state
ff2b41
changes. With this rework we'll maintain a queue instead: whenever
ff2b41
there's the chance that StopWhenUnneeded= might have an effect we enqueue
ff2b41
the unit, and process it later when we have nothing better to do.
ff2b41
ff2b41
This should make the implementation a bit more reliable, as the unit notify event
ff2b41
cannot immediately enqueue tons of side-effect jobs that might
ff2b41
contradict each other, but we do so only in a strictly ordered fashion,
ff2b41
from the main event loop.
ff2b41
ff2b41
This slightly changes the check when to consider a unit "unneeded".
ff2b41
Previously, we'd assume that a unit in "deactivating" state could also
ff2b41
be cleaned up. With this new logic we'll only consider units unneeded
ff2b41
that are fully up and have no job queued. This means that whenever
ff2b41
there's something pending for a unit we won't clean it up.
ff2b41
ff2b41
(cherry picked from commit a3c1168ac293f16d9343d248795bb4c246aaff4a)
ff2b41
(cherry picked from commit 65ebf5e453846e29ab5894670a83b5d8b942c858)
ff2b41
ff2b41
Resolves: #1810576
ff2b41
---
ff2b41
 src/core/manager.c |  43 +++++++++++++++
ff2b41
 src/core/manager.h |   3 ++
ff2b41
 src/core/unit.c    | 132 ++++++++++++++++++++++++---------------------
ff2b41
 src/core/unit.h    |   7 +++
ff2b41
 4 files changed, 124 insertions(+), 61 deletions(-)
ff2b41
ff2b41
diff --git a/src/core/manager.c b/src/core/manager.c
ff2b41
index 4c87ad8a2f..f0553b4df9 100644
ff2b41
--- a/src/core/manager.c
ff2b41
+++ b/src/core/manager.c
ff2b41
@@ -961,6 +961,45 @@ static unsigned manager_dispatch_gc_queue(Manager *m) {
ff2b41
         return n;
ff2b41
 }
ff2b41
 
ff2b41
+static unsigned manager_dispatch_stop_when_unneeded_queue(Manager *m) {
ff2b41
+        unsigned n = 0;
ff2b41
+        Unit *u;
ff2b41
+        int r;
ff2b41
+
ff2b41
+        assert(m);
ff2b41
+
ff2b41
+        while ((u = m->stop_when_unneeded_queue)) {
ff2b41
+                _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
ff2b41
+                assert(m->stop_when_unneeded_queue);
ff2b41
+
ff2b41
+                assert(u->in_stop_when_unneeded_queue);
ff2b41
+                LIST_REMOVE(stop_when_unneeded_queue, m->stop_when_unneeded_queue, u);
ff2b41
+                u->in_stop_when_unneeded_queue = false;
ff2b41
+
ff2b41
+                n++;
ff2b41
+
ff2b41
+                if (!unit_is_unneeded(u))
ff2b41
+                        continue;
ff2b41
+
ff2b41
+                log_unit_debug(u->id, "Unit is not needed anymore.");
ff2b41
+
ff2b41
+                /* If stopping a unit fails continuously we might enter a stop loop here, hence stop acting on the
ff2b41
+                 * service being unnecessary after a while. */
ff2b41
+
ff2b41
+                if (!ratelimit_test(&u->check_unneeded_ratelimit)) {
ff2b41
+                        log_unit_warning(u->id, "Unit not needed anymore, but not stopping since we tried this too often recently.");
ff2b41
+                        continue;
ff2b41
+                }
ff2b41
+
ff2b41
+                /* Ok, nobody needs us anymore. Sniff. Then let's commit suicide */
ff2b41
+                r = manager_add_job(u->manager, JOB_STOP, u, JOB_FAIL, true, &error, NULL);
ff2b41
+                if (r < 0)
ff2b41
+                        log_unit_warning_errno(u->id, r, "Failed to enqueue stop job, ignoring: %s", bus_error_message(&error, r));
ff2b41
+        }
ff2b41
+
ff2b41
+        return n;
ff2b41
+}
ff2b41
+
ff2b41
 static void manager_clear_jobs_and_units(Manager *m) {
ff2b41
         Unit *u;
ff2b41
 
ff2b41
@@ -977,6 +1016,7 @@ static void manager_clear_jobs_and_units(Manager *m) {
ff2b41
         assert(!m->dbus_job_queue);
ff2b41
         assert(!m->cleanup_queue);
ff2b41
         assert(!m->gc_queue);
ff2b41
+        assert(!m->stop_when_unneeded_queue);
ff2b41
 
ff2b41
         assert(hashmap_isempty(m->jobs));
ff2b41
         assert(hashmap_isempty(m->units));
ff2b41
@@ -2259,6 +2299,9 @@ int manager_loop(Manager *m) {
ff2b41
                 if (manager_dispatch_cgroup_queue(m) > 0)
ff2b41
                         continue;
ff2b41
 
ff2b41
+                if (manager_dispatch_stop_when_unneeded_queue(m) > 0)
ff2b41
+                        continue;
ff2b41
+
ff2b41
                 if (manager_dispatch_dbus_queue(m) > 0)
ff2b41
                         continue;
ff2b41
 
ff2b41
diff --git a/src/core/manager.h b/src/core/manager.h
ff2b41
index cfc564dfb6..f9280956e9 100644
ff2b41
--- a/src/core/manager.h
ff2b41
+++ b/src/core/manager.h
ff2b41
@@ -117,6 +117,9 @@ struct Manager {
ff2b41
         /* Target units whose default target dependencies haven't been set yet */
ff2b41
         LIST_HEAD(Unit, target_deps_queue);
ff2b41
 
ff2b41
+        /* Units that might be subject to StopWhenUnneeded= clean-up */
ff2b41
+        LIST_HEAD(Unit, stop_when_unneeded_queue);
ff2b41
+
ff2b41
         sd_event *event;
ff2b41
 
ff2b41
         /* We use two hash tables here, since the same PID might be
ff2b41
diff --git a/src/core/unit.c b/src/core/unit.c
ff2b41
index 583b9fae28..dc2df4c89c 100644
ff2b41
--- a/src/core/unit.c
ff2b41
+++ b/src/core/unit.c
ff2b41
@@ -380,6 +380,22 @@ void unit_add_to_dbus_queue(Unit *u) {
ff2b41
         u->in_dbus_queue = true;
ff2b41
 }
ff2b41
 
ff2b41
+void unit_add_to_stop_when_unneeded_queue(Unit *u) {
ff2b41
+        assert(u);
ff2b41
+
ff2b41
+        if (u->in_stop_when_unneeded_queue)
ff2b41
+                return;
ff2b41
+
ff2b41
+        if (!u->stop_when_unneeded)
ff2b41
+                return;
ff2b41
+
ff2b41
+        if (!UNIT_IS_ACTIVE_OR_RELOADING(unit_active_state(u)))
ff2b41
+                return;
ff2b41
+
ff2b41
+        LIST_PREPEND(stop_when_unneeded_queue, u->manager->stop_when_unneeded_queue, u);
ff2b41
+        u->in_stop_when_unneeded_queue = true;
ff2b41
+}
ff2b41
+
ff2b41
 static void bidi_set_free(Unit *u, Set *s) {
ff2b41
         Iterator i;
ff2b41
         Unit *other;
ff2b41
@@ -544,6 +560,9 @@ void unit_free(Unit *u) {
ff2b41
                 u->manager->n_in_gc_queue--;
ff2b41
         }
ff2b41
 
ff2b41
+        if (u->in_stop_when_unneeded_queue)
ff2b41
+                LIST_REMOVE(stop_when_unneeded_queue, u->manager->stop_when_unneeded_queue, u);
ff2b41
+
ff2b41
         if (u->on_console)
ff2b41
                 manager_unref_console(u->manager);
ff2b41
 
ff2b41
@@ -1565,49 +1584,68 @@ bool unit_can_reload(Unit *u) {
ff2b41
         return UNIT_VTABLE(u)->can_reload(u);
ff2b41
 }
ff2b41
 
ff2b41
-static void unit_check_unneeded(Unit *u) {
ff2b41
-        Iterator i;
ff2b41
-        Unit *other;
ff2b41
+bool unit_is_unneeded(Unit *u) {
ff2b41
+        static const UnitDependency deps[] = {
ff2b41
+                UNIT_REQUIRED_BY,
ff2b41
+                UNIT_REQUIRED_BY_OVERRIDABLE,
ff2b41
+                UNIT_WANTED_BY,
ff2b41
+                UNIT_BOUND_BY,
ff2b41
+        };
ff2b41
+        size_t j;
ff2b41
 
ff2b41
         assert(u);
ff2b41
 
ff2b41
-        /* If this service shall be shut down when unneeded then do
ff2b41
-         * so. */
ff2b41
-
ff2b41
         if (!u->stop_when_unneeded)
ff2b41
-                return;
ff2b41
+                return false;
ff2b41
 
ff2b41
-        if (!UNIT_IS_ACTIVE_OR_ACTIVATING(unit_active_state(u)))
ff2b41
-                return;
ff2b41
+        /* Don't clean up while the unit is transitioning or is even inactive. */
ff2b41
+        if (!UNIT_IS_ACTIVE_OR_RELOADING(unit_active_state(u)))
ff2b41
+                return false;
ff2b41
+        if (u->job)
ff2b41
+                return false;
ff2b41
 
ff2b41
-        SET_FOREACH(other, u->dependencies[UNIT_REQUIRED_BY], i)
ff2b41
-                if (unit_active_or_pending(other))
ff2b41
-                        return;
ff2b41
+        for (j = 0; j < ELEMENTSOF(deps); j++) {
ff2b41
+                Unit *other;
ff2b41
+                Iterator i;
ff2b41
 
ff2b41
-        SET_FOREACH(other, u->dependencies[UNIT_REQUIRED_BY_OVERRIDABLE], i)
ff2b41
-                if (unit_active_or_pending(other))
ff2b41
-                        return;
ff2b41
+                /* If a dependent unit has a job queued, or is active (or transitioning), or is marked for
ff2b41
+                 * restart, then don't clean this one up. */
ff2b41
 
ff2b41
-        SET_FOREACH(other, u->dependencies[UNIT_WANTED_BY], i)
ff2b41
-                if (unit_active_or_pending(other))
ff2b41
-                        return;
ff2b41
+                SET_FOREACH(other, u->dependencies[deps[j]], i) {
ff2b41
+                        if (other->job)
ff2b41
+                                return false;
ff2b41
 
ff2b41
-        SET_FOREACH(other, u->dependencies[UNIT_BOUND_BY], i)
ff2b41
-                if (unit_active_or_pending(other))
ff2b41
-                        return;
ff2b41
-
ff2b41
-        /* If stopping a unit fails continously we might enter a stop
ff2b41
-         * loop here, hence stop acting on the service being
ff2b41
-         * unnecessary after a while. */
ff2b41
-        if (!ratelimit_test(&u->check_unneeded_ratelimit)) {
ff2b41
-                log_unit_warning(u->id, "Unit not needed anymore, but not stopping since we tried this too often recently.");
ff2b41
-                return;
ff2b41
+                        if (!UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(other)))
ff2b41
+                                return false;
ff2b41
+                }
ff2b41
         }
ff2b41
 
ff2b41
-        log_unit_info(u->id, "Unit %s is not needed anymore. Stopping.", u->id);
ff2b41
+        return true;
ff2b41
+}
ff2b41
 
ff2b41
-        /* Ok, nobody needs us anymore. Sniff. Then let's commit suicide */
ff2b41
-        manager_add_job(u->manager, JOB_STOP, u, JOB_FAIL, true, NULL, NULL);
ff2b41
+static void check_unneeded_dependencies(Unit *u) {
ff2b41
+
ff2b41
+        static const UnitDependency deps[] = {
ff2b41
+                UNIT_REQUIRES,
ff2b41
+                UNIT_REQUIRES_OVERRIDABLE,
ff2b41
+                UNIT_REQUISITE,
ff2b41
+                UNIT_REQUISITE_OVERRIDABLE,
ff2b41
+                UNIT_WANTS,
ff2b41
+                UNIT_BINDS_TO,
ff2b41
+        };
ff2b41
+        size_t j;
ff2b41
+
ff2b41
+        assert(u);
ff2b41
+
ff2b41
+        /* Add all units this unit depends on to the queue that processes StopWhenUnneeded= behaviour. */
ff2b41
+
ff2b41
+        for (j = 0; j < ELEMENTSOF(deps); j++) {
ff2b41
+                Unit *other;
ff2b41
+                Iterator i;
ff2b41
+
ff2b41
+                SET_FOREACH(other, u->dependencies[deps[j]], i)
ff2b41
+                        unit_add_to_stop_when_unneeded_queue(other);
ff2b41
+        }
ff2b41
 }
ff2b41
 
ff2b41
 static void unit_check_binds_to(Unit *u) {
ff2b41
@@ -1693,34 +1731,6 @@ static void retroactively_stop_dependencies(Unit *u) {
ff2b41
                         manager_add_job(u->manager, JOB_STOP, other, JOB_REPLACE, true, NULL, NULL);
ff2b41
 }
ff2b41
 
ff2b41
-static void check_unneeded_dependencies(Unit *u) {
ff2b41
-        Iterator i;
ff2b41
-        Unit *other;
ff2b41
-
ff2b41
-        assert(u);
ff2b41
-        assert(UNIT_IS_INACTIVE_OR_DEACTIVATING(unit_active_state(u)));
ff2b41
-
ff2b41
-        /* Garbage collect services that might not be needed anymore, if enabled */
ff2b41
-        SET_FOREACH(other, u->dependencies[UNIT_REQUIRES], i)
ff2b41
-                if (!UNIT_IS_INACTIVE_OR_DEACTIVATING(unit_active_state(other)))
ff2b41
-                        unit_check_unneeded(other);
ff2b41
-        SET_FOREACH(other, u->dependencies[UNIT_REQUIRES_OVERRIDABLE], i)
ff2b41
-                if (!UNIT_IS_INACTIVE_OR_DEACTIVATING(unit_active_state(other)))
ff2b41
-                        unit_check_unneeded(other);
ff2b41
-        SET_FOREACH(other, u->dependencies[UNIT_WANTS], i)
ff2b41
-                if (!UNIT_IS_INACTIVE_OR_DEACTIVATING(unit_active_state(other)))
ff2b41
-                        unit_check_unneeded(other);
ff2b41
-        SET_FOREACH(other, u->dependencies[UNIT_REQUISITE], i)
ff2b41
-                if (!UNIT_IS_INACTIVE_OR_DEACTIVATING(unit_active_state(other)))
ff2b41
-                        unit_check_unneeded(other);
ff2b41
-        SET_FOREACH(other, u->dependencies[UNIT_REQUISITE_OVERRIDABLE], i)
ff2b41
-                if (!UNIT_IS_INACTIVE_OR_DEACTIVATING(unit_active_state(other)))
ff2b41
-                        unit_check_unneeded(other);
ff2b41
-        SET_FOREACH(other, u->dependencies[UNIT_BINDS_TO], i)
ff2b41
-                if (!UNIT_IS_INACTIVE_OR_DEACTIVATING(unit_active_state(other)))
ff2b41
-                        unit_check_unneeded(other);
ff2b41
-}
ff2b41
-
ff2b41
 void unit_start_on_failure(Unit *u) {
ff2b41
         Unit *other;
ff2b41
         Iterator i;
ff2b41
@@ -1898,7 +1908,7 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su
ff2b41
                 }
ff2b41
 
ff2b41
                 /* stop unneeded units regardless if going down was expected or not */
ff2b41
-                if (UNIT_IS_INACTIVE_OR_DEACTIVATING(ns))
ff2b41
+                if (UNIT_IS_INACTIVE_OR_FAILED(ns))
ff2b41
                         check_unneeded_dependencies(u);
ff2b41
 
ff2b41
                 if (ns != os && ns == UNIT_FAILED) {
ff2b41
@@ -1959,7 +1969,7 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su
ff2b41
         if (u->manager->n_reloading <= 0) {
ff2b41
                 /* Maybe we finished startup and are now ready for
ff2b41
                  * being stopped because unneeded? */
ff2b41
-                unit_check_unneeded(u);
ff2b41
+                unit_add_to_stop_when_unneeded_queue(u);
ff2b41
 
ff2b41
                 /* Maybe we finished startup, but something we needed
ff2b41
                  * has vanished? Let's die then. (This happens when
ff2b41
diff --git a/src/core/unit.h b/src/core/unit.h
ff2b41
index 29353ea81c..38c97397ee 100644
ff2b41
--- a/src/core/unit.h
ff2b41
+++ b/src/core/unit.h
ff2b41
@@ -165,6 +165,9 @@ struct Unit {
ff2b41
         /* Target dependencies queue */
ff2b41
         LIST_FIELDS(Unit, target_deps_queue);
ff2b41
 
ff2b41
+        /* Queue of units with StopWhenUnneeded set that shall be checked for clean-up. */
ff2b41
+        LIST_FIELDS(Unit, stop_when_unneeded_queue);
ff2b41
+
ff2b41
         /* PIDs we keep an eye on. Note that a unit might have many
ff2b41
          * more, but these are the ones we care enough about to
ff2b41
          * process SIGCHLD for */
ff2b41
@@ -235,6 +238,7 @@ struct Unit {
ff2b41
         bool in_gc_queue:1;
ff2b41
         bool in_cgroup_queue:1;
ff2b41
         bool in_target_deps_queue:1;
ff2b41
+        bool in_stop_when_unneeded_queue:1;
ff2b41
 
ff2b41
         bool sent_dbus_new_signal:1;
ff2b41
 
ff2b41
@@ -508,6 +512,7 @@ void unit_add_to_dbus_queue(Unit *u);
ff2b41
 void unit_add_to_cleanup_queue(Unit *u);
ff2b41
 void unit_add_to_gc_queue(Unit *u);
ff2b41
 void unit_add_to_target_deps_queue(Unit *u);
ff2b41
+void unit_add_to_stop_when_unneeded_queue(Unit *u);
ff2b41
 
ff2b41
 int unit_merge(Unit *u, Unit *other);
ff2b41
 int unit_merge_by_name(Unit *u, const char *other);
ff2b41
@@ -627,6 +632,8 @@ int unit_make_transient(Unit *u);
ff2b41
 
ff2b41
 int unit_require_mounts_for(Unit *u, const char *path);
ff2b41
 
ff2b41
+bool unit_is_unneeded(Unit *u);
ff2b41
+
ff2b41
 pid_t unit_control_pid(Unit *u);
ff2b41
 pid_t unit_main_pid(Unit *u);
ff2b41