572a44
From 1386240aee3f78a9101a118f11a7028571d33a71 Mon Sep 17 00:00:00 2001
572a44
From: Michal Sekletar <msekleta@redhat.com>
572a44
Date: Thu, 27 Feb 2014 18:16:19 +0100
572a44
Subject: [PATCH] core: watch SIGCHLD more closely to track processes of units
572a44
 with no reliable cgroup empty notifier
572a44
572a44
When a process dies that we can associate with a specific unit, start
572a44
watching all other processes of that unit, so that we can associate
572a44
those processes with the unit too.
572a44
572a44
Also, for service units start doing this as soon as we get the first
572a44
SIGCHLD for either control or main process, so that we can follow the
572a44
processes of the service from one to the other, as long as the processes that
572a44
remain are processes of the ones we watched that died and got reassigned
572a44
to us as parent.
572a44
572a44
Similarly, for scope units start doing this as soon as the scope
572a44
controller abandons the unit, and thus management entirely reverts to
572a44
systemd. To abandon a unit introduce a new Abandon() scope unit method
572a44
call.
572a44
572a44
Based-on: a911bb9ab27ac0eb3bbf4e8b4109e5da9b88eee3
572a44
---
572a44
 src/core/dbus-scope.c |  36 +++++++++----
572a44
 src/core/manager.c    |   2 +-
572a44
 src/core/scope.c      |  87 ++++++++++++++++++++++---------
572a44
 src/core/scope.h      |   5 +-
572a44
 src/core/service.c    | 140 ++++++++++++++++++++++++++++++--------------------
572a44
 src/core/unit.c       | 112 +++++++++++++++++++++++++++++++++++++++-
572a44
 src/core/unit.h       |   9 ++++
572a44
 7 files changed, 298 insertions(+), 93 deletions(-)
572a44
572a44
diff --git a/src/core/dbus-scope.c b/src/core/dbus-scope.c
572a44
index b576f76..58dd9ff 100644
572a44
--- a/src/core/dbus-scope.c
572a44
+++ b/src/core/dbus-scope.c
572a44
@@ -30,6 +30,7 @@
572a44
 
572a44
 #define BUS_SCOPE_INTERFACE                                             \
572a44
         " <interface name=\"org.freedesktop.systemd1.Scope\">\n"        \
572a44
+        "  <method name=\"Abandon\"/>\n"                                \
572a44
         BUS_UNIT_CGROUP_INTERFACE                                       \
572a44
         "  <property name=\"Controller\" type=\"s\" access=\"read\"/>\n"\
572a44
         "  <property name=\"TimeoutStopUSec\" type=\"t\" access=\"read\"/>\n" \
572a44
@@ -66,19 +67,40 @@ static const BusProperty bus_scope_properties[] = {
572a44
 
572a44
 DBusHandlerResult bus_scope_message_handler(Unit *u, DBusConnection *c, DBusMessage *message) {
572a44
         Scope *s = SCOPE(u);
572a44
+        _cleanup_dbus_message_unref_ DBusMessage *reply = NULL;
572a44
 
572a44
-        const BusBoundProperties bps[] = {
572a44
+        SELINUX_UNIT_ACCESS_CHECK(u, c, message, "status");
572a44
+
572a44
+        if (dbus_message_is_method_call(message, "org.freedesktop.systemd1.Scope", "Abandon")) {
572a44
+                int r;
572a44
+
572a44
+                r = scope_abandon(s);
572a44
+                if (r < 0)
572a44
+                        log_error("Failed to mark scope %s as abandoned : %s", UNIT(s)->id, strerror(-r));
572a44
+
572a44
+                reply = dbus_message_new_method_return(message);
572a44
+                if (!reply)
572a44
+                        goto oom;
572a44
+        } else {
572a44
+                const BusBoundProperties bps[] = {
572a44
                 { "org.freedesktop.systemd1.Unit",  bus_unit_properties,           u },
572a44
                 { "org.freedesktop.systemd1.Scope", bus_unit_cgroup_properties,    u },
572a44
                 { "org.freedesktop.systemd1.Scope", bus_scope_properties,          s },
572a44
                 { "org.freedesktop.systemd1.Scope", bus_cgroup_context_properties, &s->cgroup_context },
572a44
                 { "org.freedesktop.systemd1.Scope", bus_kill_context_properties,   &s->kill_context   },
572a44
                 {}
572a44
-        };
572a44
+                };
572a44
 
572a44
-        SELINUX_UNIT_ACCESS_CHECK(u, c, message, "status");
572a44
+               return  bus_default_message_handler(c, message, INTROSPECTION, INTERFACES_LIST, bps);
572a44
+        }
572a44
+
572a44
+        if (reply)
572a44
+                if (!bus_maybe_send_reply(c, message, reply))
572a44
+                        goto oom;
572a44
 
572a44
-        return bus_default_message_handler(c, message, INTROSPECTION, INTERFACES_LIST, bps);
572a44
+        return DBUS_HANDLER_RESULT_HANDLED;
572a44
+oom:
572a44
+        return DBUS_HANDLER_RESULT_NEED_MEMORY;
572a44
 }
572a44
 
572a44
 static int bus_scope_set_transient_property(
572a44
@@ -102,10 +124,6 @@ static int bus_scope_set_transient_property(
572a44
                     dbus_message_iter_get_element_type(i) != DBUS_TYPE_UINT32)
572a44
                         return -EINVAL;
572a44
 
572a44
-                r = set_ensure_allocated(&s->pids, trivial_hash_func, trivial_compare_func);
572a44
-                if (r < 0)
572a44
-                        return r;
572a44
-
572a44
                 dbus_message_iter_recurse(i, &sub);
572a44
                 while (dbus_message_iter_get_arg_type(&sub) == DBUS_TYPE_UINT32) {
572a44
                         uint32_t pid;
572a44
@@ -116,7 +134,7 @@ static int bus_scope_set_transient_property(
572a44
                                 return -EINVAL;
572a44
 
572a44
                         if (mode != UNIT_CHECK) {
572a44
-                                r = set_put(s->pids, LONG_TO_PTR(pid));
572a44
+                                r = unit_watch_pid(UNIT(s), pid);
572a44
                                 if (r < 0 && r != -EEXIST)
572a44
                                         return r;
572a44
                         }
572a44
diff --git a/src/core/manager.c b/src/core/manager.c
572a44
index a34a3c6..db5094f 100644
572a44
--- a/src/core/manager.c
572a44
+++ b/src/core/manager.c
572a44
@@ -1389,7 +1389,7 @@ static int manager_dispatch_sigchld(Manager *m) {
572a44
                 log_debug_unit(u->id,
572a44
                                "Child %lu belongs to %s", (long unsigned) si.si_pid, u->id);
572a44
 
572a44
-                hashmap_remove(m->watch_pids, LONG_TO_PTR(si.si_pid));
572a44
+                unit_unwatch_pid(u, si.si_pid);
572a44
                 UNIT_VTABLE(u)->sigchld_event(u, si.si_pid, si.si_code, si.si_status);
572a44
         }
572a44
 
572a44
diff --git a/src/core/scope.c b/src/core/scope.c
572a44
index e75fc2b..22bdfb2 100644
572a44
--- a/src/core/scope.c
572a44
+++ b/src/core/scope.c
572a44
@@ -35,6 +35,7 @@
572a44
 static const UnitActiveState state_translation_table[_SCOPE_STATE_MAX] = {
572a44
         [SCOPE_DEAD] = UNIT_INACTIVE,
572a44
         [SCOPE_RUNNING] = UNIT_ACTIVE,
572a44
+        [SCOPE_ABANDONED] = UNIT_ACTIVE,
572a44
         [SCOPE_STOP_SIGTERM] = UNIT_DEACTIVATING,
572a44
         [SCOPE_STOP_SIGKILL] = UNIT_DEACTIVATING,
572a44
         [SCOPE_FAILED] = UNIT_FAILED
572a44
@@ -67,9 +68,6 @@ static void scope_done(Unit *u) {
572a44
         free(s->controller);
572a44
         s->controller = NULL;
572a44
 
572a44
-        set_free(s->pids);
572a44
-        s->pids = NULL;
572a44
-
572a44
         unit_unwatch_timer(u, &s->timer_watch);
572a44
 }
572a44
 
572a44
@@ -84,6 +82,9 @@ static void scope_set_state(Scope *s, ScopeState state) {
572a44
             state != SCOPE_STOP_SIGKILL)
572a44
                 unit_unwatch_timer(UNIT(s), &s->timer_watch);
572a44
 
572a44
+        if (state == SCOPE_DEAD || state == SCOPE_FAILED)
572a44
+                unit_unwatch_all_pids(UNIT(s));
572a44
+
572a44
         if (state != old_state)
572a44
                 log_debug("%s changed %s -> %s",
572a44
                           UNIT(s)->id,
572a44
@@ -115,7 +116,7 @@ static int scope_verify(Scope *s) {
572a44
         if (UNIT(s)->load_state != UNIT_LOADED)
572a44
                 return 0;
572a44
 
572a44
-        if (set_size(s->pids) <= 0 && UNIT(s)->manager->n_reloading <= 0) {
572a44
+        if (set_size(UNIT(s)->pids) <= 0 && UNIT(s)->manager->n_reloading <= 0) {
572a44
                 log_error_unit(UNIT(s)->id, "Scope %s has no PIDs. Refusing.", UNIT(s)->id);
572a44
                 return -EINVAL;
572a44
         }
572a44
@@ -169,6 +170,9 @@ static int scope_coldplug(Unit *u) {
572a44
                                 return r;
572a44
                 }
572a44
 
572a44
+                if (s->deserialized_state != SCOPE_DEAD && s->deserialized_state != SCOPE_FAILED)
572a44
+                        unit_watch_all_pids(UNIT(s));
572a44
+
572a44
                 scope_set_state(s, s->deserialized_state);
572a44
         }
572a44
 
572a44
@@ -209,6 +213,8 @@ static void scope_enter_signal(Scope *s, ScopeState state, ScopeResult f) {
572a44
         if (f != SCOPE_SUCCESS)
572a44
                 s->result = f;
572a44
 
572a44
+        unit_watch_all_pids(UNIT(s));
572a44
+
572a44
         /* If we have a controller set let's ask the controller nicely
572a44
          * to terminate the scope, instead of us going directly into
572a44
          * SIGTERM beserk mode */
572a44
@@ -271,13 +277,10 @@ static int scope_start(Unit *u) {
572a44
                 return r;
572a44
         }
572a44
 
572a44
-        r = cg_attach_many_everywhere(u->manager->cgroup_supported, u->cgroup_path, s->pids);
572a44
+        r = cg_attach_many_everywhere(u->manager->cgroup_supported, u->cgroup_path, UNIT(s)->pids);
572a44
         if (r < 0)
572a44
                 return r;
572a44
 
572a44
-        set_free(s->pids);
572a44
-        s->pids = NULL;
572a44
-
572a44
         s->result = SCOPE_SUCCESS;
572a44
 
572a44
         scope_set_state(s, SCOPE_RUNNING);
572a44
@@ -288,13 +291,13 @@ static int scope_stop(Unit *u) {
572a44
         Scope *s = SCOPE(u);
572a44
 
572a44
         assert(s);
572a44
-        assert(s->state == SCOPE_RUNNING);
572a44
 
572a44
         if (s->state == SCOPE_STOP_SIGTERM ||
572a44
             s->state == SCOPE_STOP_SIGKILL)
572a44
                 return 0;
572a44
 
572a44
-        assert(s->state == SCOPE_RUNNING);
572a44
+        assert(s->state == SCOPE_RUNNING ||
572a44
+               s->state == SCOPE_ABANDONED);
572a44
 
572a44
         scope_enter_signal(s, SCOPE_STOP_SIGTERM, SCOPE_SUCCESS);
572a44
         return 0;
572a44
@@ -358,7 +361,7 @@ static bool scope_check_gc(Unit *u) {
572a44
         /* Never clean up scopes that still have a process around,
572a44
          * even if the scope is formally dead. */
572a44
 
572a44
-        if (UNIT(s)->cgroup_path) {
572a44
+        if (u->cgroup_path) {
572a44
                 r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, UNIT(s)->cgroup_path, true);
572a44
                 if (r <= 0)
572a44
                         return true;
572a44
@@ -367,6 +370,33 @@ static bool scope_check_gc(Unit *u) {
572a44
         return false;
572a44
 }
572a44
 
572a44
+static void scope_notify_cgroup_empty_event(Unit *u) {
572a44
+        Scope *s = SCOPE(u);
572a44
+
572a44
+        assert(u);
572a44
+
572a44
+        log_debug_unit(u->id, "%s: cgroup is empty", u->id);
572a44
+        /* Only a running, abandoned or stopping scope is moved to dead; other states are left alone. */
572a44
+        if (s->state == SCOPE_RUNNING || s->state == SCOPE_ABANDONED ||
572a44
+            s->state == SCOPE_STOP_SIGTERM || s->state == SCOPE_STOP_SIGKILL)
572a44
+                scope_enter_dead(s, SCOPE_SUCCESS);
572a44
+}
572a44
+
572a44
+static void scope_sigchld_event(Unit *u, pid_t pid, int code, int status) {
572a44
+        /* If we get a SIGCHLD event for one of the processes we were
572a44
+           interested in, then we look for others to watch, under the
572a44
+           assumption that we'll sooner or later get a SIGCHLD for
572a44
+           them, as the original process we watched was probably the
572a44
+           parent of them, and they are hence now our children. */
572a44
+
572a44
+        unit_tidy_watch_pids(u, 0, 0);
572a44
+        unit_watch_all_pids(u);
572a44
+
572a44
+        /* If the PID set is empty now, then let's finish this off */
572a44
+        if (set_isempty(u->pids))
572a44
+                scope_notify_cgroup_empty_event(u);
572a44
+}
572a44
+
572a44
 static void scope_timer_event(Unit *u, uint64_t elapsed, Watch*w) {
572a44
         Scope *s = SCOPE(u);
572a44
 
572a44
@@ -397,24 +427,30 @@ static void scope_timer_event(Unit *u, uint64_t elapsed, Watch*w) {
572a44
         }
572a44
 }
572a44
 
572a44
-static void scope_notify_cgroup_empty_event(Unit *u) {
572a44
-        Scope *s = SCOPE(u);
572a44
-        assert(u);
572a44
+int scope_abandon(Scope *s) {
572a44
+        assert(s);
572a44
 
572a44
-        log_debug_unit(u->id, "%s: cgroup is empty", u->id);
572a44
+        if (s->state != SCOPE_RUNNING && s->state != SCOPE_ABANDONED)
572a44
+                return -ESTALE;
572a44
 
572a44
-        switch (s->state) {
572a44
+        free(s->controller);
572a44
+        s->controller = NULL;
572a44
 
572a44
-        case SCOPE_RUNNING:
572a44
-        case SCOPE_STOP_SIGTERM:
572a44
-        case SCOPE_STOP_SIGKILL:
572a44
-                scope_enter_dead(s, SCOPE_SUCCESS);
572a44
+        /* The client is no longer watching the remaining processes,
572a44
+         * so let's step in here, under the assumption that the
572a44
+         * remaining processes will be sooner or later reassigned to
572a44
+         * us as parent. */
572a44
 
572a44
-                break;
572a44
+        unit_tidy_watch_pids(UNIT(s), 0, 0);
572a44
+        unit_watch_all_pids(UNIT(s));
572a44
 
572a44
-        default:
572a44
-                ;
572a44
-        }
572a44
+        /* If the PID set is empty now, then let's finish this off */
572a44
+        if (set_isempty(UNIT(s)->pids))
572a44
+                scope_notify_cgroup_empty_event(UNIT(s));
572a44
+        else
572a44
+                scope_set_state(s, SCOPE_ABANDONED);
572a44
+
572a44
+        return 0;
572a44
 }
572a44
 
572a44
 _pure_ static UnitActiveState scope_active_state(Unit *u) {
572a44
@@ -432,6 +468,7 @@ _pure_ static const char *scope_sub_state_to_string(Unit *u) {
572a44
 static const char* const scope_state_table[_SCOPE_STATE_MAX] = {
572a44
         [SCOPE_DEAD] = "dead",
572a44
         [SCOPE_RUNNING] = "running",
572a44
+        [SCOPE_ABANDONED] = "abandoned",
572a44
         [SCOPE_STOP_SIGTERM] = "stop-sigterm",
572a44
         [SCOPE_STOP_SIGKILL] = "stop-sigkill",
572a44
         [SCOPE_FAILED] = "failed",
572a44
@@ -481,6 +518,8 @@ const UnitVTable scope_vtable = {
572a44
 
572a44
         .check_gc = scope_check_gc,
572a44
 
572a44
+        .sigchld_event = scope_sigchld_event,
572a44
+
572a44
         .timer_event = scope_timer_event,
572a44
 
572a44
         .reset_failed = scope_reset_failed,
572a44
diff --git a/src/core/scope.h b/src/core/scope.h
572a44
index b4bafa7..1e9f201 100644
572a44
--- a/src/core/scope.h
572a44
+++ b/src/core/scope.h
572a44
@@ -29,6 +29,7 @@ typedef struct Scope Scope;
572a44
 typedef enum ScopeState {
572a44
         SCOPE_DEAD,
572a44
         SCOPE_RUNNING,
572a44
+        SCOPE_ABANDONED,
572a44
         SCOPE_STOP_SIGTERM,
572a44
         SCOPE_STOP_SIGKILL,
572a44
         SCOPE_FAILED,
572a44
@@ -57,13 +58,13 @@ struct Scope {
572a44
 
572a44
         char *controller;
572a44
 
572a44
-        Set *pids;
572a44
-
572a44
         Watch timer_watch;
572a44
 };
572a44
 
572a44
 extern const UnitVTable scope_vtable;
572a44
 
572a44
+int scope_abandon(Scope *s);
572a44
+
572a44
 const char* scope_state_to_string(ScopeState i) _const_;
572a44
 ScopeState scope_state_from_string(const char *s) _pure_;
572a44
 
572a44
diff --git a/src/core/service.c b/src/core/service.c
572a44
index f0acda1..41e5cb5 100644
572a44
--- a/src/core/service.c
572a44
+++ b/src/core/service.c
572a44
@@ -1546,6 +1546,11 @@ static void service_set_state(Service *s, ServiceState state) {
572a44
                 s->control_command_id = _SERVICE_EXEC_COMMAND_INVALID;
572a44
         }
572a44
 
572a44
+        if (state == SERVICE_DEAD ||
572a44
+            state == SERVICE_FAILED ||
572a44
+            state == SERVICE_AUTO_RESTART)
572a44
+                unit_unwatch_all_pids(UNIT(s));
572a44
+
572a44
         if (state != SERVICE_START_PRE &&
572a44
             state != SERVICE_START &&
572a44
             state != SERVICE_START_POST &&
572a44
@@ -1661,8 +1666,14 @@ static int service_coldplug(Unit *u) {
572a44
                                         return r;
572a44
                         }
572a44
 
572a44
+                if (s->deserialized_state != SERVICE_DEAD &&
572a44
+                    s->deserialized_state != SERVICE_FAILED &&
572a44
+                    s->deserialized_state != SERVICE_AUTO_RESTART)
572a44
+                        unit_watch_all_pids(UNIT(s));
572a44
+
572a44
                 if (s->deserialized_state == SERVICE_START_POST ||
572a44
-                    s->deserialized_state == SERVICE_RUNNING)
572a44
+                    s->deserialized_state == SERVICE_RUNNING ||
572a44
+                    s->deserialized_state == SERVICE_RELOAD)
572a44
                         service_handle_watchdog(s);
572a44
 
572a44
                 service_set_state(s, s->deserialized_state);
572a44
@@ -1970,6 +1981,7 @@ static void service_enter_stop_post(Service *s, ServiceResult f) {
572a44
                 s->result = f;
572a44
 
572a44
         service_unwatch_control_pid(s);
572a44
+        unit_watch_all_pids(UNIT(s));
572a44
 
572a44
         s->control_command = s->exec_command[SERVICE_EXEC_STOP_POST];
572a44
         if (s->control_command) {
572a44
@@ -2010,6 +2022,8 @@ static void service_enter_signal(Service *s, ServiceState state, ServiceResult f
572a44
         if (f != SERVICE_SUCCESS)
572a44
                 s->result = f;
572a44
 
572a44
+        unit_watch_all_pids(UNIT(s));
572a44
+
572a44
         r = unit_kill_context(
572a44
                         UNIT(s),
572a44
                         &s->kill_context,
572a44
@@ -2055,6 +2069,7 @@ static void service_enter_stop(Service *s, ServiceResult f) {
572a44
                 s->result = f;
572a44
 
572a44
         service_unwatch_control_pid(s);
572a44
+        unit_watch_all_pids(UNIT(s));
572a44
 
572a44
         s->control_command = s->exec_command[SERVICE_EXEC_STOP];
572a44
         if (s->control_command) {
572a44
@@ -2961,6 +2976,62 @@ fail:
572a44
         service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_RESOURCES);
572a44
 }
572a44
 
572a44
+static void service_notify_cgroup_empty_event(Unit *u) {
572a44
+        Service *s = SERVICE(u);
572a44
+
572a44
+        assert(u);
572a44
+
572a44
+        log_debug_unit(u->id, "%s: cgroup is empty", u->id);
572a44
+
572a44
+        switch (s->state) {
572a44
+
572a44
+                /* Waiting for SIGCHLD is usually more interesting,
572a44
+                 * because it includes return codes/signals. Which is
572a44
+                 * why we ignore the cgroup events for most cases,
572a44
+                 * except when we don't know pid which to expect the
572a44
+                 * SIGCHLD for. */
572a44
+
572a44
+        case SERVICE_START:
572a44
+        case SERVICE_START_POST:
572a44
+                /* If we were hoping for the daemon to write its PID file,
572a44
+                 * we can give up now. */
572a44
+                if (s->pid_file_pathspec) {
572a44
+                        log_warning_unit(u->id,
572a44
+                                         "%s never wrote its PID file. Failing.", UNIT(s)->id);
572a44
+                        service_unwatch_pid_file(s);
572a44
+                        if (s->state == SERVICE_START)
572a44
+                                service_enter_signal(s, SERVICE_FINAL_SIGTERM, SERVICE_FAILURE_RESOURCES);
572a44
+                        else
572a44
+                                service_enter_stop(s, SERVICE_FAILURE_RESOURCES);
572a44
+                }
572a44
+                break;
572a44
+
572a44
+        case SERVICE_RUNNING:
572a44
+                /* service_enter_running() will figure out what to do */
572a44
+                service_enter_running(s, SERVICE_SUCCESS);
572a44
+                break;
572a44
+
572a44
+        case SERVICE_STOP_SIGTERM:
572a44
+        case SERVICE_STOP_SIGKILL:
572a44
+
572a44
+                if (main_pid_good(s) <= 0 && !control_pid_good(s))
572a44
+                        service_enter_stop_post(s, SERVICE_SUCCESS);
572a44
+
572a44
+                break;
572a44
+
572a44
+        case SERVICE_STOP_POST:
572a44
+        case SERVICE_FINAL_SIGTERM:
572a44
+        case SERVICE_FINAL_SIGKILL:
572a44
+                if (main_pid_good(s) <= 0 && !control_pid_good(s))
572a44
+                        service_enter_dead(s, SERVICE_SUCCESS, true);
572a44
+
572a44
+                break;
572a44
+
572a44
+        default:
572a44
+                ;
572a44
+        }
572a44
+}
572a44
+
572a44
 static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
572a44
         Service *s = SERVICE(u);
572a44
         ServiceResult f;
572a44
@@ -3229,6 +3300,18 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
572a44
 
572a44
         /* Notify clients about changed exit status */
572a44
         unit_add_to_dbus_queue(u);
572a44
+
572a44
+        /* We got one SIGCHLD for the service, let's watch all
572a44
+         * processes that are now running of the service, and watch
572a44
+         * that. Among the PIDs we then watch will be children
572a44
+         * reassigned to us, which hopefully allows us to identify
572a44
+         * when all children are gone */
572a44
+        unit_tidy_watch_pids(u, s->main_pid, s->control_pid);
572a44
+        unit_watch_all_pids(u);
572a44
+
572a44
+        /* If the PID set is empty now, then let's finish this off */
572a44
+        if (set_isempty(u->pids))
572a44
+                service_notify_cgroup_empty_event(u);
572a44
 }
572a44
 
572a44
 static void service_timer_event(Unit *u, uint64_t elapsed, Watch* w) {
572a44
@@ -3332,61 +3415,6 @@ static void service_timer_event(Unit *u, uint64_t elapsed, Watch* w) {
572a44
         }
572a44
 }
572a44
 
572a44
-static void service_notify_cgroup_empty_event(Unit *u) {
572a44
-        Service *s = SERVICE(u);
572a44
-
572a44
-        assert(u);
572a44
-
572a44
-        log_debug_unit(u->id, "%s: cgroup is empty", u->id);
572a44
-
572a44
-        switch (s->state) {
572a44
-
572a44
-                /* Waiting for SIGCHLD is usually more interesting,
572a44
-                 * because it includes return codes/signals. Which is
572a44
-                 * why we ignore the cgroup events for most cases,
572a44
-                 * except when we don't know pid which to expect the
572a44
-                 * SIGCHLD for. */
572a44
-
572a44
-        case SERVICE_START:
572a44
-        case SERVICE_START_POST:
572a44
-                /* If we were hoping for the daemon to write its PID file,
572a44
-                 * we can give up now. */
572a44
-                if (s->pid_file_pathspec) {
572a44
-                        log_warning_unit(u->id,
572a44
-                                         "%s never wrote its PID file. Failing.", UNIT(s)->id);
572a44
-                        service_unwatch_pid_file(s);
572a44
-                        if (s->state == SERVICE_START)
572a44
-                                service_enter_signal(s, SERVICE_FINAL_SIGTERM, SERVICE_FAILURE_RESOURCES);
572a44
-                        else
572a44
-                                service_enter_stop(s, SERVICE_FAILURE_RESOURCES);
572a44
-                }
572a44
-                break;
572a44
-
572a44
-        case SERVICE_RUNNING:
572a44
-                /* service_enter_running() will figure out what to do */
572a44
-                service_enter_running(s, SERVICE_SUCCESS);
572a44
-                break;
572a44
-
572a44
-        case SERVICE_STOP_SIGTERM:
572a44
-        case SERVICE_STOP_SIGKILL:
572a44
-
572a44
-                if (main_pid_good(s) <= 0 && !control_pid_good(s))
572a44
-                        service_enter_stop_post(s, SERVICE_SUCCESS);
572a44
-
572a44
-                break;
572a44
-
572a44
-        case SERVICE_FINAL_SIGTERM:
572a44
-        case SERVICE_FINAL_SIGKILL:
572a44
-                if (main_pid_good(s) <= 0 && !control_pid_good(s))
572a44
-                        service_enter_dead(s, SERVICE_SUCCESS, true);
572a44
-
572a44
-                break;
572a44
-
572a44
-        default:
572a44
-                ;
572a44
-        }
572a44
-}
572a44
-
572a44
 static void service_notify_message(Unit *u, pid_t pid, char **tags) {
572a44
         Service *s = SERVICE(u);
572a44
         const char *e;
572a44
diff --git a/src/core/unit.c b/src/core/unit.c
572a44
index 6c2c4a0..0332094 100644
572a44
--- a/src/core/unit.c
572a44
+++ b/src/core/unit.c
572a44
@@ -472,6 +472,8 @@ void unit_free(Unit *u) {
572a44
 
572a44
         set_free_free(u->names);
572a44
 
572a44
+        unit_unwatch_all_pids(u);
572a44
+
572a44
         condition_free_list(u->conditions);
572a44
 
572a44
         unit_ref_unset(&u->slice);
572a44
@@ -1658,13 +1660,25 @@ void unit_unwatch_fd(Unit *u, Watch *w) {
572a44
 }
572a44
 
572a44
 int unit_watch_pid(Unit *u, pid_t pid) {
572a44
+        int q, r;
572a44
+
572a44
         assert(u);
572a44
         assert(pid >= 1);
572a44
 
572a44
+        r = set_ensure_allocated(&u->pids, trivial_hash_func, trivial_compare_func);
572a44
+        if (r < 0)
572a44
+                return r;
572a44
+
572a44
         /* Watch a specific PID. We only support one unit watching
572a44
          * each PID for now. */
572a44
 
572a44
-        return hashmap_put(u->manager->watch_pids, LONG_TO_PTR(pid), u);
572a44
+        r = set_put(u->pids, LONG_TO_PTR(pid));
572a44
+
572a44
+        q = hashmap_put(u->manager->watch_pids, LONG_TO_PTR(pid), u);
572a44
+        if (q < 0)
572a44
+                return q;
572a44
+
572a44
+        return r;
572a44
 }
572a44
 
572a44
 void unit_unwatch_pid(Unit *u, pid_t pid) {
572a44
@@ -1672,6 +1686,102 @@ void unit_unwatch_pid(Unit *u, pid_t pid) {
572a44
         assert(pid >= 1);
572a44
 
572a44
         hashmap_remove_value(u->manager->watch_pids, LONG_TO_PTR(pid), u);
572a44
+        set_remove(u->pids, LONG_TO_PTR(pid));
572a44
+}
572a44
+
572a44
+static int watch_pids_in_path(Unit *u, const char *path) {
572a44
+        _cleanup_closedir_ DIR *d = NULL;
572a44
+        _cleanup_fclose_ FILE *f = NULL;
572a44
+        int ret = 0, r;
572a44
+
572a44
+        assert(u);
572a44
+        assert(path);
572a44
+
572a44
+        /* Adds all PIDs from a specific cgroup path to the set of PIDs we watch. */
572a44
+
572a44
+        r = cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, path, &f);
572a44
+        if (r >= 0) {
572a44
+                pid_t pid;
572a44
+
572a44
+                while ((r = cg_read_pid(f, &pid)) > 0) {
572a44
+                        r = unit_watch_pid(u, pid);
572a44
+                        if (r < 0 && ret >= 0)
572a44
+                                ret = r;
572a44
+                }
572a44
+                if (r < 0 && ret >= 0)
572a44
+                        ret = r;
572a44
+
572a44
+        } else if (ret >= 0)
572a44
+                ret = r;
572a44
+
572a44
+        r = cg_enumerate_subgroups(SYSTEMD_CGROUP_CONTROLLER, path, &d);
572a44
+        if (r >= 0) {
572a44
+                char *fn;
572a44
+
572a44
+                while ((r = cg_read_subgroup(d, &fn)) > 0) {
572a44
+                        _cleanup_free_ char *p = NULL;
572a44
+
572a44
+                        p = strjoin(path, "/", fn, NULL);
572a44
+                        free(fn);
572a44
+
572a44
+                        if (!p)
572a44
+                                return -ENOMEM;
572a44
+
572a44
+                        r = watch_pids_in_path(u, p);
572a44
+                        if (r < 0 && ret >= 0)
572a44
+                                ret = r;
572a44
+                }
572a44
+                if (r < 0 && ret >= 0)
572a44
+                        ret = r;
572a44
+
572a44
+        } else if (ret >= 0)
572a44
+                ret = r;
572a44
+
572a44
+        return ret;
572a44
+}
572a44
+
572a44
+
572a44
+int unit_watch_all_pids(Unit *u) {
572a44
+        assert(u);
572a44
+
572a44
+        if (!u->cgroup_path)
572a44
+                return -ENOENT;
572a44
+
572a44
+        /* Adds all PIDs from our cgroup to the set of PIDs we watch */
572a44
+
572a44
+        return watch_pids_in_path(u, u->cgroup_path);
572a44
+}
572a44
+
572a44
+void unit_unwatch_all_pids(Unit *u) {
572a44
+        Iterator i;
572a44
+        void *e;
572a44
+
572a44
+        assert(u);
572a44
+
572a44
+        SET_FOREACH(e, u->pids, i)
572a44
+                hashmap_remove_value(u->manager->watch_pids, e, u);
572a44
+
572a44
+        set_free(u->pids);
572a44
+        u->pids = NULL;
572a44
+}
572a44
+
572a44
+void unit_tidy_watch_pids(Unit *u, pid_t except1, pid_t except2) {
572a44
+        Iterator i;
572a44
+        void *e;
572a44
+
572a44
+        assert(u);
572a44
+
572a44
+        /* Cleans dead PIDs from our list */
572a44
+
572a44
+        SET_FOREACH(e, u->pids, i) {
572a44
+                pid_t pid = PTR_TO_LONG(e);
572a44
+
572a44
+                if (pid == except1 || pid == except2)
572a44
+                        continue;
572a44
+
572a44
+                if (kill(pid, 0) < 0 && errno == ESRCH)
572a44
+                        set_remove(u->pids, e);
572a44
+        }
572a44
 }
572a44
 
572a44
 int unit_watch_timer(Unit *u, clockid_t clock_id, bool relative, usec_t usec, Watch *w) {
572a44
diff --git a/src/core/unit.h b/src/core/unit.h
572a44
index 6dd750f..6dff25e 100644
572a44
--- a/src/core/unit.h
572a44
+++ b/src/core/unit.h
572a44
@@ -198,6 +198,11 @@ struct Unit {
572a44
         /* CGroup realize members queue */
572a44
         LIST_FIELDS(Unit, cgroup_queue);
572a44
 
572a44
+        /* PIDs we keep an eye on. Note that a unit might have many
572a44
+         * more, but these are the ones we care enough about to
572a44
+         * process SIGCHLD for */
572a44
+        Set *pids;
572a44
+
572a44
         /* Used during GC sweeps */
572a44
         unsigned gc_marker;
572a44
 
572a44
@@ -531,6 +536,10 @@ void unit_unwatch_fd(Unit *u, Watch *w);
572a44
 
572a44
 int unit_watch_pid(Unit *u, pid_t pid);
572a44
 void unit_unwatch_pid(Unit *u, pid_t pid);
572a44
+int unit_watch_all_pids(Unit *u);
572a44
+void unit_unwatch_all_pids(Unit *u);
572a44
+
572a44
+void unit_tidy_watch_pids(Unit *u, pid_t except1, pid_t except2);
572a44
 
572a44
 int unit_watch_timer(Unit *u, clockid_t, bool relative, usec_t usec, Watch *w);
572a44
 void unit_unwatch_timer(Unit *u, Watch *w);