ff2b41
From 7aff4320eab8b12b19a8800a13f6573c0cdda946 Mon Sep 17 00:00:00 2001
ff2b41
From: Franck Bui <fbui@suse.com>
ff2b41
Date: Mon, 18 Mar 2019 20:59:36 +0100
ff2b41
Subject: [PATCH] core: reduce the number of stalled PIDs from the watched
ff2b41
 processes list when possible
ff2b41
ff2b41
Some PIDs can remain in the watched list even though their processes have
ff2b41
exited since a long time. It can easily happen if the main process of a forking
ff2b41
service manages to spawn a child before the control process exits for example.
ff2b41
ff2b41
However when a pid is about to be mapped to a unit by calling unit_watch_pid(),
ff2b41
the caller usually knows if the pid should belong to this unit exclusively: if
ff2b41
we just forked() off a child, then we can be sure that its PID is otherwise
ff2b41
unused. In this case we take this opportunity to remove any stalled PIDs from
ff2b41
the watched process list.
ff2b41
ff2b41
If we learnt about a PID in any other form (for example via PID file, via
ff2b41
searching, MAINPID= and so on), then we can't assume anything.
ff2b41
ff2b41
(cherry picked from commit f75f613d259e9332be8e9657fa37e73f7aabcb8a)
ff2b41
ff2b41
Resolves: #1501796
ff2b41
---
ff2b41
 src/core/busname.c    |  4 ++--
ff2b41
 src/core/cgroup.c     |  8 +++-----
ff2b41
 src/core/dbus-scope.c |  2 +-
ff2b41
 src/core/mount.c      |  4 ++--
ff2b41
 src/core/service.c    | 24 ++++++++++--------------
ff2b41
 src/core/socket.c     |  6 +++---
ff2b41
 src/core/swap.c       |  4 ++--
ff2b41
 src/core/unit.c       | 25 ++++++++++++++++++++++---
ff2b41
 src/core/unit.h       |  2 +-
ff2b41
 src/shared/util.c     | 14 ++++++++++++++
ff2b41
 src/shared/util.h     |  1 +
ff2b41
 11 files changed, 61 insertions(+), 33 deletions(-)
ff2b41
ff2b41
diff --git a/src/core/busname.c b/src/core/busname.c
ff2b41
index f5553e5418..8d149aae4e 100644
ff2b41
--- a/src/core/busname.c
ff2b41
+++ b/src/core/busname.c
ff2b41
@@ -350,7 +350,7 @@ static int busname_coldplug(Unit *u, Hashmap *deferred_work) {
ff2b41
                 if (n->control_pid <= 0)
ff2b41
                         return -EBADMSG;
ff2b41
 
ff2b41
-                r = unit_watch_pid(UNIT(n), n->control_pid);
ff2b41
+                r = unit_watch_pid(UNIT(n), n->control_pid, false);
ff2b41
                 if (r < 0)
ff2b41
                         return r;
ff2b41
 
ff2b41
@@ -413,7 +413,7 @@ static int busname_make_starter(BusName *n, pid_t *_pid) {
ff2b41
                 _exit(ret);
ff2b41
         }
ff2b41
 
ff2b41
-        r = unit_watch_pid(UNIT(n), pid);
ff2b41
+        r = unit_watch_pid(UNIT(n), pid, true);
ff2b41
         if (r < 0)
ff2b41
                 goto fail;
ff2b41
 
ff2b41
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
ff2b41
index 0779fa5552..0ce265dbf4 100644
ff2b41
--- a/src/core/cgroup.c
ff2b41
+++ b/src/core/cgroup.c
ff2b41
@@ -26,6 +26,7 @@
ff2b41
 #include "special.h"
ff2b41
 #include "cgroup-util.h"
ff2b41
 #include "cgroup.h"
ff2b41
+#include "util.h"
ff2b41
 
ff2b41
 #define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)
ff2b41
 
ff2b41
@@ -881,7 +882,7 @@ void unit_destroy_cgroup_if_empty(Unit *u) {
ff2b41
 
ff2b41
 pid_t unit_search_main_pid(Unit *u) {
ff2b41
         _cleanup_fclose_ FILE *f = NULL;
ff2b41
-        pid_t pid = 0, npid, mypid;
ff2b41
+        pid_t pid = 0, npid;
ff2b41
 
ff2b41
         assert(u);
ff2b41
 
ff2b41
@@ -891,15 +892,12 @@ pid_t unit_search_main_pid(Unit *u) {
ff2b41
         if (cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, &f) < 0)
ff2b41
                 return 0;
ff2b41
 
ff2b41
-        mypid = getpid();
ff2b41
         while (cg_read_pid(f, &npid) > 0)  {
ff2b41
-                pid_t ppid;
ff2b41
-
ff2b41
                 if (npid == pid)
ff2b41
                         continue;
ff2b41
 
ff2b41
                 /* Ignore processes that aren't our kids */
ff2b41
-                if (get_parent_of_pid(npid, &ppid) >= 0 && ppid != mypid)
ff2b41
+                if (pid_is_my_child(npid) == 0)
ff2b41
                         continue;
ff2b41
 
ff2b41
                 if (pid != 0) {
ff2b41
diff --git a/src/core/dbus-scope.c b/src/core/dbus-scope.c
ff2b41
index 60215a1935..f31360ad56 100644
ff2b41
--- a/src/core/dbus-scope.c
ff2b41
+++ b/src/core/dbus-scope.c
ff2b41
@@ -93,7 +93,7 @@ static int bus_scope_set_transient_property(
ff2b41
                                 return -EINVAL;
ff2b41
 
ff2b41
                         if (mode != UNIT_CHECK) {
ff2b41
-                                r = unit_watch_pid(UNIT(s), pid);
ff2b41
+                                r = unit_watch_pid(UNIT(s), pid, false);
ff2b41
                                 if (r < 0 && r != -EEXIST)
ff2b41
                                         return r;
ff2b41
                         }
ff2b41
diff --git a/src/core/mount.c b/src/core/mount.c
ff2b41
index bfbfc10731..3167bd6bb1 100644
ff2b41
--- a/src/core/mount.c
ff2b41
+++ b/src/core/mount.c
ff2b41
@@ -659,7 +659,7 @@ static int mount_coldplug(Unit *u, Hashmap *deferred_work) {
ff2b41
                 if (m->control_pid <= 0)
ff2b41
                         return -EBADMSG;
ff2b41
 
ff2b41
-                r = unit_watch_pid(UNIT(m), m->control_pid);
ff2b41
+                r = unit_watch_pid(UNIT(m), m->control_pid, false);
ff2b41
                 if (r < 0)
ff2b41
                         return r;
ff2b41
 
ff2b41
@@ -751,7 +751,7 @@ static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) {
ff2b41
         if (r < 0)
ff2b41
                 goto fail;
ff2b41
 
ff2b41
-        r = unit_watch_pid(UNIT(m), pid);
ff2b41
+        r = unit_watch_pid(UNIT(m), pid, true);
ff2b41
         if (r < 0)
ff2b41
                 /* FIXME: we need to do something here */
ff2b41
                 goto fail;
ff2b41
diff --git a/src/core/service.c b/src/core/service.c
ff2b41
index f7b859d076..1ad154e41f 100644
ff2b41
--- a/src/core/service.c
ff2b41
+++ b/src/core/service.c
ff2b41
@@ -145,8 +145,6 @@ static void service_unwatch_pid_file(Service *s) {
ff2b41
 }
ff2b41
 
ff2b41
 static int service_set_main_pid(Service *s, pid_t pid) {
ff2b41
-        pid_t ppid;
ff2b41
-
ff2b41
         assert(s);
ff2b41
 
ff2b41
         if (pid <= 1)
ff2b41
@@ -165,12 +163,10 @@ static int service_set_main_pid(Service *s, pid_t pid) {
ff2b41
 
ff2b41
         s->main_pid = pid;
ff2b41
         s->main_pid_known = true;
ff2b41
+        s->main_pid_alien = pid_is_my_child(pid) == 0;
ff2b41
 
ff2b41
-        if (get_parent_of_pid(pid, &ppid) >= 0 && ppid != getpid()) {
ff2b41
+        if (s->main_pid_alien)
ff2b41
                 log_unit_warning(UNIT(s)->id, "%s: Supervising process "PID_FMT" which is not our child. We'll most likely not notice when it exits.", UNIT(s)->id, pid);
ff2b41
-                s->main_pid_alien = true;
ff2b41
-        } else
ff2b41
-                s->main_pid_alien = false;
ff2b41
 
ff2b41
         return 0;
ff2b41
 }
ff2b41
@@ -809,7 +805,7 @@ static int service_load_pid_file(Service *s, bool may_warn) {
ff2b41
         if (r < 0)
ff2b41
                 return r;
ff2b41
 
ff2b41
-        r = unit_watch_pid(UNIT(s), pid);
ff2b41
+        r = unit_watch_pid(UNIT(s), pid, false);
ff2b41
         if (r < 0) {
ff2b41
                 /* FIXME: we need to do something here */
ff2b41
                 log_unit_warning(UNIT(s)->id, "Failed to watch PID "PID_FMT" from service %s", pid, UNIT(s)->id);
ff2b41
@@ -844,7 +840,7 @@ static int service_search_main_pid(Service *s) {
ff2b41
         if (r < 0)
ff2b41
                 return r;
ff2b41
 
ff2b41
-        r = unit_watch_pid(UNIT(s), pid);
ff2b41
+        r = unit_watch_pid(UNIT(s), pid, false);
ff2b41
         if (r < 0) {
ff2b41
                 /* FIXME: we need to do something here */
ff2b41
                 log_unit_warning(UNIT(s)->id, "Failed to watch PID "PID_FMT" from service %s", pid, UNIT(s)->id);
ff2b41
@@ -989,7 +985,7 @@ static int service_coldplug(Unit *u, Hashmap *deferred_work) {
ff2b41
                             SERVICE_STOP, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL,
ff2b41
                             SERVICE_STOP_SIGABRT, SERVICE_STOP_POST,
ff2b41
                             SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL))) {
ff2b41
-                        r = unit_watch_pid(UNIT(s), s->main_pid);
ff2b41
+                        r = unit_watch_pid(UNIT(s), s->main_pid, false);
ff2b41
                         if (r < 0)
ff2b41
                                 return r;
ff2b41
                 }
ff2b41
@@ -1001,7 +997,7 @@ static int service_coldplug(Unit *u, Hashmap *deferred_work) {
ff2b41
                            SERVICE_STOP, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL,
ff2b41
                            SERVICE_STOP_SIGABRT, SERVICE_STOP_POST,
ff2b41
                            SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) {
ff2b41
-                        r = unit_watch_pid(UNIT(s), s->control_pid);
ff2b41
+                        r = unit_watch_pid(UNIT(s), s->control_pid, false);
ff2b41
                         if (r < 0)
ff2b41
                                 return r;
ff2b41
                 }
ff2b41
@@ -1271,7 +1267,7 @@ static int service_spawn(
ff2b41
         if (r < 0)
ff2b41
                 goto fail;
ff2b41
 
ff2b41
-        r = unit_watch_pid(UNIT(s), pid);
ff2b41
+        r = unit_watch_pid(UNIT(s), pid, true);
ff2b41
         if (r < 0)
ff2b41
                 /* FIXME: we need to do something here */
ff2b41
                 goto fail;
ff2b41
@@ -2325,7 +2321,7 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value,
ff2b41
                         log_unit_debug(u->id, "Failed to parse main-pid value %s", value);
ff2b41
                 else {
ff2b41
                         service_set_main_pid(s, pid);
ff2b41
-                        unit_watch_pid(UNIT(s), pid);
ff2b41
+                        unit_watch_pid(UNIT(s), pid, false);
ff2b41
                 }
ff2b41
         } else if (streq(key, "main-pid-known")) {
ff2b41
                 int b;
ff2b41
@@ -3085,7 +3081,7 @@ static void service_notify_message(
ff2b41
                         }
ff2b41
                         if (r > 0) {
ff2b41
                                 service_set_main_pid(s, new_main_pid);
ff2b41
-                                unit_watch_pid(UNIT(s), new_main_pid);
ff2b41
+                                unit_watch_pid(UNIT(s), new_main_pid, false);
ff2b41
                                 notify_dbus = true;
ff2b41
                         }
ff2b41
                 }
ff2b41
@@ -3265,7 +3261,7 @@ static void service_bus_name_owner_change(
ff2b41
                         log_unit_debug(u->id, "%s's D-Bus name %s is now owned by process %u", u->id, name, (unsigned) pid);
ff2b41
 
ff2b41
                         service_set_main_pid(s, pid);
ff2b41
-                        unit_watch_pid(UNIT(s), pid);
ff2b41
+                        unit_watch_pid(UNIT(s), pid, false);
ff2b41
                 }
ff2b41
         }
ff2b41
 }
ff2b41
diff --git a/src/core/socket.c b/src/core/socket.c
ff2b41
index 1e8ae0a6e5..a5f610cd77 100644
ff2b41
--- a/src/core/socket.c
ff2b41
+++ b/src/core/socket.c
ff2b41
@@ -1350,7 +1350,7 @@ static int socket_coldplug(Unit *u, Hashmap *deferred_work) {
ff2b41
                 if (s->control_pid <= 0)
ff2b41
                         return -EBADMSG;
ff2b41
 
ff2b41
-                r = unit_watch_pid(UNIT(s), s->control_pid);
ff2b41
+                r = unit_watch_pid(UNIT(s), s->control_pid, false);
ff2b41
                 if (r < 0)
ff2b41
                         return r;
ff2b41
 
ff2b41
@@ -1427,7 +1427,7 @@ static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) {
ff2b41
         if (r < 0)
ff2b41
                 goto fail;
ff2b41
 
ff2b41
-        r = unit_watch_pid(UNIT(s), pid);
ff2b41
+        r = unit_watch_pid(UNIT(s), pid, true);
ff2b41
         if (r < 0)
ff2b41
                 /* FIXME: we need to do something here */
ff2b41
                 goto fail;
ff2b41
@@ -1512,7 +1512,7 @@ static int socket_chown(Socket *s, pid_t *_pid) {
ff2b41
                 _exit(ret);
ff2b41
         }
ff2b41
 
ff2b41
-        r = unit_watch_pid(UNIT(s), pid);
ff2b41
+        r = unit_watch_pid(UNIT(s), pid, true);
ff2b41
         if (r < 0)
ff2b41
                 goto fail;
ff2b41
 
ff2b41
diff --git a/src/core/swap.c b/src/core/swap.c
ff2b41
index 757a8d45c5..4a5e882332 100644
ff2b41
--- a/src/core/swap.c
ff2b41
+++ b/src/core/swap.c
ff2b41
@@ -548,7 +548,7 @@ static int swap_coldplug(Unit *u, Hashmap *deferred_work) {
ff2b41
                 if (s->control_pid <= 0)
ff2b41
                         return -EBADMSG;
ff2b41
 
ff2b41
-                r = unit_watch_pid(UNIT(s), s->control_pid);
ff2b41
+                r = unit_watch_pid(UNIT(s), s->control_pid, false);
ff2b41
                 if (r < 0)
ff2b41
                         return r;
ff2b41
 
ff2b41
@@ -645,7 +645,7 @@ static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) {
ff2b41
         if (r < 0)
ff2b41
                 goto fail;
ff2b41
 
ff2b41
-        r = unit_watch_pid(UNIT(s), pid);
ff2b41
+        r = unit_watch_pid(UNIT(s), pid, true);
ff2b41
         if (r < 0)
ff2b41
                 /* FIXME: we need to do something here */
ff2b41
                 goto fail;
ff2b41
diff --git a/src/core/unit.c b/src/core/unit.c
ff2b41
index def36a0930..cdac192eb6 100644
ff2b41
--- a/src/core/unit.c
ff2b41
+++ b/src/core/unit.c
ff2b41
@@ -52,6 +52,7 @@
ff2b41
 #include "execute.h"
ff2b41
 #include "virt.h"
ff2b41
 #include "dropin.h"
ff2b41
+#include "util.h"
ff2b41
 
ff2b41
 const UnitVTable * const unit_vtable[_UNIT_TYPE_MAX] = {
ff2b41
         [UNIT_SERVICE] = &service_vtable,
ff2b41
@@ -1961,7 +1962,7 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su
ff2b41
         unit_add_to_gc_queue(u);
ff2b41
 }
ff2b41
 
ff2b41
-int unit_watch_pid(Unit *u, pid_t pid) {
ff2b41
+int unit_watch_pid(Unit *u, pid_t pid, bool exclusive) {
ff2b41
         int q, r;
ff2b41
 
ff2b41
         assert(u);
ff2b41
@@ -1970,6 +1971,15 @@ int unit_watch_pid(Unit *u, pid_t pid) {
ff2b41
         /* Watch a specific PID. We only support one or two units
ff2b41
          * watching each PID for now, not more. */
ff2b41
 
ff2b41
+        /* Caller might be sure that this PID belongs to this unit only. Let's take this
ff2b41
+         * opportunity to remove any stalled references to this PID as they can be created
ff2b41
+         * easily (when watching a process which is not our direct child). */
ff2b41
+        if (exclusive) {
ff2b41
+                log_unit_debug(u->id, "Cleaning "PID_FMT" from watches.", pid);
ff2b41
+                hashmap_remove2(u->manager->watch_pids1, LONG_TO_PTR(pid), NULL);
ff2b41
+                hashmap_remove2(u->manager->watch_pids2, LONG_TO_PTR(pid), NULL);
ff2b41
+        }
ff2b41
+
ff2b41
         r = set_ensure_allocated(&u->pids, NULL);
ff2b41
         if (r < 0)
ff2b41
                 return r;
ff2b41
@@ -1985,7 +1995,12 @@ int unit_watch_pid(Unit *u, pid_t pid) {
ff2b41
                         return r;
ff2b41
 
ff2b41
                 r = hashmap_put(u->manager->watch_pids2, LONG_TO_PTR(pid), u);
ff2b41
-        }
ff2b41
+                if (r >= 0)
ff2b41
+                        log_unit_debug(u->id, "Watching "PID_FMT" through watch_pids2.", pid);
ff2b41
+                else if (r == -EEXIST)
ff2b41
+                        log_unit_warning(u->id, "Cannot watch "PID_FMT", PID is already watched twice.", pid);
ff2b41
+        } else if (r >= 0)
ff2b41
+                log_unit_debug(u->id, "Watching "PID_FMT" through watch_pids1.", pid);
ff2b41
 
ff2b41
         q = set_put(u->pids, LONG_TO_PTR(pid));
ff2b41
         if (q < 0)
ff2b41
@@ -1998,6 +2013,8 @@ void unit_unwatch_pid(Unit *u, pid_t pid) {
ff2b41
         assert(u);
ff2b41
         assert(pid >= 1);
ff2b41
 
ff2b41
+        log_unit_debug(u->id, "Unwatching "PID_FMT".", pid);
ff2b41
+
ff2b41
         hashmap_remove_value(u->manager->watch_pids1, LONG_TO_PTR(pid), u);
ff2b41
         hashmap_remove_value(u->manager->watch_pids2, LONG_TO_PTR(pid), u);
ff2b41
         set_remove(u->pids, LONG_TO_PTR(pid));
ff2b41
@@ -2028,7 +2045,9 @@ static int unit_watch_pids_in_path(Unit *u, const char *path) {
ff2b41
                 pid_t pid;
ff2b41
 
ff2b41
                 while ((r = cg_read_pid(f, &pid)) > 0) {
ff2b41
-                        r = unit_watch_pid(u, pid);
ff2b41
+                        if (pid_is_my_child(pid) == 0)
ff2b41
+                                log_unit_debug(u->id, "Watching non detached "PID_FMT".", pid);
ff2b41
+                        r = unit_watch_pid(u, pid, false);
ff2b41
                         if (r < 0 && ret >= 0)
ff2b41
                                 ret = r;
ff2b41
                 }
ff2b41
diff --git a/src/core/unit.h b/src/core/unit.h
ff2b41
index a6e21d60ce..4a8bd79052 100644
ff2b41
--- a/src/core/unit.h
ff2b41
+++ b/src/core/unit.h
ff2b41
@@ -537,7 +537,7 @@ int unit_kill_common(Unit *u, KillWho who, int signo, pid_t main_pid, pid_t cont
ff2b41
 
ff2b41
 void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_success);
ff2b41
 
ff2b41
-int unit_watch_pid(Unit *u, pid_t pid);
ff2b41
+int unit_watch_pid(Unit *u, pid_t pid, bool exclusive);
ff2b41
 void unit_unwatch_pid(Unit *u, pid_t pid);
ff2b41
 int unit_watch_all_pids(Unit *u);
ff2b41
 void unit_unwatch_all_pids(Unit *u);
ff2b41
diff --git a/src/shared/util.c b/src/shared/util.c
ff2b41
index 82c8e433dd..127a64c3c6 100644
ff2b41
--- a/src/shared/util.c
ff2b41
+++ b/src/shared/util.c
ff2b41
@@ -641,6 +641,20 @@ int get_parent_of_pid(pid_t pid, pid_t *_ppid) {
ff2b41
         return 0;
ff2b41
 }
ff2b41
 
ff2b41
+int pid_is_my_child(pid_t pid) {
ff2b41
+        pid_t ppid;
ff2b41
+        int r;
ff2b41
+
ff2b41
+        if (pid <= 1)
ff2b41
+                return false;
ff2b41
+
ff2b41
+        r = get_parent_of_pid(pid, &ppid);
ff2b41
+        if (r < 0)
ff2b41
+                return r;
ff2b41
+
ff2b41
+        return ppid == getpid();
ff2b41
+}
ff2b41
+
ff2b41
 int fchmod_umask(int fd, mode_t m) {
ff2b41
         mode_t u;
ff2b41
         int r;
ff2b41
diff --git a/src/shared/util.h b/src/shared/util.h
ff2b41
index 538ca4be2d..6b885d7533 100644
ff2b41
--- a/src/shared/util.h
ff2b41
+++ b/src/shared/util.h
ff2b41
@@ -279,6 +279,7 @@ const char* split(const char **state, size_t *l, const char *separator, bool quo
ff2b41
         for ((state) = (s), (word) = split(&(state), &(length), (separator), (quoted)); (word); (word) = split(&(state), &(length), (separator), (quoted)))
ff2b41
 
ff2b41
 pid_t get_parent_of_pid(pid_t pid, pid_t *ppid);
ff2b41
+int pid_is_my_child(pid_t pid);
ff2b41
 
ff2b41
 char *strappend(const char *s, const char *suffix);
ff2b41
 char *strnappend(const char *s, const char *suffix, size_t length);