Brian Stinson 2593d8
From 79e9566ec0a61d887ab63f17192dbd71aae36ee0 Mon Sep 17 00:00:00 2001
Brian Stinson 2593d8
From: Franck Bui <fbui@suse.com>
Brian Stinson 2593d8
Date: Mon, 18 Mar 2019 20:59:36 +0100
Brian Stinson 2593d8
Subject: [PATCH] core: reduce the number of stalled PIDs from the watched
Brian Stinson 2593d8
 processes list when possible
Brian Stinson 2593d8
MIME-Version: 1.0
Brian Stinson 2593d8
Content-Type: text/plain; charset=UTF-8
Brian Stinson 2593d8
Content-Transfer-Encoding: 8bit
Brian Stinson 2593d8
Brian Stinson 2593d8
Some PIDs can remain in the watched list even though their processes have
Brian Stinson 2593d8
exited since a long time. It can easily happen if the main process of a forking
Brian Stinson 2593d8
service manages to spawn a child before the control process exits for example.
Brian Stinson 2593d8
Brian Stinson 2593d8
However when a pid is about to be mapped to a unit by calling unit_watch_pid(),
Brian Stinson 2593d8
the caller usually knows if the pid should belong to this unit exclusively: if
Brian Stinson 2593d8
we just forked() off a child, then we can be sure that its PID is otherwise
Brian Stinson 2593d8
unused. In this case we take this opportunity to remove any stalled PIDs from
Brian Stinson 2593d8
the watched process list.
Brian Stinson 2593d8
Brian Stinson 2593d8
If we learnt about a PID in any other form (for example via PID file, via
Brian Stinson 2593d8
searching, MAINPID= and so on), then we can't assume anything.
Brian Stinson 2593d8
Brian Stinson 2593d8
Thanks Renaud Métrich for backporting this to RHEL.
Brian Stinson 2593d8
Resolves: #1744972
Brian Stinson 2593d8
---
Brian Stinson 2593d8
 src/core/cgroup.c         |  2 +-
Brian Stinson 2593d8
 src/core/dbus-scope.c     |  2 +-
Brian Stinson 2593d8
 src/core/manager.c        | 10 ++++++++++
Brian Stinson 2593d8
 src/core/manager.h        |  2 ++
Brian Stinson 2593d8
 src/core/mount.c          |  5 ++---
Brian Stinson 2593d8
 src/core/service.c        | 16 ++++++++--------
Brian Stinson 2593d8
 src/core/socket.c         |  7 +++----
Brian Stinson 2593d8
 src/core/swap.c           |  5 ++---
Brian Stinson 2593d8
 src/core/unit.c           |  8 +++++++-
Brian Stinson 2593d8
 src/core/unit.h           |  2 +-
Brian Stinson 2593d8
 src/test/test-watch-pid.c | 12 ++++++------
Brian Stinson 2593d8
 11 files changed, 43 insertions(+), 28 deletions(-)
Brian Stinson 2593d8
Brian Stinson 2593d8
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
Brian Stinson 2593d8
index b7ed07e65b..76eafdc082 100644
Brian Stinson 2593d8
--- a/src/core/cgroup.c
Brian Stinson 2593d8
+++ b/src/core/cgroup.c
Brian Stinson 2593d8
@@ -1926,7 +1926,7 @@ static int unit_watch_pids_in_path(Unit *u, const char *path) {
Brian Stinson 2593d8
                 pid_t pid;
Brian Stinson 2593d8
 
Brian Stinson 2593d8
                 while ((r = cg_read_pid(f, &pid)) > 0) {
Brian Stinson 2593d8
-                        r = unit_watch_pid(u, pid);
Brian Stinson 2593d8
+                        r = unit_watch_pid(u, pid, false);
Brian Stinson 2593d8
                         if (r < 0 && ret >= 0)
Brian Stinson 2593d8
                                 ret = r;
Brian Stinson 2593d8
                 }
Brian Stinson 2593d8
diff --git a/src/core/dbus-scope.c b/src/core/dbus-scope.c
Brian Stinson 2593d8
index 6725f62794..0bbf64fff1 100644
Brian Stinson 2593d8
--- a/src/core/dbus-scope.c
Brian Stinson 2593d8
+++ b/src/core/dbus-scope.c
Brian Stinson 2593d8
@@ -106,7 +106,7 @@ static int bus_scope_set_transient_property(
Brian Stinson 2593d8
                                 return r;
Brian Stinson 2593d8
 
Brian Stinson 2593d8
                         if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
Brian Stinson 2593d8
-                                r = unit_watch_pid(UNIT(s), pid);
Brian Stinson 2593d8
+                                r = unit_watch_pid(UNIT(s), pid, false);
Brian Stinson 2593d8
                                 if (r < 0 && r != -EEXIST)
Brian Stinson 2593d8
                                         return r;
Brian Stinson 2593d8
                         }
Brian Stinson 2593d8
diff --git a/src/core/manager.c b/src/core/manager.c
Brian Stinson 2593d8
index c83e296cf3..0eae7d46fb 100644
Brian Stinson 2593d8
--- a/src/core/manager.c
Brian Stinson 2593d8
+++ b/src/core/manager.c
Brian Stinson 2593d8
@@ -2044,6 +2044,16 @@ void manager_clear_jobs(Manager *m) {
Brian Stinson 2593d8
                 job_finish_and_invalidate(j, JOB_CANCELED, false, false);
Brian Stinson 2593d8
 }
Brian Stinson 2593d8
 
Brian Stinson 2593d8
+void manager_unwatch_pid(Manager *m, pid_t pid) {
Brian Stinson 2593d8
+        assert(m);
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+        /* First let's drop the unit keyed as "pid". */
Brian Stinson 2593d8
+        (void) hashmap_remove(m->watch_pids, PID_TO_PTR(pid));
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+        /* Then, let's also drop the array keyed by -pid. */
Brian Stinson 2593d8
+        free(hashmap_remove(m->watch_pids, PID_TO_PTR(-pid)));
Brian Stinson 2593d8
+}
Brian Stinson 2593d8
+
Brian Stinson 2593d8
 static int manager_dispatch_run_queue(sd_event_source *source, void *userdata) {
Brian Stinson 2593d8
         Manager *m = userdata;
Brian Stinson 2593d8
         Job *j;
Brian Stinson 2593d8
diff --git a/src/core/manager.h b/src/core/manager.h
Brian Stinson 2593d8
index c7f4d66ecd..fa47952d24 100644
Brian Stinson 2593d8
--- a/src/core/manager.h
Brian Stinson 2593d8
+++ b/src/core/manager.h
Brian Stinson 2593d8
@@ -406,6 +406,8 @@ int manager_get_dump_string(Manager *m, char **ret);
Brian Stinson 2593d8
 
Brian Stinson 2593d8
 void manager_clear_jobs(Manager *m);
Brian Stinson 2593d8
 
Brian Stinson 2593d8
+void manager_unwatch_pid(Manager *m, pid_t pid);
Brian Stinson 2593d8
+
Brian Stinson 2593d8
 unsigned manager_dispatch_load_queue(Manager *m);
Brian Stinson 2593d8
 
Brian Stinson 2593d8
 int manager_environment_add(Manager *m, char **minus, char **plus);
Brian Stinson 2593d8
diff --git a/src/core/mount.c b/src/core/mount.c
Brian Stinson 2593d8
index 2ac04e3874..5878814b1b 100644
Brian Stinson 2593d8
--- a/src/core/mount.c
Brian Stinson 2593d8
+++ b/src/core/mount.c
Brian Stinson 2593d8
@@ -677,7 +677,7 @@ static int mount_coldplug(Unit *u) {
Brian Stinson 2593d8
             pid_is_unwaited(m->control_pid) &&
Brian Stinson 2593d8
             MOUNT_STATE_WITH_PROCESS(new_state)) {
Brian Stinson 2593d8
 
Brian Stinson 2593d8
-                r = unit_watch_pid(UNIT(m), m->control_pid);
Brian Stinson 2593d8
+                r = unit_watch_pid(UNIT(m), m->control_pid, false);
Brian Stinson 2593d8
                 if (r < 0)
Brian Stinson 2593d8
                         return r;
Brian Stinson 2593d8
 
Brian Stinson 2593d8
@@ -781,9 +781,8 @@ static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) {
Brian Stinson 2593d8
         if (r < 0)
Brian Stinson 2593d8
                 return r;
Brian Stinson 2593d8
 
Brian Stinson 2593d8
-        r = unit_watch_pid(UNIT(m), pid);
Brian Stinson 2593d8
+        r = unit_watch_pid(UNIT(m), pid, true);
Brian Stinson 2593d8
         if (r < 0)
Brian Stinson 2593d8
-                /* FIXME: we need to do something here */
Brian Stinson 2593d8
                 return r;
Brian Stinson 2593d8
 
Brian Stinson 2593d8
         *_pid = pid;
Brian Stinson 2593d8
diff --git a/src/core/service.c b/src/core/service.c
Brian Stinson 2593d8
index 614ba05d89..310838a5f6 100644
Brian Stinson 2593d8
--- a/src/core/service.c
Brian Stinson 2593d8
+++ b/src/core/service.c
Brian Stinson 2593d8
@@ -974,7 +974,7 @@ static int service_load_pid_file(Service *s, bool may_warn) {
Brian Stinson 2593d8
         if (r < 0)
Brian Stinson 2593d8
                 return r;
Brian Stinson 2593d8
 
Brian Stinson 2593d8
-        r = unit_watch_pid(UNIT(s), pid);
Brian Stinson 2593d8
+        r = unit_watch_pid(UNIT(s), pid, false);
Brian Stinson 2593d8
         if (r < 0) /* FIXME: we need to do something here */
Brian Stinson 2593d8
                 return log_unit_warning_errno(UNIT(s), r, "Failed to watch PID "PID_FMT" for service: %m", pid);
Brian Stinson 2593d8
 
Brian Stinson 2593d8
@@ -1004,7 +1004,7 @@ static void service_search_main_pid(Service *s) {
Brian Stinson 2593d8
         if (service_set_main_pid(s, pid) < 0)
Brian Stinson 2593d8
                 return;
Brian Stinson 2593d8
 
Brian Stinson 2593d8
-        r = unit_watch_pid(UNIT(s), pid);
Brian Stinson 2593d8
+        r = unit_watch_pid(UNIT(s), pid, false);
Brian Stinson 2593d8
         if (r < 0)
Brian Stinson 2593d8
                 /* FIXME: we need to do something here */
Brian Stinson 2593d8
                 log_unit_warning_errno(UNIT(s), r, "Failed to watch PID "PID_FMT" from: %m", pid);
Brian Stinson 2593d8
@@ -1135,7 +1135,7 @@ static int service_coldplug(Unit *u) {
Brian Stinson 2593d8
                     SERVICE_RUNNING, SERVICE_RELOAD,
Brian Stinson 2593d8
                     SERVICE_STOP, SERVICE_STOP_SIGABRT, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
Brian Stinson 2593d8
                     SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL))) {
Brian Stinson 2593d8
-                r = unit_watch_pid(UNIT(s), s->main_pid);
Brian Stinson 2593d8
+                r = unit_watch_pid(UNIT(s), s->main_pid, false);
Brian Stinson 2593d8
                 if (r < 0)
Brian Stinson 2593d8
                         return r;
Brian Stinson 2593d8
         }
Brian Stinson 2593d8
@@ -1147,7 +1147,7 @@ static int service_coldplug(Unit *u) {
Brian Stinson 2593d8
                    SERVICE_RELOAD,
Brian Stinson 2593d8
                    SERVICE_STOP, SERVICE_STOP_SIGABRT, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
Brian Stinson 2593d8
                    SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) {
Brian Stinson 2593d8
-                r = unit_watch_pid(UNIT(s), s->control_pid);
Brian Stinson 2593d8
+                r = unit_watch_pid(UNIT(s), s->control_pid, false);
Brian Stinson 2593d8
                 if (r < 0)
Brian Stinson 2593d8
                         return r;
Brian Stinson 2593d8
         }
Brian Stinson 2593d8
@@ -1545,8 +1545,8 @@ static int service_spawn(
Brian Stinson 2593d8
         s->exec_fd_event_source = TAKE_PTR(exec_fd_source);
Brian Stinson 2593d8
         s->exec_fd_hot = false;
Brian Stinson 2593d8
 
Brian Stinson 2593d8
-        r = unit_watch_pid(UNIT(s), pid);
Brian Stinson 2593d8
-        if (r < 0) /* FIXME: we need to do something here */
Brian Stinson 2593d8
+        r = unit_watch_pid(UNIT(s), pid, true);
Brian Stinson 2593d8
+        if (r < 0)
Brian Stinson 2593d8
                 return r;
Brian Stinson 2593d8
 
Brian Stinson 2593d8
         *_pid = pid;
Brian Stinson 2593d8
@@ -3643,7 +3643,7 @@ static void service_notify_message(
Brian Stinson 2593d8
                         }
Brian Stinson 2593d8
                         if (r > 0) {
Brian Stinson 2593d8
                                 service_set_main_pid(s, new_main_pid);
Brian Stinson 2593d8
-                                unit_watch_pid(UNIT(s), new_main_pid);
Brian Stinson 2593d8
+                                unit_watch_pid(UNIT(s), new_main_pid, false);
Brian Stinson 2593d8
                                 notify_dbus = true;
Brian Stinson 2593d8
                         }
Brian Stinson 2593d8
                 }
Brian Stinson 2593d8
@@ -3858,7 +3858,7 @@ static void service_bus_name_owner_change(
Brian Stinson 2593d8
                         log_unit_debug(u, "D-Bus name %s is now owned by process " PID_FMT, name, pid);
Brian Stinson 2593d8
 
Brian Stinson 2593d8
                         service_set_main_pid(s, pid);
Brian Stinson 2593d8
-                        unit_watch_pid(UNIT(s), pid);
Brian Stinson 2593d8
+                        unit_watch_pid(UNIT(s), pid, false);
Brian Stinson 2593d8
                 }
Brian Stinson 2593d8
         }
Brian Stinson 2593d8
 }
Brian Stinson 2593d8
diff --git a/src/core/socket.c b/src/core/socket.c
Brian Stinson 2593d8
index d488c64e91..b034549634 100644
Brian Stinson 2593d8
--- a/src/core/socket.c
Brian Stinson 2593d8
+++ b/src/core/socket.c
Brian Stinson 2593d8
@@ -1816,7 +1816,7 @@ static int socket_coldplug(Unit *u) {
Brian Stinson 2593d8
                    SOCKET_FINAL_SIGTERM,
Brian Stinson 2593d8
                    SOCKET_FINAL_SIGKILL)) {
Brian Stinson 2593d8
 
Brian Stinson 2593d8
-                r = unit_watch_pid(UNIT(s), s->control_pid);
Brian Stinson 2593d8
+                r = unit_watch_pid(UNIT(s), s->control_pid, false);
Brian Stinson 2593d8
                 if (r < 0)
Brian Stinson 2593d8
                         return r;
Brian Stinson 2593d8
 
Brian Stinson 2593d8
@@ -1902,9 +1902,8 @@ static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) {
Brian Stinson 2593d8
         if (r < 0)
Brian Stinson 2593d8
                 return r;
Brian Stinson 2593d8
 
Brian Stinson 2593d8
-        r = unit_watch_pid(UNIT(s), pid);
Brian Stinson 2593d8
+        r = unit_watch_pid(UNIT(s), pid, true);
Brian Stinson 2593d8
         if (r < 0)
Brian Stinson 2593d8
-                /* FIXME: we need to do something here */
Brian Stinson 2593d8
                 return r;
Brian Stinson 2593d8
 
Brian Stinson 2593d8
         *_pid = pid;
Brian Stinson 2593d8
@@ -1973,7 +1972,7 @@ static int socket_chown(Socket *s, pid_t *_pid) {
Brian Stinson 2593d8
                 _exit(EXIT_SUCCESS);
Brian Stinson 2593d8
         }
Brian Stinson 2593d8
 
Brian Stinson 2593d8
-        r = unit_watch_pid(UNIT(s), pid);
Brian Stinson 2593d8
+        r = unit_watch_pid(UNIT(s), pid, true);
Brian Stinson 2593d8
         if (r < 0)
Brian Stinson 2593d8
                 goto fail;
Brian Stinson 2593d8
 
Brian Stinson 2593d8
diff --git a/src/core/swap.c b/src/core/swap.c
Brian Stinson 2593d8
index b644753a1c..e717dbb54a 100644
Brian Stinson 2593d8
--- a/src/core/swap.c
Brian Stinson 2593d8
+++ b/src/core/swap.c
Brian Stinson 2593d8
@@ -531,7 +531,7 @@ static int swap_coldplug(Unit *u) {
Brian Stinson 2593d8
             pid_is_unwaited(s->control_pid) &&
Brian Stinson 2593d8
             SWAP_STATE_WITH_PROCESS(new_state)) {
Brian Stinson 2593d8
 
Brian Stinson 2593d8
-                r = unit_watch_pid(UNIT(s), s->control_pid);
Brian Stinson 2593d8
+                r = unit_watch_pid(UNIT(s), s->control_pid, false);
Brian Stinson 2593d8
                 if (r < 0)
Brian Stinson 2593d8
                         return r;
Brian Stinson 2593d8
 
Brian Stinson 2593d8
@@ -636,9 +636,8 @@ static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) {
Brian Stinson 2593d8
         if (r < 0)
Brian Stinson 2593d8
                 goto fail;
Brian Stinson 2593d8
 
Brian Stinson 2593d8
-        r = unit_watch_pid(UNIT(s), pid);
Brian Stinson 2593d8
+        r = unit_watch_pid(UNIT(s), pid, true);
Brian Stinson 2593d8
         if (r < 0)
Brian Stinson 2593d8
-                /* FIXME: we need to do something here */
Brian Stinson 2593d8
                 goto fail;
Brian Stinson 2593d8
 
Brian Stinson 2593d8
         *_pid = pid;
Brian Stinson 2593d8
diff --git a/src/core/unit.c b/src/core/unit.c
Brian Stinson 2593d8
index d298afb0d4..b0b1c77ef7 100644
Brian Stinson 2593d8
--- a/src/core/unit.c
Brian Stinson 2593d8
+++ b/src/core/unit.c
Brian Stinson 2593d8
@@ -2500,7 +2500,7 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, UnitNotifyFlag
Brian Stinson 2593d8
         unit_add_to_gc_queue(u);
Brian Stinson 2593d8
 }
Brian Stinson 2593d8
 
Brian Stinson 2593d8
-int unit_watch_pid(Unit *u, pid_t pid) {
Brian Stinson 2593d8
+int unit_watch_pid(Unit *u, pid_t pid, bool exclusive) {
Brian Stinson 2593d8
         int r;
Brian Stinson 2593d8
 
Brian Stinson 2593d8
         assert(u);
Brian Stinson 2593d8
@@ -2508,6 +2508,12 @@ int unit_watch_pid(Unit *u, pid_t pid) {
Brian Stinson 2593d8
 
Brian Stinson 2593d8
         /* Watch a specific PID */
Brian Stinson 2593d8
 
Brian Stinson 2593d8
+        /* Caller might be sure that this PID belongs to this unit only. Let's take this
Brian Stinson 2593d8
+         * opportunity to remove any stalled references to this PID as they can be created
Brian Stinson 2593d8
+         * easily (when watching a process which is not our direct child). */
Brian Stinson 2593d8
+        if (exclusive)
Brian Stinson 2593d8
+                manager_unwatch_pid(u->manager, pid);
Brian Stinson 2593d8
+
Brian Stinson 2593d8
         r = set_ensure_allocated(&u->pids, NULL);
Brian Stinson 2593d8
         if (r < 0)
Brian Stinson 2593d8
                 return r;
Brian Stinson 2593d8
diff --git a/src/core/unit.h b/src/core/unit.h
Brian Stinson 2593d8
index e1a60da244..68cc1869e4 100644
Brian Stinson 2593d8
--- a/src/core/unit.h
Brian Stinson 2593d8
+++ b/src/core/unit.h
Brian Stinson 2593d8
@@ -655,7 +655,7 @@ typedef enum UnitNotifyFlags {
Brian Stinson 2593d8
 
Brian Stinson 2593d8
 void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, UnitNotifyFlags flags);
Brian Stinson 2593d8
 
Brian Stinson 2593d8
-int unit_watch_pid(Unit *u, pid_t pid);
Brian Stinson 2593d8
+int unit_watch_pid(Unit *u, pid_t pid, bool exclusive);
Brian Stinson 2593d8
 void unit_unwatch_pid(Unit *u, pid_t pid);
Brian Stinson 2593d8
 void unit_unwatch_all_pids(Unit *u);
Brian Stinson 2593d8
 
Brian Stinson 2593d8
diff --git a/src/test/test-watch-pid.c b/src/test/test-watch-pid.c
Brian Stinson 2593d8
index cb43b35bc5..8c70175aed 100644
Brian Stinson 2593d8
--- a/src/test/test-watch-pid.c
Brian Stinson 2593d8
+++ b/src/test/test-watch-pid.c
Brian Stinson 2593d8
@@ -49,25 +49,25 @@ int main(int argc, char *argv[]) {
Brian Stinson 2593d8
         assert_se(hashmap_isempty(m->watch_pids));
Brian Stinson 2593d8
         assert_se(manager_get_unit_by_pid(m, 4711) == NULL);
Brian Stinson 2593d8
 
Brian Stinson 2593d8
-        assert_se(unit_watch_pid(a, 4711) >= 0);
Brian Stinson 2593d8
+        assert_se(unit_watch_pid(a, 4711, false) >= 0);
Brian Stinson 2593d8
         assert_se(manager_get_unit_by_pid(m, 4711) == a);
Brian Stinson 2593d8
 
Brian Stinson 2593d8
-        assert_se(unit_watch_pid(a, 4711) >= 0);
Brian Stinson 2593d8
+        assert_se(unit_watch_pid(a, 4711, false) >= 0);
Brian Stinson 2593d8
         assert_se(manager_get_unit_by_pid(m, 4711) == a);
Brian Stinson 2593d8
 
Brian Stinson 2593d8
-        assert_se(unit_watch_pid(b, 4711) >= 0);
Brian Stinson 2593d8
+        assert_se(unit_watch_pid(b, 4711, false) >= 0);
Brian Stinson 2593d8
         u = manager_get_unit_by_pid(m, 4711);
Brian Stinson 2593d8
         assert_se(u == a || u == b);
Brian Stinson 2593d8
 
Brian Stinson 2593d8
-        assert_se(unit_watch_pid(b, 4711) >= 0);
Brian Stinson 2593d8
+        assert_se(unit_watch_pid(b, 4711, false) >= 0);
Brian Stinson 2593d8
         u = manager_get_unit_by_pid(m, 4711);
Brian Stinson 2593d8
         assert_se(u == a || u == b);
Brian Stinson 2593d8
 
Brian Stinson 2593d8
-        assert_se(unit_watch_pid(c, 4711) >= 0);
Brian Stinson 2593d8
+        assert_se(unit_watch_pid(c, 4711, false) >= 0);
Brian Stinson 2593d8
         u = manager_get_unit_by_pid(m, 4711);
Brian Stinson 2593d8
         assert_se(u == a || u == b || u == c);
Brian Stinson 2593d8
 
Brian Stinson 2593d8
-        assert_se(unit_watch_pid(c, 4711) >= 0);
Brian Stinson 2593d8
+        assert_se(unit_watch_pid(c, 4711, false) >= 0);
Brian Stinson 2593d8
         u = manager_get_unit_by_pid(m, 4711);
Brian Stinson 2593d8
         assert_se(u == a || u == b || u == c);
Brian Stinson 2593d8