|
|
b9a53a |
From 79e9566ec0a61d887ab63f17192dbd71aae36ee0 Mon Sep 17 00:00:00 2001
|
|
|
b9a53a |
From: Franck Bui <fbui@suse.com>
|
|
|
b9a53a |
Date: Mon, 18 Mar 2019 20:59:36 +0100
|
|
|
b9a53a |
Subject: [PATCH] core: reduce the number of stalled PIDs from the watched
|
|
|
b9a53a |
processes list when possible
|
|
|
b9a53a |
MIME-Version: 1.0
|
|
|
b9a53a |
Content-Type: text/plain; charset=UTF-8
|
|
|
b9a53a |
Content-Transfer-Encoding: 8bit
|
|
|
b9a53a |
|
|
|
b9a53a |
Some PIDs can remain in the watched list even though their processes have
|
|
|
b9a53a |
exited since a long time. It can easily happen if the main process of a forking
|
|
|
b9a53a |
service manages to spawn a child before the control process exits for example.
|
|
|
b9a53a |
|
|
|
b9a53a |
However when a pid is about to be mapped to a unit by calling unit_watch_pid(),
|
|
|
b9a53a |
the caller usually knows if the pid should belong to this unit exclusively: if
|
|
|
b9a53a |
we just forked() off a child, then we can be sure that its PID is otherwise
|
|
|
b9a53a |
unused. In this case we take this opportunity to remove any stalled PIDs from
|
|
|
b9a53a |
the watched process list.
|
|
|
b9a53a |
|
|
|
b9a53a |
If we learnt about a PID in any other form (for example via PID file, via
|
|
|
b9a53a |
searching, MAINPID= and so on), then we can't assume anything.
|
|
|
b9a53a |
|
|
|
b9a53a |
Thanks Renaud Métrich for backporting this to RHEL.
|
|
|
b9a53a |
Resolves: #1744972
|
|
|
b9a53a |
---
|
|
|
b9a53a |
src/core/cgroup.c | 2 +-
|
|
|
b9a53a |
src/core/dbus-scope.c | 2 +-
|
|
|
b9a53a |
src/core/manager.c | 10 ++++++++++
|
|
|
b9a53a |
src/core/manager.h | 2 ++
|
|
|
b9a53a |
src/core/mount.c | 5 ++---
|
|
|
b9a53a |
src/core/service.c | 16 ++++++++--------
|
|
|
b9a53a |
src/core/socket.c | 7 +++----
|
|
|
b9a53a |
src/core/swap.c | 5 ++---
|
|
|
b9a53a |
src/core/unit.c | 8 +++++++-
|
|
|
b9a53a |
src/core/unit.h | 2 +-
|
|
|
b9a53a |
src/test/test-watch-pid.c | 12 ++++++------
|
|
|
b9a53a |
11 files changed, 43 insertions(+), 28 deletions(-)
|
|
|
b9a53a |
|
|
|
b9a53a |
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
|
|
|
b9a53a |
index b7ed07e65b..76eafdc082 100644
|
|
|
b9a53a |
--- a/src/core/cgroup.c
|
|
|
b9a53a |
+++ b/src/core/cgroup.c
|
|
|
b9a53a |
@@ -1926,7 +1926,7 @@ static int unit_watch_pids_in_path(Unit *u, const char *path) {
|
|
|
b9a53a |
pid_t pid;
|
|
|
b9a53a |
|
|
|
b9a53a |
while ((r = cg_read_pid(f, &pid)) > 0) {
|
|
|
b9a53a |
- r = unit_watch_pid(u, pid);
|
|
|
b9a53a |
+ r = unit_watch_pid(u, pid, false);
|
|
|
b9a53a |
if (r < 0 && ret >= 0)
|
|
|
b9a53a |
ret = r;
|
|
|
b9a53a |
}
|
|
|
b9a53a |
diff --git a/src/core/dbus-scope.c b/src/core/dbus-scope.c
|
|
|
b9a53a |
index 6725f62794..0bbf64fff1 100644
|
|
|
b9a53a |
--- a/src/core/dbus-scope.c
|
|
|
b9a53a |
+++ b/src/core/dbus-scope.c
|
|
|
b9a53a |
@@ -106,7 +106,7 @@ static int bus_scope_set_transient_property(
|
|
|
b9a53a |
return r;
|
|
|
b9a53a |
|
|
|
b9a53a |
if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
|
|
|
b9a53a |
- r = unit_watch_pid(UNIT(s), pid);
|
|
|
b9a53a |
+ r = unit_watch_pid(UNIT(s), pid, false);
|
|
|
b9a53a |
if (r < 0 && r != -EEXIST)
|
|
|
b9a53a |
return r;
|
|
|
b9a53a |
}
|
|
|
b9a53a |
diff --git a/src/core/manager.c b/src/core/manager.c
|
|
|
b9a53a |
index c83e296cf3..0eae7d46fb 100644
|
|
|
b9a53a |
--- a/src/core/manager.c
|
|
|
b9a53a |
+++ b/src/core/manager.c
|
|
|
b9a53a |
@@ -2044,6 +2044,16 @@ void manager_clear_jobs(Manager *m) {
|
|
|
b9a53a |
job_finish_and_invalidate(j, JOB_CANCELED, false, false);
|
|
|
b9a53a |
}
|
|
|
b9a53a |
|
|
|
b9a53a |
+void manager_unwatch_pid(Manager *m, pid_t pid) {
|
|
|
b9a53a |
+ assert(m);
|
|
|
b9a53a |
+
|
|
|
b9a53a |
+ /* First let's drop the unit keyed as "pid". */
|
|
|
b9a53a |
+ (void) hashmap_remove(m->watch_pids, PID_TO_PTR(pid));
|
|
|
b9a53a |
+
|
|
|
b9a53a |
+ /* Then, let's also drop the array keyed by -pid. */
|
|
|
b9a53a |
+ free(hashmap_remove(m->watch_pids, PID_TO_PTR(-pid)));
|
|
|
b9a53a |
+}
|
|
|
b9a53a |
+
|
|
|
b9a53a |
static int manager_dispatch_run_queue(sd_event_source *source, void *userdata) {
|
|
|
b9a53a |
Manager *m = userdata;
|
|
|
b9a53a |
Job *j;
|
|
|
b9a53a |
diff --git a/src/core/manager.h b/src/core/manager.h
|
|
|
b9a53a |
index c7f4d66ecd..fa47952d24 100644
|
|
|
b9a53a |
--- a/src/core/manager.h
|
|
|
b9a53a |
+++ b/src/core/manager.h
|
|
|
b9a53a |
@@ -406,6 +406,8 @@ int manager_get_dump_string(Manager *m, char **ret);
|
|
|
b9a53a |
|
|
|
b9a53a |
void manager_clear_jobs(Manager *m);
|
|
|
b9a53a |
|
|
|
b9a53a |
+void manager_unwatch_pid(Manager *m, pid_t pid);
|
|
|
b9a53a |
+
|
|
|
b9a53a |
unsigned manager_dispatch_load_queue(Manager *m);
|
|
|
b9a53a |
|
|
|
b9a53a |
int manager_environment_add(Manager *m, char **minus, char **plus);
|
|
|
b9a53a |
diff --git a/src/core/mount.c b/src/core/mount.c
|
|
|
b9a53a |
index 2ac04e3874..5878814b1b 100644
|
|
|
b9a53a |
--- a/src/core/mount.c
|
|
|
b9a53a |
+++ b/src/core/mount.c
|
|
|
b9a53a |
@@ -677,7 +677,7 @@ static int mount_coldplug(Unit *u) {
|
|
|
b9a53a |
pid_is_unwaited(m->control_pid) &&
|
|
|
b9a53a |
MOUNT_STATE_WITH_PROCESS(new_state)) {
|
|
|
b9a53a |
|
|
|
b9a53a |
- r = unit_watch_pid(UNIT(m), m->control_pid);
|
|
|
b9a53a |
+ r = unit_watch_pid(UNIT(m), m->control_pid, false);
|
|
|
b9a53a |
if (r < 0)
|
|
|
b9a53a |
return r;
|
|
|
b9a53a |
|
|
|
b9a53a |
@@ -781,9 +781,8 @@ static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) {
|
|
|
b9a53a |
if (r < 0)
|
|
|
b9a53a |
return r;
|
|
|
b9a53a |
|
|
|
b9a53a |
- r = unit_watch_pid(UNIT(m), pid);
|
|
|
b9a53a |
+ r = unit_watch_pid(UNIT(m), pid, true);
|
|
|
b9a53a |
if (r < 0)
|
|
|
b9a53a |
- /* FIXME: we need to do something here */
|
|
|
b9a53a |
return r;
|
|
|
b9a53a |
|
|
|
b9a53a |
*_pid = pid;
|
|
|
b9a53a |
diff --git a/src/core/service.c b/src/core/service.c
|
|
|
b9a53a |
index 614ba05d89..310838a5f6 100644
|
|
|
b9a53a |
--- a/src/core/service.c
|
|
|
b9a53a |
+++ b/src/core/service.c
|
|
|
b9a53a |
@@ -974,7 +974,7 @@ static int service_load_pid_file(Service *s, bool may_warn) {
|
|
|
b9a53a |
if (r < 0)
|
|
|
b9a53a |
return r;
|
|
|
b9a53a |
|
|
|
b9a53a |
- r = unit_watch_pid(UNIT(s), pid);
|
|
|
b9a53a |
+ r = unit_watch_pid(UNIT(s), pid, false);
|
|
|
b9a53a |
if (r < 0) /* FIXME: we need to do something here */
|
|
|
b9a53a |
return log_unit_warning_errno(UNIT(s), r, "Failed to watch PID "PID_FMT" for service: %m", pid);
|
|
|
b9a53a |
|
|
|
b9a53a |
@@ -1004,7 +1004,7 @@ static void service_search_main_pid(Service *s) {
|
|
|
b9a53a |
if (service_set_main_pid(s, pid) < 0)
|
|
|
b9a53a |
return;
|
|
|
b9a53a |
|
|
|
b9a53a |
- r = unit_watch_pid(UNIT(s), pid);
|
|
|
b9a53a |
+ r = unit_watch_pid(UNIT(s), pid, false);
|
|
|
b9a53a |
if (r < 0)
|
|
|
b9a53a |
/* FIXME: we need to do something here */
|
|
|
b9a53a |
log_unit_warning_errno(UNIT(s), r, "Failed to watch PID "PID_FMT" from: %m", pid);
|
|
|
b9a53a |
@@ -1135,7 +1135,7 @@ static int service_coldplug(Unit *u) {
|
|
|
b9a53a |
SERVICE_RUNNING, SERVICE_RELOAD,
|
|
|
b9a53a |
SERVICE_STOP, SERVICE_STOP_SIGABRT, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
|
|
|
b9a53a |
SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL))) {
|
|
|
b9a53a |
- r = unit_watch_pid(UNIT(s), s->main_pid);
|
|
|
b9a53a |
+ r = unit_watch_pid(UNIT(s), s->main_pid, false);
|
|
|
b9a53a |
if (r < 0)
|
|
|
b9a53a |
return r;
|
|
|
b9a53a |
}
|
|
|
b9a53a |
@@ -1147,7 +1147,7 @@ static int service_coldplug(Unit *u) {
|
|
|
b9a53a |
SERVICE_RELOAD,
|
|
|
b9a53a |
SERVICE_STOP, SERVICE_STOP_SIGABRT, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
|
|
|
b9a53a |
SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) {
|
|
|
b9a53a |
- r = unit_watch_pid(UNIT(s), s->control_pid);
|
|
|
b9a53a |
+ r = unit_watch_pid(UNIT(s), s->control_pid, false);
|
|
|
b9a53a |
if (r < 0)
|
|
|
b9a53a |
return r;
|
|
|
b9a53a |
}
|
|
|
b9a53a |
@@ -1545,8 +1545,8 @@ static int service_spawn(
|
|
|
b9a53a |
s->exec_fd_event_source = TAKE_PTR(exec_fd_source);
|
|
|
b9a53a |
s->exec_fd_hot = false;
|
|
|
b9a53a |
|
|
|
b9a53a |
- r = unit_watch_pid(UNIT(s), pid);
|
|
|
b9a53a |
- if (r < 0) /* FIXME: we need to do something here */
|
|
|
b9a53a |
+ r = unit_watch_pid(UNIT(s), pid, true);
|
|
|
b9a53a |
+ if (r < 0)
|
|
|
b9a53a |
return r;
|
|
|
b9a53a |
|
|
|
b9a53a |
*_pid = pid;
|
|
|
b9a53a |
@@ -3643,7 +3643,7 @@ static void service_notify_message(
|
|
|
b9a53a |
}
|
|
|
b9a53a |
if (r > 0) {
|
|
|
b9a53a |
service_set_main_pid(s, new_main_pid);
|
|
|
b9a53a |
- unit_watch_pid(UNIT(s), new_main_pid);
|
|
|
b9a53a |
+ unit_watch_pid(UNIT(s), new_main_pid, false);
|
|
|
b9a53a |
notify_dbus = true;
|
|
|
b9a53a |
}
|
|
|
b9a53a |
}
|
|
|
b9a53a |
@@ -3858,7 +3858,7 @@ static void service_bus_name_owner_change(
|
|
|
b9a53a |
log_unit_debug(u, "D-Bus name %s is now owned by process " PID_FMT, name, pid);
|
|
|
b9a53a |
|
|
|
b9a53a |
service_set_main_pid(s, pid);
|
|
|
b9a53a |
- unit_watch_pid(UNIT(s), pid);
|
|
|
b9a53a |
+ unit_watch_pid(UNIT(s), pid, false);
|
|
|
b9a53a |
}
|
|
|
b9a53a |
}
|
|
|
b9a53a |
}
|
|
|
b9a53a |
diff --git a/src/core/socket.c b/src/core/socket.c
|
|
|
b9a53a |
index d488c64e91..b034549634 100644
|
|
|
b9a53a |
--- a/src/core/socket.c
|
|
|
b9a53a |
+++ b/src/core/socket.c
|
|
|
b9a53a |
@@ -1816,7 +1816,7 @@ static int socket_coldplug(Unit *u) {
|
|
|
b9a53a |
SOCKET_FINAL_SIGTERM,
|
|
|
b9a53a |
SOCKET_FINAL_SIGKILL)) {
|
|
|
b9a53a |
|
|
|
b9a53a |
- r = unit_watch_pid(UNIT(s), s->control_pid);
|
|
|
b9a53a |
+ r = unit_watch_pid(UNIT(s), s->control_pid, false);
|
|
|
b9a53a |
if (r < 0)
|
|
|
b9a53a |
return r;
|
|
|
b9a53a |
|
|
|
b9a53a |
@@ -1902,9 +1902,8 @@ static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) {
|
|
|
b9a53a |
if (r < 0)
|
|
|
b9a53a |
return r;
|
|
|
b9a53a |
|
|
|
b9a53a |
- r = unit_watch_pid(UNIT(s), pid);
|
|
|
b9a53a |
+ r = unit_watch_pid(UNIT(s), pid, true);
|
|
|
b9a53a |
if (r < 0)
|
|
|
b9a53a |
- /* FIXME: we need to do something here */
|
|
|
b9a53a |
return r;
|
|
|
b9a53a |
|
|
|
b9a53a |
*_pid = pid;
|
|
|
b9a53a |
@@ -1973,7 +1972,7 @@ static int socket_chown(Socket *s, pid_t *_pid) {
|
|
|
b9a53a |
_exit(EXIT_SUCCESS);
|
|
|
b9a53a |
}
|
|
|
b9a53a |
|
|
|
b9a53a |
- r = unit_watch_pid(UNIT(s), pid);
|
|
|
b9a53a |
+ r = unit_watch_pid(UNIT(s), pid, true);
|
|
|
b9a53a |
if (r < 0)
|
|
|
b9a53a |
goto fail;
|
|
|
b9a53a |
|
|
|
b9a53a |
diff --git a/src/core/swap.c b/src/core/swap.c
|
|
|
b9a53a |
index b644753a1c..e717dbb54a 100644
|
|
|
b9a53a |
--- a/src/core/swap.c
|
|
|
b9a53a |
+++ b/src/core/swap.c
|
|
|
b9a53a |
@@ -531,7 +531,7 @@ static int swap_coldplug(Unit *u) {
|
|
|
b9a53a |
pid_is_unwaited(s->control_pid) &&
|
|
|
b9a53a |
SWAP_STATE_WITH_PROCESS(new_state)) {
|
|
|
b9a53a |
|
|
|
b9a53a |
- r = unit_watch_pid(UNIT(s), s->control_pid);
|
|
|
b9a53a |
+ r = unit_watch_pid(UNIT(s), s->control_pid, false);
|
|
|
b9a53a |
if (r < 0)
|
|
|
b9a53a |
return r;
|
|
|
b9a53a |
|
|
|
b9a53a |
@@ -636,9 +636,8 @@ static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) {
|
|
|
b9a53a |
if (r < 0)
|
|
|
b9a53a |
goto fail;
|
|
|
b9a53a |
|
|
|
b9a53a |
- r = unit_watch_pid(UNIT(s), pid);
|
|
|
b9a53a |
+ r = unit_watch_pid(UNIT(s), pid, true);
|
|
|
b9a53a |
if (r < 0)
|
|
|
b9a53a |
- /* FIXME: we need to do something here */
|
|
|
b9a53a |
goto fail;
|
|
|
b9a53a |
|
|
|
b9a53a |
*_pid = pid;
|
|
|
b9a53a |
diff --git a/src/core/unit.c b/src/core/unit.c
|
|
|
b9a53a |
index d298afb0d4..b0b1c77ef7 100644
|
|
|
b9a53a |
--- a/src/core/unit.c
|
|
|
b9a53a |
+++ b/src/core/unit.c
|
|
|
b9a53a |
@@ -2500,7 +2500,7 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, UnitNotifyFlag
|
|
|
b9a53a |
unit_add_to_gc_queue(u);
|
|
|
b9a53a |
}
|
|
|
b9a53a |
|
|
|
b9a53a |
-int unit_watch_pid(Unit *u, pid_t pid) {
|
|
|
b9a53a |
+int unit_watch_pid(Unit *u, pid_t pid, bool exclusive) {
|
|
|
b9a53a |
int r;
|
|
|
b9a53a |
|
|
|
b9a53a |
assert(u);
|
|
|
b9a53a |
@@ -2508,6 +2508,12 @@ int unit_watch_pid(Unit *u, pid_t pid) {
|
|
|
b9a53a |
|
|
|
b9a53a |
/* Watch a specific PID */
|
|
|
b9a53a |
|
|
|
b9a53a |
+ /* Caller might be sure that this PID belongs to this unit only. Let's take this
|
|
|
b9a53a |
+ * opportunity to remove any stalled references to this PID as they can be created
|
|
|
b9a53a |
+ * easily (when watching a process which is not our direct child). */
|
|
|
b9a53a |
+ if (exclusive)
|
|
|
b9a53a |
+ manager_unwatch_pid(u->manager, pid);
|
|
|
b9a53a |
+
|
|
|
b9a53a |
r = set_ensure_allocated(&u->pids, NULL);
|
|
|
b9a53a |
if (r < 0)
|
|
|
b9a53a |
return r;
|
|
|
b9a53a |
diff --git a/src/core/unit.h b/src/core/unit.h
|
|
|
b9a53a |
index e1a60da244..68cc1869e4 100644
|
|
|
b9a53a |
--- a/src/core/unit.h
|
|
|
b9a53a |
+++ b/src/core/unit.h
|
|
|
b9a53a |
@@ -655,7 +655,7 @@ typedef enum UnitNotifyFlags {
|
|
|
b9a53a |
|
|
|
b9a53a |
void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, UnitNotifyFlags flags);
|
|
|
b9a53a |
|
|
|
b9a53a |
-int unit_watch_pid(Unit *u, pid_t pid);
|
|
|
b9a53a |
+int unit_watch_pid(Unit *u, pid_t pid, bool exclusive);
|
|
|
b9a53a |
void unit_unwatch_pid(Unit *u, pid_t pid);
|
|
|
b9a53a |
void unit_unwatch_all_pids(Unit *u);
|
|
|
b9a53a |
|
|
|
b9a53a |
diff --git a/src/test/test-watch-pid.c b/src/test/test-watch-pid.c
|
|
|
b9a53a |
index cb43b35bc5..8c70175aed 100644
|
|
|
b9a53a |
--- a/src/test/test-watch-pid.c
|
|
|
b9a53a |
+++ b/src/test/test-watch-pid.c
|
|
|
b9a53a |
@@ -49,25 +49,25 @@ int main(int argc, char *argv[]) {
|
|
|
b9a53a |
assert_se(hashmap_isempty(m->watch_pids));
|
|
|
b9a53a |
assert_se(manager_get_unit_by_pid(m, 4711) == NULL);
|
|
|
b9a53a |
|
|
|
b9a53a |
- assert_se(unit_watch_pid(a, 4711) >= 0);
|
|
|
b9a53a |
+ assert_se(unit_watch_pid(a, 4711, false) >= 0);
|
|
|
b9a53a |
assert_se(manager_get_unit_by_pid(m, 4711) == a);
|
|
|
b9a53a |
|
|
|
b9a53a |
- assert_se(unit_watch_pid(a, 4711) >= 0);
|
|
|
b9a53a |
+ assert_se(unit_watch_pid(a, 4711, false) >= 0);
|
|
|
b9a53a |
assert_se(manager_get_unit_by_pid(m, 4711) == a);
|
|
|
b9a53a |
|
|
|
b9a53a |
- assert_se(unit_watch_pid(b, 4711) >= 0);
|
|
|
b9a53a |
+ assert_se(unit_watch_pid(b, 4711, false) >= 0);
|
|
|
b9a53a |
u = manager_get_unit_by_pid(m, 4711);
|
|
|
b9a53a |
assert_se(u == a || u == b);
|
|
|
b9a53a |
|
|
|
b9a53a |
- assert_se(unit_watch_pid(b, 4711) >= 0);
|
|
|
b9a53a |
+ assert_se(unit_watch_pid(b, 4711, false) >= 0);
|
|
|
b9a53a |
u = manager_get_unit_by_pid(m, 4711);
|
|
|
b9a53a |
assert_se(u == a || u == b);
|
|
|
b9a53a |
|
|
|
b9a53a |
- assert_se(unit_watch_pid(c, 4711) >= 0);
|
|
|
b9a53a |
+ assert_se(unit_watch_pid(c, 4711, false) >= 0);
|
|
|
b9a53a |
u = manager_get_unit_by_pid(m, 4711);
|
|
|
b9a53a |
assert_se(u == a || u == b || u == c);
|
|
|
b9a53a |
|
|
|
b9a53a |
- assert_se(unit_watch_pid(c, 4711) >= 0);
|
|
|
b9a53a |
+ assert_se(unit_watch_pid(c, 4711, false) >= 0);
|
|
|
b9a53a |
u = manager_get_unit_by_pid(m, 4711);
|
|
|
b9a53a |
assert_se(u == a || u == b || u == c);
|
|
|
b9a53a |
|