|
|
d0811f |
From b554f941a8f275124508794b0b83f0554c7b84dc Mon Sep 17 00:00:00 2001
|
|
|
d0811f |
From: Anita Zhang <the.anitazha@gmail.com>
|
|
|
d0811f |
Date: Thu, 22 Oct 2020 22:44:22 -0700
|
|
|
d0811f |
Subject: [PATCH 2/3] core: clean up inactive/failed {service|scope}'s cgroups
|
|
|
d0811f |
when the last process exits
|
|
|
d0811f |
|
|
|
d0811f |
If processes remain in the unit's cgroup after the final SIGKILL is
|
|
|
d0811f |
sent and the unit has exceeded stop timeout, don't release the unit's
|
|
|
d0811f |
cgroup information. Pid1 will have failed to `rmdir` the cgroup path due
|
|
|
d0811f |
to processes remaining in the cgroup and releasing would leave the cgroup
|
|
|
d0811f |
path on the file system with no tracking for pid1 to clean it up.
|
|
|
d0811f |
|
|
|
d0811f |
Instead, keep the information around until the last process exits and pid1
|
|
|
d0811f |
sends the cgroup empty notification. The service/scope can then prune
|
|
|
d0811f |
the cgroup if the unit is inactive/failed.
|
|
|
d0811f |
---
|
|
|
d0811f |
src/core/cgroup.c | 26 +++++++++++++++++++++++++-
|
|
|
d0811f |
src/core/cgroup.h | 6 +++++-
|
|
|
d0811f |
src/core/scope.c | 5 +++++
|
|
|
d0811f |
src/core/service.c | 7 +++++++
|
|
|
d0811f |
4 files changed, 42 insertions(+), 2 deletions(-)
|
|
|
d0811f |
|
|
|
d0811f |
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
|
|
|
d0811f |
index 031b28a684..bce5f44e78 100644
|
|
|
d0811f |
--- a/src/core/cgroup.c
|
|
|
d0811f |
+++ b/src/core/cgroup.c
|
|
|
d0811f |
@@ -2414,6 +2414,29 @@ void unit_release_cgroup(Unit *u) {
|
|
|
d0811f |
}
|
|
|
d0811f |
}
|
|
|
d0811f |
|
|
|
d0811f |
+bool unit_maybe_release_cgroup(Unit *u) {
|
|
|
d0811f |
+ int r;
|
|
|
d0811f |
+
|
|
|
d0811f |
+ assert(u);
|
|
|
d0811f |
+
|
|
|
d0811f |
+ if (!u->cgroup_path)
|
|
|
d0811f |
+ return true;
|
|
|
d0811f |
+
|
|
|
d0811f |
+ /* Don't release the cgroup if there are still processes under it. If we get notified later when all the
|
|
|
d0811f |
+ * processes exit (e.g. the processes were in D-state and exited after the unit was marked as failed)
|
|
|
d0811f |
+ * we need the cgroup paths to continue to be tracked by the manager so they can be looked up and cleaned
|
|
|
d0811f |
+ * up later. */
|
|
|
d0811f |
+ r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path);
|
|
|
d0811f |
+ if (r < 0)
|
|
|
d0811f |
+ log_unit_debug_errno(u, r, "Error checking if the cgroup is recursively empty, ignoring: %m");
|
|
|
d0811f |
+ else if (r == 1) {
|
|
|
d0811f |
+ unit_release_cgroup(u);
|
|
|
d0811f |
+ return true;
|
|
|
d0811f |
+ }
|
|
|
d0811f |
+
|
|
|
d0811f |
+ return false;
|
|
|
d0811f |
+}
|
|
|
d0811f |
+
|
|
|
d0811f |
void unit_prune_cgroup(Unit *u) {
|
|
|
d0811f |
int r;
|
|
|
d0811f |
bool is_root_slice;
|
|
|
d0811f |
@@ -2441,7 +2464,8 @@ void unit_prune_cgroup(Unit *u) {
|
|
|
d0811f |
if (is_root_slice)
|
|
|
d0811f |
return;
|
|
|
d0811f |
|
|
|
d0811f |
- unit_release_cgroup(u);
|
|
|
d0811f |
+ if (!unit_maybe_release_cgroup(u)) /* Returns true if the cgroup was released */
|
|
|
d0811f |
+ return;
|
|
|
d0811f |
|
|
|
d0811f |
u->cgroup_realized = false;
|
|
|
d0811f |
u->cgroup_realized_mask = 0;
|
|
|
d0811f |
diff --git a/src/core/cgroup.h b/src/core/cgroup.h
|
|
|
d0811f |
index 52d028e740..be6856c20c 100644
|
|
|
d0811f |
--- a/src/core/cgroup.h
|
|
|
d0811f |
+++ b/src/core/cgroup.h
|
|
|
d0811f |
@@ -220,11 +220,15 @@ int unit_set_cgroup_path(Unit *u, const char *path);
|
|
|
d0811f |
int unit_pick_cgroup_path(Unit *u);
|
|
|
d0811f |
|
|
|
d0811f |
int unit_realize_cgroup(Unit *u);
|
|
|
d0811f |
-void unit_release_cgroup(Unit *u);
|
|
|
d0811f |
void unit_prune_cgroup(Unit *u);
|
|
|
d0811f |
int unit_watch_cgroup(Unit *u);
|
|
|
d0811f |
int unit_watch_cgroup_memory(Unit *u);
|
|
|
d0811f |
|
|
|
d0811f |
+void unit_release_cgroup(Unit *u);
|
|
|
d0811f |
+/* Releases the cgroup only if it is recursively empty.
|
|
|
d0811f |
+ * Returns true if the cgroup was released, false otherwise. */
|
|
|
d0811f |
+bool unit_maybe_release_cgroup(Unit *u);
|
|
|
d0811f |
+
|
|
|
d0811f |
void unit_add_to_cgroup_empty_queue(Unit *u);
|
|
|
d0811f |
int unit_check_oom(Unit *u);
|
|
|
d0811f |
|
|
|
d0811f |
diff --git a/src/core/scope.c b/src/core/scope.c
|
|
|
d0811f |
index 42c51b0865..ffee783a4c 100644
|
|
|
d0811f |
--- a/src/core/scope.c
|
|
|
d0811f |
+++ b/src/core/scope.c
|
|
|
d0811f |
@@ -487,6 +487,11 @@ static void scope_notify_cgroup_empty_event(Unit *u) {
|
|
|
d0811f |
|
|
|
d0811f |
if (IN_SET(s->state, SCOPE_RUNNING, SCOPE_ABANDONED, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL))
|
|
|
d0811f |
scope_enter_dead(s, SCOPE_SUCCESS);
|
|
|
d0811f |
+
|
|
|
d0811f |
+ /* If the cgroup empty notification comes when the unit is not active, we must have failed to clean
|
|
|
d0811f |
+ * up the cgroup earlier and should do it now. */
|
|
|
d0811f |
+ if (IN_SET(s->state, SCOPE_DEAD, SCOPE_FAILED))
|
|
|
d0811f |
+ unit_prune_cgroup(u);
|
|
|
d0811f |
}
|
|
|
d0811f |
|
|
|
d0811f |
static void scope_sigchld_event(Unit *u, pid_t pid, int code, int status) {
|
|
|
d0811f |
diff --git a/src/core/service.c b/src/core/service.c
|
|
|
d0811f |
index 00e61945ba..db8f596ca6 100644
|
|
|
d0811f |
--- a/src/core/service.c
|
|
|
d0811f |
+++ b/src/core/service.c
|
|
|
d0811f |
@@ -3334,6 +3334,13 @@ static void service_notify_cgroup_empty_event(Unit *u) {
|
|
|
d0811f |
|
|
|
d0811f |
break;
|
|
|
d0811f |
|
|
|
d0811f |
+ /* If the cgroup empty notification comes when the unit is not active, we must have failed to clean
|
|
|
d0811f |
+ * up the cgroup earlier and should do it now. */
|
|
|
d0811f |
+ case SERVICE_DEAD:
|
|
|
d0811f |
+ case SERVICE_FAILED:
|
|
|
d0811f |
+ unit_prune_cgroup(u);
|
|
|
d0811f |
+ break;
|
|
|
d0811f |
+
|
|
|
d0811f |
default:
|
|
|
d0811f |
;
|
|
|
d0811f |
}
|
|
|
d0811f |
--
|
|
|
d0811f |
2.24.1
|
|
|
d0811f |
|