|
|
984f77 |
From 5b5571de21d1ddf9a00511a6b2f25d630a903f05 Mon Sep 17 00:00:00 2001
|
|
|
984f77 |
From: Michal Sekletar <msekleta@redhat.com>
|
|
|
984f77 |
Date: Wed, 1 Jun 2022 10:15:06 +0200
|
|
|
984f77 |
Subject: [PATCH] scope: allow unprivileged delegation on scopes
|
|
|
984f77 |
|
|
|
984f77 |
Previously it was possible to set delegate property for scope, but you
|
|
|
984f77 |
were not able to allow unprivileged process to manage the scope's cgroup
|
|
|
984f77 |
hierarchy. This is useful when launching manager process that will run
|
|
|
984f77 |
unprivileged but is supposed to manage its own (scope) sub-hierarchy.
|
|
|
984f77 |
|
|
|
984f77 |
Fixes #21683
|
|
|
984f77 |
|
|
|
984f77 |
(cherry picked from commit 03860190fefce8bbea3a6f0e77919b882ade517c)
|
|
|
984f77 |
|
|
|
984f77 |
Resolves: #2068575
|
|
|
984f77 |
---
|
|
|
984f77 |
src/basic/unit-def.c | 1 +
|
|
|
984f77 |
src/basic/unit-def.h | 1 +
|
|
|
984f77 |
src/core/dbus-scope.c | 6 ++
|
|
|
984f77 |
src/core/scope.c | 135 +++++++++++++++++++++++++----
|
|
|
984f77 |
src/core/scope.h | 3 +
|
|
|
984f77 |
src/shared/bus-unit-util.c | 5 ++
|
|
|
984f77 |
test/TEST-19-DELEGATE/testsuite.sh | 13 +++
|
|
|
984f77 |
7 files changed, 145 insertions(+), 19 deletions(-)
|
|
|
984f77 |
|
|
|
984f77 |
diff --git a/src/basic/unit-def.c b/src/basic/unit-def.c
|
|
|
984f77 |
index e79cc73dd3..16c4d38d41 100644
|
|
|
984f77 |
--- a/src/basic/unit-def.c
|
|
|
984f77 |
+++ b/src/basic/unit-def.c
|
|
|
984f77 |
@@ -160,6 +160,7 @@ DEFINE_STRING_TABLE_LOOKUP(path_state, PathState);
|
|
|
984f77 |
|
|
|
984f77 |
static const char* const scope_state_table[_SCOPE_STATE_MAX] = {
|
|
|
984f77 |
[SCOPE_DEAD] = "dead",
|
|
|
984f77 |
+ [SCOPE_START_CHOWN] = "start-chown",
|
|
|
984f77 |
[SCOPE_RUNNING] = "running",
|
|
|
984f77 |
[SCOPE_ABANDONED] = "abandoned",
|
|
|
984f77 |
[SCOPE_STOP_SIGTERM] = "stop-sigterm",
|
|
|
984f77 |
diff --git a/src/basic/unit-def.h b/src/basic/unit-def.h
|
|
|
984f77 |
index 8eea379a6d..03d151ec19 100644
|
|
|
984f77 |
--- a/src/basic/unit-def.h
|
|
|
984f77 |
+++ b/src/basic/unit-def.h
|
|
|
984f77 |
@@ -99,6 +99,7 @@ typedef enum PathState {
|
|
|
984f77 |
|
|
|
984f77 |
typedef enum ScopeState {
|
|
|
984f77 |
SCOPE_DEAD,
|
|
|
984f77 |
+ SCOPE_START_CHOWN,
|
|
|
984f77 |
SCOPE_RUNNING,
|
|
|
984f77 |
SCOPE_ABANDONED,
|
|
|
984f77 |
SCOPE_STOP_SIGTERM,
|
|
|
984f77 |
diff --git a/src/core/dbus-scope.c b/src/core/dbus-scope.c
|
|
|
984f77 |
index 0bbf64fff1..534302d188 100644
|
|
|
984f77 |
--- a/src/core/dbus-scope.c
|
|
|
984f77 |
+++ b/src/core/dbus-scope.c
|
|
|
984f77 |
@@ -178,6 +178,12 @@ int bus_scope_set_property(
|
|
|
984f77 |
r = bus_kill_context_set_transient_property(u, &s->kill_context, name, message, flags, error);
|
|
|
984f77 |
if (r != 0)
|
|
|
984f77 |
return r;
|
|
|
984f77 |
+
|
|
|
984f77 |
+ if (streq(name, "User"))
|
|
|
984f77 |
+ return bus_set_transient_user_relaxed(u, name, &s->user, message, flags, error);
|
|
|
984f77 |
+
|
|
|
984f77 |
+ if (streq(name, "Group"))
|
|
|
984f77 |
+ return bus_set_transient_user_relaxed(u, name, &s->group, message, flags, error);
|
|
|
984f77 |
}
|
|
|
984f77 |
|
|
|
984f77 |
return 0;
|
|
|
984f77 |
diff --git a/src/core/scope.c b/src/core/scope.c
|
|
|
984f77 |
index 5a595c65a6..9cc5f89099 100644
|
|
|
984f77 |
--- a/src/core/scope.c
|
|
|
984f77 |
+++ b/src/core/scope.c
|
|
|
984f77 |
@@ -5,6 +5,8 @@
|
|
|
984f77 |
|
|
|
984f77 |
#include "alloc-util.h"
|
|
|
984f77 |
#include "dbus-scope.h"
|
|
|
984f77 |
+#include "dbus-unit.h"
|
|
|
984f77 |
+#include "exit-status.h"
|
|
|
984f77 |
#include "load-dropin.h"
|
|
|
984f77 |
#include "log.h"
|
|
|
984f77 |
#include "scope.h"
|
|
|
984f77 |
@@ -14,9 +16,11 @@
|
|
|
984f77 |
#include "strv.h"
|
|
|
984f77 |
#include "unit-name.h"
|
|
|
984f77 |
#include "unit.h"
|
|
|
984f77 |
+#include "user-util.h"
|
|
|
984f77 |
|
|
|
984f77 |
static const UnitActiveState state_translation_table[_SCOPE_STATE_MAX] = {
|
|
|
984f77 |
[SCOPE_DEAD] = UNIT_INACTIVE,
|
|
|
984f77 |
+ [SCOPE_START_CHOWN] = UNIT_ACTIVATING,
|
|
|
984f77 |
[SCOPE_RUNNING] = UNIT_ACTIVE,
|
|
|
984f77 |
[SCOPE_ABANDONED] = UNIT_ACTIVE,
|
|
|
984f77 |
[SCOPE_STOP_SIGTERM] = UNIT_DEACTIVATING,
|
|
|
984f77 |
@@ -34,6 +38,7 @@ static void scope_init(Unit *u) {
|
|
|
984f77 |
|
|
|
984f77 |
s->timeout_stop_usec = u->manager->default_timeout_stop_usec;
|
|
|
984f77 |
u->ignore_on_isolate = true;
|
|
|
984f77 |
+ s->user = s->group = NULL;
|
|
|
984f77 |
}
|
|
|
984f77 |
|
|
|
984f77 |
static void scope_done(Unit *u) {
|
|
|
984f77 |
@@ -45,6 +50,9 @@ static void scope_done(Unit *u) {
|
|
|
984f77 |
s->controller_track = sd_bus_track_unref(s->controller_track);
|
|
|
984f77 |
|
|
|
984f77 |
s->timer_event_source = sd_event_source_unref(s->timer_event_source);
|
|
|
984f77 |
+
|
|
|
984f77 |
+ s->user = mfree(s->user);
|
|
|
984f77 |
+ s->group = mfree(s->group);
|
|
|
984f77 |
}
|
|
|
984f77 |
|
|
|
984f77 |
static int scope_arm_timer(Scope *s, usec_t usec) {
|
|
|
984f77 |
@@ -84,7 +92,7 @@ static void scope_set_state(Scope *s, ScopeState state) {
|
|
|
984f77 |
old_state = s->state;
|
|
|
984f77 |
s->state = state;
|
|
|
984f77 |
|
|
|
984f77 |
- if (!IN_SET(state, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL))
|
|
|
984f77 |
+ if (!IN_SET(state, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL, SCOPE_START_CHOWN))
|
|
|
984f77 |
s->timer_event_source = sd_event_source_unref(s->timer_event_source);
|
|
|
984f77 |
|
|
|
984f77 |
if (IN_SET(state, SCOPE_DEAD, SCOPE_FAILED)) {
|
|
|
984f77 |
@@ -301,26 +309,72 @@ fail:
|
|
|
984f77 |
scope_enter_dead(s, SCOPE_FAILURE_RESOURCES);
|
|
|
984f77 |
}
|
|
|
984f77 |
|
|
|
984f77 |
-static int scope_start(Unit *u) {
|
|
|
984f77 |
- Scope *s = SCOPE(u);
|
|
|
984f77 |
+static int scope_enter_start_chown(Scope *s) {
|
|
|
984f77 |
+ Unit *u = UNIT(s);
|
|
|
984f77 |
+ pid_t pid;
|
|
|
984f77 |
int r;
|
|
|
984f77 |
|
|
|
984f77 |
assert(s);
|
|
|
984f77 |
+ assert(s->user);
|
|
|
984f77 |
|
|
|
984f77 |
- if (unit_has_name(u, SPECIAL_INIT_SCOPE))
|
|
|
984f77 |
- return -EPERM;
|
|
|
984f77 |
+ r = scope_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), u->manager->default_timeout_start_usec));
|
|
|
984f77 |
+ if (r < 0)
|
|
|
984f77 |
+ return r;
|
|
|
984f77 |
|
|
|
984f77 |
- if (s->state == SCOPE_FAILED)
|
|
|
984f77 |
- return -EPERM;
|
|
|
984f77 |
+ r = unit_fork_helper_process(u, "(sd-chown-cgroup)", &pid);
|
|
|
984f77 |
+ if (r < 0)
|
|
|
984f77 |
+ goto fail;
|
|
|
984f77 |
|
|
|
984f77 |
- /* We can't fulfill this right now, please try again later */
|
|
|
984f77 |
- if (IN_SET(s->state, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL))
|
|
|
984f77 |
- return -EAGAIN;
|
|
|
984f77 |
+ if (r == 0) {
|
|
|
984f77 |
+ uid_t uid = UID_INVALID;
|
|
|
984f77 |
+ gid_t gid = GID_INVALID;
|
|
|
984f77 |
|
|
|
984f77 |
- assert(s->state == SCOPE_DEAD);
|
|
|
984f77 |
+ if (!isempty(s->user)) {
|
|
|
984f77 |
+ const char *user = s->user;
|
|
|
984f77 |
|
|
|
984f77 |
- if (!u->transient && !MANAGER_IS_RELOADING(u->manager))
|
|
|
984f77 |
- return -ENOENT;
|
|
|
984f77 |
+ r = get_user_creds(&user, &uid, &gid, NULL, NULL);
|
|
|
984f77 |
+ if (r < 0) {
|
|
|
984f77 |
+ log_unit_error_errno(UNIT(s), r, "Failed to resolve user \"%s\": %m", user);
|
|
|
984f77 |
+ _exit(EXIT_USER);
|
|
|
984f77 |
+ }
|
|
|
984f77 |
+ }
|
|
|
984f77 |
+
|
|
|
984f77 |
+ if (!isempty(s->group)) {
|
|
|
984f77 |
+ const char *group = s->group;
|
|
|
984f77 |
+
|
|
|
984f77 |
+ r = get_group_creds(&group, &gid);
|
|
|
984f77 |
+ if (r < 0) {
|
|
|
984f77 |
+ log_unit_error_errno(UNIT(s), r, "Failed to resolve group \"%s\": %m", group);
|
|
|
984f77 |
+ _exit(EXIT_GROUP);
|
|
|
984f77 |
+ }
|
|
|
984f77 |
+ }
|
|
|
984f77 |
+
|
|
|
984f77 |
+ r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, uid, gid);
|
|
|
984f77 |
+ if (r < 0) {
|
|
|
984f77 |
+ log_unit_error_errno(UNIT(s), r, "Failed to adjust control group access: %m");
|
|
|
984f77 |
+ _exit(EXIT_CGROUP);
|
|
|
984f77 |
+ }
|
|
|
984f77 |
+
|
|
|
984f77 |
+ _exit(EXIT_SUCCESS);
|
|
|
984f77 |
+ }
|
|
|
984f77 |
+
|
|
|
984f77 |
+ r = unit_watch_pid(UNIT(s), pid, true);
|
|
|
984f77 |
+ if (r < 0)
|
|
|
984f77 |
+ goto fail;
|
|
|
984f77 |
+
|
|
|
984f77 |
+ scope_set_state(s, SCOPE_START_CHOWN);
|
|
|
984f77 |
+
|
|
|
984f77 |
+ return 1;
|
|
|
984f77 |
+fail:
|
|
|
984f77 |
+ s->timer_event_source = sd_event_source_disable_unref(s->timer_event_source);
|
|
|
984f77 |
+ return r;
|
|
|
984f77 |
+}
|
|
|
984f77 |
+
|
|
|
984f77 |
+static int scope_enter_running(Scope *s) {
|
|
|
984f77 |
+ Unit *u = UNIT(s);
|
|
|
984f77 |
+ int r;
|
|
|
984f77 |
+
|
|
|
984f77 |
+ assert(s);
|
|
|
984f77 |
|
|
|
984f77 |
(void) bus_scope_track_controller(s);
|
|
|
984f77 |
|
|
|
984f77 |
@@ -328,11 +382,7 @@ static int scope_start(Unit *u) {
|
|
|
984f77 |
if (r < 0)
|
|
|
984f77 |
return r;
|
|
|
984f77 |
|
|
|
984f77 |
- (void) unit_realize_cgroup(u);
|
|
|
984f77 |
- (void) unit_reset_cpu_accounting(u);
|
|
|
984f77 |
- (void) unit_reset_ip_accounting(u);
|
|
|
984f77 |
-
|
|
|
984f77 |
- unit_export_state_files(UNIT(s));
|
|
|
984f77 |
+ unit_export_state_files(u);
|
|
|
984f77 |
|
|
|
984f77 |
r = unit_attach_pids_to_cgroup(u, UNIT(s)->pids, NULL);
|
|
|
984f77 |
if (r < 0) {
|
|
|
984f77 |
@@ -350,6 +400,38 @@ static int scope_start(Unit *u) {
|
|
|
984f77 |
return 1;
|
|
|
984f77 |
}
|
|
|
984f77 |
|
|
|
984f77 |
+static int scope_start(Unit *u) {
|
|
|
984f77 |
+ Scope *s = SCOPE(u);
|
|
|
984f77 |
+
|
|
|
984f77 |
+ assert(s);
|
|
|
984f77 |
+
|
|
|
984f77 |
+ if (unit_has_name(u, SPECIAL_INIT_SCOPE))
|
|
|
984f77 |
+ return -EPERM;
|
|
|
984f77 |
+
|
|
|
984f77 |
+ if (s->state == SCOPE_FAILED)
|
|
|
984f77 |
+ return -EPERM;
|
|
|
984f77 |
+
|
|
|
984f77 |
+ /* We can't fulfill this right now, please try again later */
|
|
|
984f77 |
+ if (IN_SET(s->state, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL))
|
|
|
984f77 |
+ return -EAGAIN;
|
|
|
984f77 |
+
|
|
|
984f77 |
+ assert(s->state == SCOPE_DEAD);
|
|
|
984f77 |
+
|
|
|
984f77 |
+ if (!u->transient && !MANAGER_IS_RELOADING(u->manager))
|
|
|
984f77 |
+ return -ENOENT;
|
|
|
984f77 |
+
|
|
|
984f77 |
+ (void) unit_realize_cgroup(u);
|
|
|
984f77 |
+ (void) unit_reset_cpu_accounting(u);
|
|
|
984f77 |
+ (void) unit_reset_ip_accounting(u);
|
|
|
984f77 |
+
|
|
|
984f77 |
+ /* We check only for User= option to keep behavior consistent with logic for service units,
|
|
|
984f77 |
+ * i.e. having 'Delegate=true Group=foo' w/o specifying User= has no effect. */
|
|
|
984f77 |
+ if (s->user && unit_cgroup_delegate(u))
|
|
|
984f77 |
+ return scope_enter_start_chown(s);
|
|
|
984f77 |
+
|
|
|
984f77 |
+ return scope_enter_running(s);
|
|
|
984f77 |
+}
|
|
|
984f77 |
+
|
|
|
984f77 |
static int scope_stop(Unit *u) {
|
|
|
984f77 |
Scope *s = SCOPE(u);
|
|
|
984f77 |
|
|
|
984f77 |
@@ -462,7 +544,17 @@ static void scope_notify_cgroup_empty_event(Unit *u) {
|
|
|
984f77 |
}
|
|
|
984f77 |
|
|
|
984f77 |
static void scope_sigchld_event(Unit *u, pid_t pid, int code, int status) {
|
|
|
984f77 |
- assert(u);
|
|
|
984f77 |
+ Scope *s = SCOPE(u);
|
|
|
984f77 |
+
|
|
|
984f77 |
+ assert(s);
|
|
|
984f77 |
+
|
|
|
984f77 |
+ if (s->state == SCOPE_START_CHOWN) {
|
|
|
984f77 |
+ if (!is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL))
|
|
|
984f77 |
+ scope_enter_dead(s, SCOPE_FAILURE_RESOURCES);
|
|
|
984f77 |
+ else
|
|
|
984f77 |
+ scope_enter_running(s);
|
|
|
984f77 |
+ return;
|
|
|
984f77 |
+ }
|
|
|
984f77 |
|
|
|
984f77 |
/* If we get a SIGCHLD event for one of the processes we were interested in, then we look for others to
|
|
|
984f77 |
* watch, under the assumption that we'll sooner or later get a SIGCHLD for them, as the original
|
|
|
984f77 |
@@ -495,6 +587,11 @@ static int scope_dispatch_timer(sd_event_source *source, usec_t usec, void *user
|
|
|
984f77 |
scope_enter_dead(s, SCOPE_FAILURE_TIMEOUT);
|
|
|
984f77 |
break;
|
|
|
984f77 |
|
|
|
984f77 |
+ case SCOPE_START_CHOWN:
|
|
|
984f77 |
+ log_unit_warning(UNIT(s), "User lookup timed out. Entering failed state.");
|
|
|
984f77 |
+ scope_enter_dead(s, SCOPE_FAILURE_TIMEOUT);
|
|
|
984f77 |
+ break;
|
|
|
984f77 |
+
|
|
|
984f77 |
default:
|
|
|
984f77 |
assert_not_reached("Timeout at wrong time.");
|
|
|
984f77 |
}
|
|
|
984f77 |
diff --git a/src/core/scope.h b/src/core/scope.h
|
|
|
984f77 |
index c38afb5e5d..7bed3eed9e 100644
|
|
|
984f77 |
--- a/src/core/scope.h
|
|
|
984f77 |
+++ b/src/core/scope.h
|
|
|
984f77 |
@@ -32,6 +32,9 @@ struct Scope {
|
|
|
984f77 |
bool was_abandoned;
|
|
|
984f77 |
|
|
|
984f77 |
sd_event_source *timer_event_source;
|
|
|
984f77 |
+
|
|
|
984f77 |
+ char *user;
|
|
|
984f77 |
+ char *group;
|
|
|
984f77 |
};
|
|
|
984f77 |
|
|
|
984f77 |
extern const UnitVTable scope_vtable;
|
|
|
984f77 |
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c
|
|
|
984f77 |
index 3910dfa812..c475bbafe0 100644
|
|
|
984f77 |
--- a/src/shared/bus-unit-util.c
|
|
|
984f77 |
+++ b/src/shared/bus-unit-util.c
|
|
|
984f77 |
@@ -1615,6 +1615,11 @@ static int bus_append_unit_property(sd_bus_message *m, const char *field, const
|
|
|
984f77 |
|
|
|
984f77 |
return bus_append_parse_sec_rename(m, field, eq);
|
|
|
984f77 |
|
|
|
984f77 |
+ /* Scope units don't have execution context but we still want to allow setting these two,
|
|
|
984f77 |
+ * so let's handle them separately. */
|
|
|
984f77 |
+ if (STR_IN_SET(field, "User", "Group"))
|
|
|
984f77 |
+ return bus_append_string(m, field, eq);
|
|
|
984f77 |
+
|
|
|
984f77 |
if (streq(field, "StartLimitBurst"))
|
|
|
984f77 |
|
|
|
984f77 |
return bus_append_safe_atou(m, field, eq);
|
|
|
984f77 |
diff --git a/test/TEST-19-DELEGATE/testsuite.sh b/test/TEST-19-DELEGATE/testsuite.sh
|
|
|
984f77 |
index c738bea10e..c4c948cc11 100755
|
|
|
984f77 |
--- a/test/TEST-19-DELEGATE/testsuite.sh
|
|
|
984f77 |
+++ b/test/TEST-19-DELEGATE/testsuite.sh
|
|
|
984f77 |
@@ -4,6 +4,16 @@
|
|
|
984f77 |
set -ex
|
|
|
984f77 |
set -o pipefail
|
|
|
984f77 |
|
|
|
984f77 |
+test_scope_unpriv_delegation() {
|
|
|
984f77 |
+ useradd test ||:
|
|
|
984f77 |
+ trap "userdel -r test" RETURN
|
|
|
984f77 |
+
|
|
|
984f77 |
+ systemd-run --uid=test -p User=test -p Delegate=yes --slice workload.slice --unit workload0.scope --scope \
|
|
|
984f77 |
+ test -w /sys/fs/cgroup/workload.slice/workload0.scope -a \
|
|
|
984f77 |
+ -w /sys/fs/cgroup/workload.slice/workload0.scope/cgroup.procs -a \
|
|
|
984f77 |
+ -w /sys/fs/cgroup/workload.slice/workload0.scope/cgroup.subtree_control
|
|
|
984f77 |
+}
|
|
|
984f77 |
+
|
|
|
984f77 |
if grep -q cgroup2 /proc/filesystems ; then
|
|
|
984f77 |
systemd-run --wait --unit=test0.service -p "DynamicUser=1" -p "Delegate=" \
|
|
|
984f77 |
test -w /sys/fs/cgroup/system.slice/test0.service/ -a \
|
|
|
984f77 |
@@ -15,6 +25,9 @@ if grep -q cgroup2 /proc/filesystems ; then
|
|
|
984f77 |
|
|
|
984f77 |
systemd-run --wait --unit=test2.service -p "DynamicUser=1" -p "Delegate=memory pids" \
|
|
|
984f77 |
grep pids /sys/fs/cgroup/system.slice/test2.service/cgroup.controllers
|
|
|
984f77 |
+
|
|
|
984f77 |
+ # Check that unprivileged delegation works for scopes
|
|
|
984f77 |
+ test_scope_unpriv_delegation
|
|
|
984f77 |
else
|
|
|
984f77 |
echo "Skipping TEST-19-DELEGATE, as the kernel doesn't actually support cgroupsv2" >&2
|
|
|
984f77 |
fi
|