Blob Blame History Raw
From 5b5571de21d1ddf9a00511a6b2f25d630a903f05 Mon Sep 17 00:00:00 2001
From: Michal Sekletar <msekleta@redhat.com>
Date: Wed, 1 Jun 2022 10:15:06 +0200
Subject: [PATCH] scope: allow unprivileged delegation on scopes

Previously it was possible to set the Delegate= property on a scope, but
there was no way to allow an unprivileged process to manage the scope's
cgroup hierarchy. This is useful when launching a manager process that will
run unprivileged but is supposed to manage its own (scope) sub-hierarchy.

Fixes #21683

(cherry picked from commit 03860190fefce8bbea3a6f0e77919b882ade517c)

Resolves: #2068575
---
 src/basic/unit-def.c               |   1 +
 src/basic/unit-def.h               |   1 +
 src/core/dbus-scope.c              |   6 ++
 src/core/scope.c                   | 135 +++++++++++++++++++++++++----
 src/core/scope.h                   |   3 +
 src/shared/bus-unit-util.c         |   5 ++
 test/TEST-19-DELEGATE/testsuite.sh |  13 +++
 7 files changed, 145 insertions(+), 19 deletions(-)

diff --git a/src/basic/unit-def.c b/src/basic/unit-def.c
index e79cc73dd3..16c4d38d41 100644
--- a/src/basic/unit-def.c
+++ b/src/basic/unit-def.c
@@ -160,6 +160,7 @@ DEFINE_STRING_TABLE_LOOKUP(path_state, PathState);
 
 static const char* const scope_state_table[_SCOPE_STATE_MAX] = {
         [SCOPE_DEAD] = "dead",
+        [SCOPE_START_CHOWN] = "start-chown",
         [SCOPE_RUNNING] = "running",
         [SCOPE_ABANDONED] = "abandoned",
         [SCOPE_STOP_SIGTERM] = "stop-sigterm",
diff --git a/src/basic/unit-def.h b/src/basic/unit-def.h
index 8eea379a6d..03d151ec19 100644
--- a/src/basic/unit-def.h
+++ b/src/basic/unit-def.h
@@ -99,6 +99,7 @@ typedef enum PathState {
 
 typedef enum ScopeState {
         SCOPE_DEAD,
+        SCOPE_START_CHOWN,
         SCOPE_RUNNING,
         SCOPE_ABANDONED,
         SCOPE_STOP_SIGTERM,
diff --git a/src/core/dbus-scope.c b/src/core/dbus-scope.c
index 0bbf64fff1..534302d188 100644
--- a/src/core/dbus-scope.c
+++ b/src/core/dbus-scope.c
@@ -178,6 +178,12 @@ int bus_scope_set_property(
                 r = bus_kill_context_set_transient_property(u, &s->kill_context, name, message, flags, error);
                 if (r != 0)
                         return r;
+
+                if (streq(name, "User"))
+                        return bus_set_transient_user_relaxed(u, name, &s->user, message, flags, error);
+
+                if (streq(name, "Group"))
+                        return bus_set_transient_user_relaxed(u, name, &s->group, message, flags, error);
         }
 
         return 0;
diff --git a/src/core/scope.c b/src/core/scope.c
index 5a595c65a6..9cc5f89099 100644
--- a/src/core/scope.c
+++ b/src/core/scope.c
@@ -5,6 +5,8 @@
 
 #include "alloc-util.h"
 #include "dbus-scope.h"
+#include "dbus-unit.h"
+#include "exit-status.h"
 #include "load-dropin.h"
 #include "log.h"
 #include "scope.h"
@@ -14,9 +16,11 @@
 #include "strv.h"
 #include "unit-name.h"
 #include "unit.h"
+#include "user-util.h"
 
 static const UnitActiveState state_translation_table[_SCOPE_STATE_MAX] = {
         [SCOPE_DEAD] = UNIT_INACTIVE,
+        [SCOPE_START_CHOWN] = UNIT_ACTIVATING,
         [SCOPE_RUNNING] = UNIT_ACTIVE,
         [SCOPE_ABANDONED] = UNIT_ACTIVE,
         [SCOPE_STOP_SIGTERM] = UNIT_DEACTIVATING,
@@ -34,6 +38,7 @@ static void scope_init(Unit *u) {
 
         s->timeout_stop_usec = u->manager->default_timeout_stop_usec;
         u->ignore_on_isolate = true;
+        s->user = s->group = NULL;
 }
 
 static void scope_done(Unit *u) {
@@ -45,6 +50,9 @@ static void scope_done(Unit *u) {
         s->controller_track = sd_bus_track_unref(s->controller_track);
 
         s->timer_event_source = sd_event_source_unref(s->timer_event_source);
+
+        s->user = mfree(s->user);
+        s->group = mfree(s->group);
 }
 
 static int scope_arm_timer(Scope *s, usec_t usec) {
@@ -84,7 +92,7 @@ static void scope_set_state(Scope *s, ScopeState state) {
         old_state = s->state;
         s->state = state;
 
-        if (!IN_SET(state, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL))
+        if (!IN_SET(state, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL, SCOPE_START_CHOWN))
                 s->timer_event_source = sd_event_source_unref(s->timer_event_source);
 
         if (IN_SET(state, SCOPE_DEAD, SCOPE_FAILED)) {
@@ -301,26 +309,72 @@ fail:
         scope_enter_dead(s, SCOPE_FAILURE_RESOURCES);
 }
 
-static int scope_start(Unit *u) {
-        Scope *s = SCOPE(u);
+static int scope_enter_start_chown(Scope *s) {
+        Unit *u = UNIT(s);
+        pid_t pid;
         int r;
 
         assert(s);
+        assert(s->user);
 
-        if (unit_has_name(u, SPECIAL_INIT_SCOPE))
-                return -EPERM;
+        r = scope_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), u->manager->default_timeout_start_usec));
+        if (r < 0)
+                return r;
 
-        if (s->state == SCOPE_FAILED)
-                return -EPERM;
+        r = unit_fork_helper_process(u, "(sd-chown-cgroup)", &pid);
+        if (r < 0)
+                goto fail;
 
-        /* We can't fulfill this right now, please try again later */
-        if (IN_SET(s->state, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL))
-                return -EAGAIN;
+        if (r == 0) {
+                uid_t uid = UID_INVALID;
+                gid_t gid = GID_INVALID;
 
-        assert(s->state == SCOPE_DEAD);
+                if (!isempty(s->user)) {
+                        const char *user = s->user;
 
-        if (!u->transient && !MANAGER_IS_RELOADING(u->manager))
-                return -ENOENT;
+                        r = get_user_creds(&user, &uid, &gid, NULL, NULL);
+                        if (r < 0) {
+                                log_unit_error_errno(UNIT(s), r, "Failed to resolve user \"%s\": %m", user);
+                                _exit(EXIT_USER);
+                        }
+                }
+
+                if (!isempty(s->group)) {
+                        const char *group = s->group;
+
+                        r = get_group_creds(&group, &gid);
+                        if (r < 0) {
+                                log_unit_error_errno(UNIT(s), r, "Failed to resolve group \"%s\": %m", group);
+                                _exit(EXIT_GROUP);
+                        }
+                }
+
+                r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, uid, gid);
+                if (r < 0) {
+                        log_unit_error_errno(UNIT(s), r, "Failed to adjust control group access: %m");
+                        _exit(EXIT_CGROUP);
+                }
+
+                _exit(EXIT_SUCCESS);
+        }
+
+        r = unit_watch_pid(UNIT(s), pid, true);
+        if (r < 0)
+                goto fail;
+
+        scope_set_state(s, SCOPE_START_CHOWN);
+
+        return 1;
+fail:
+        s->timer_event_source = sd_event_source_disable_unref(s->timer_event_source);
+        return r;
+}
+
+static int scope_enter_running(Scope *s) {
+        Unit *u = UNIT(s);
+        int r;
+
+        assert(s);
 
         (void) bus_scope_track_controller(s);
 
@@ -328,11 +382,7 @@ static int scope_start(Unit *u) {
         if (r < 0)
                 return r;
 
-        (void) unit_realize_cgroup(u);
-        (void) unit_reset_cpu_accounting(u);
-        (void) unit_reset_ip_accounting(u);
-
-        unit_export_state_files(UNIT(s));
+        unit_export_state_files(u);
 
         r = unit_attach_pids_to_cgroup(u, UNIT(s)->pids, NULL);
         if (r < 0) {
@@ -350,6 +400,38 @@ static int scope_start(Unit *u) {
         return 1;
 }
 
+static int scope_start(Unit *u) {
+        Scope *s = SCOPE(u);
+
+        assert(s);
+
+        if (unit_has_name(u, SPECIAL_INIT_SCOPE))
+                return -EPERM;
+
+        if (s->state == SCOPE_FAILED)
+                return -EPERM;
+
+        /* We can't fulfill this right now, please try again later */
+        if (IN_SET(s->state, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL))
+                return -EAGAIN;
+
+        assert(s->state == SCOPE_DEAD);
+
+        if (!u->transient && !MANAGER_IS_RELOADING(u->manager))
+                return -ENOENT;
+
+        (void) unit_realize_cgroup(u);
+        (void) unit_reset_cpu_accounting(u);
+        (void) unit_reset_ip_accounting(u);
+
+        /* We check only for User= option to keep behavior consistent with logic for service units,
+         * i.e. having 'Delegate=true Group=foo' w/o specifying User= has no effect. */
+        if (s->user && unit_cgroup_delegate(u))
+                return scope_enter_start_chown(s);
+
+        return scope_enter_running(s);
+}
+
 static int scope_stop(Unit *u) {
         Scope *s = SCOPE(u);
 
@@ -462,7 +544,17 @@ static void scope_notify_cgroup_empty_event(Unit *u) {
 }
 
 static void scope_sigchld_event(Unit *u, pid_t pid, int code, int status) {
-        assert(u);
+        Scope *s = SCOPE(u);
+
+        assert(s);
+
+        if (s->state == SCOPE_START_CHOWN) {
+                if (!is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL))
+                        scope_enter_dead(s, SCOPE_FAILURE_RESOURCES);
+                else
+                        scope_enter_running(s);
+                return;
+        }
 
         /* If we get a SIGCHLD event for one of the processes we were interested in, then we look for others to
          * watch, under the assumption that we'll sooner or later get a SIGCHLD for them, as the original
@@ -495,6 +587,11 @@ static int scope_dispatch_timer(sd_event_source *source, usec_t usec, void *user
                 scope_enter_dead(s, SCOPE_FAILURE_TIMEOUT);
                 break;
 
+        case SCOPE_START_CHOWN:
+                log_unit_warning(UNIT(s), "User lookup timed out. Entering failed state.");
+                scope_enter_dead(s, SCOPE_FAILURE_TIMEOUT);
+                break;
+
         default:
                 assert_not_reached("Timeout at wrong time.");
         }
diff --git a/src/core/scope.h b/src/core/scope.h
index c38afb5e5d..7bed3eed9e 100644
--- a/src/core/scope.h
+++ b/src/core/scope.h
@@ -32,6 +32,9 @@ struct Scope {
         bool was_abandoned;
 
         sd_event_source *timer_event_source;
+
+        char *user;
+        char *group;
 };
 
 extern const UnitVTable scope_vtable;
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c
index 3910dfa812..c475bbafe0 100644
--- a/src/shared/bus-unit-util.c
+++ b/src/shared/bus-unit-util.c
@@ -1615,6 +1615,11 @@ static int bus_append_unit_property(sd_bus_message *m, const char *field, const
 
                 return bus_append_parse_sec_rename(m, field, eq);
 
+        /* Scope units don't have execution context but we still want to allow setting these two,
+         * so let's handle them separately. */
+        if (STR_IN_SET(field, "User", "Group"))
+                return bus_append_string(m, field, eq);
+
         if (streq(field, "StartLimitBurst"))
 
                 return bus_append_safe_atou(m, field, eq);
diff --git a/test/TEST-19-DELEGATE/testsuite.sh b/test/TEST-19-DELEGATE/testsuite.sh
index c738bea10e..c4c948cc11 100755
--- a/test/TEST-19-DELEGATE/testsuite.sh
+++ b/test/TEST-19-DELEGATE/testsuite.sh
@@ -4,6 +4,16 @@
 set -ex
 set -o pipefail
 
+test_scope_unpriv_delegation() {
+    useradd test ||:
+    trap "userdel -r test" RETURN
+
+    systemd-run --uid=test -p User=test -p Delegate=yes --slice workload.slice --unit workload0.scope --scope \
+            test -w /sys/fs/cgroup/workload.slice/workload0.scope -a \
+            -w /sys/fs/cgroup/workload.slice/workload0.scope/cgroup.procs -a \
+            -w /sys/fs/cgroup/workload.slice/workload0.scope/cgroup.subtree_control
+}
+
 if grep -q cgroup2 /proc/filesystems ; then
         systemd-run --wait --unit=test0.service -p "DynamicUser=1" -p "Delegate=" \
                     test -w /sys/fs/cgroup/system.slice/test0.service/ -a \
@@ -15,6 +25,9 @@ if grep -q cgroup2 /proc/filesystems ; then
 
         systemd-run --wait --unit=test2.service -p "DynamicUser=1" -p "Delegate=memory pids" \
                     grep pids /sys/fs/cgroup/system.slice/test2.service/cgroup.controllers
+
+        # Check that unprivileged delegation works for scopes
+        test_scope_unpriv_delegation
 else
         echo "Skipping TEST-19-DELEGATE, as the kernel doesn't actually support cgroupsv2" >&2
 fi