8d419f
From f23d0328e563fa5534377297153daa098664147f Mon Sep 17 00:00:00 2001
8d419f
From: Michal Sekletar <msekleta@redhat.com>
8d419f
Date: Wed, 1 Jun 2022 10:15:06 +0200
8d419f
Subject: [PATCH] scope: allow unprivileged delegation on scopes
8d419f
8d419f
Previously it was possible to set delegate property for scope, but you
8d419f
were not able to allow unprivileged process to manage the scope's cgroup
8d419f
hierarchy. This is useful when launching manager process that  will run
8d419f
unprivileged but is supposed to manage its own (scope) sub-hierarchy.
8d419f
8d419f
Fixes #21683
8d419f
8d419f
(cherry picked from commit 03860190fefce8bbea3a6f0e77919b882ade517c)
8d419f
8d419f
Resolves: #2120604
8d419f
---
8d419f
 src/basic/unit-def.c       |   1 +
8d419f
 src/basic/unit-def.h       |   1 +
8d419f
 src/core/dbus-scope.c      |   6 ++
8d419f
 src/core/scope.c           | 130 ++++++++++++++++++++++++++++++++-----
8d419f
 src/core/scope.h           |   3 +
8d419f
 src/shared/bus-unit-util.c |   5 ++
8d419f
 test/units/testsuite-19.sh |  14 ++++
8d419f
 7 files changed, 143 insertions(+), 17 deletions(-)
8d419f
8d419f
diff --git a/src/basic/unit-def.c b/src/basic/unit-def.c
8d419f
index 2667e61dc4..9408141842 100644
8d419f
--- a/src/basic/unit-def.c
8d419f
+++ b/src/basic/unit-def.c
8d419f
@@ -170,6 +170,7 @@ DEFINE_STRING_TABLE_LOOKUP(path_state, PathState);
8d419f
 static const char* const scope_state_table[_SCOPE_STATE_MAX] = {
8d419f
         [SCOPE_DEAD] = "dead",
8d419f
         [SCOPE_RUNNING] = "running",
8d419f
+        [SCOPE_START_CHOWN] = "start-chown",
8d419f
         [SCOPE_ABANDONED] = "abandoned",
8d419f
         [SCOPE_STOP_SIGTERM] = "stop-sigterm",
8d419f
         [SCOPE_STOP_SIGKILL] = "stop-sigkill",
8d419f
diff --git a/src/basic/unit-def.h b/src/basic/unit-def.h
8d419f
index f80e554d2b..5fcd51c095 100644
8d419f
--- a/src/basic/unit-def.h
8d419f
+++ b/src/basic/unit-def.h
8d419f
@@ -114,6 +114,7 @@ typedef enum PathState {
8d419f
 
8d419f
 typedef enum ScopeState {
8d419f
         SCOPE_DEAD,
8d419f
+        SCOPE_START_CHOWN,
8d419f
         SCOPE_RUNNING,
8d419f
         SCOPE_ABANDONED,
8d419f
         SCOPE_STOP_SIGTERM,
8d419f
diff --git a/src/core/dbus-scope.c b/src/core/dbus-scope.c
8d419f
index 109ad6f2ef..0f59622166 100644
8d419f
--- a/src/core/dbus-scope.c
8d419f
+++ b/src/core/dbus-scope.c
8d419f
@@ -186,6 +186,12 @@ int bus_scope_set_property(
8d419f
                 r = bus_kill_context_set_transient_property(u, &s->kill_context, name, message, flags, error);
8d419f
                 if (r != 0)
8d419f
                         return r;
8d419f
+
8d419f
+                if (streq(name, "User"))
8d419f
+                        return bus_set_transient_user_relaxed(u, name, &s->user, message, flags, error);
8d419f
+
8d419f
+                if (streq(name, "Group"))
8d419f
+                        return bus_set_transient_user_relaxed(u, name, &s->group, message, flags, error);
8d419f
         }
8d419f
 
8d419f
         return 0;
8d419f
diff --git a/src/core/scope.c b/src/core/scope.c
8d419f
index 63d3288caf..a4da45ac43 100644
8d419f
--- a/src/core/scope.c
8d419f
+++ b/src/core/scope.c
8d419f
@@ -6,6 +6,7 @@
8d419f
 #include "alloc-util.h"
8d419f
 #include "dbus-scope.h"
8d419f
 #include "dbus-unit.h"
8d419f
+#include "exit-status.h"
8d419f
 #include "load-dropin.h"
8d419f
 #include "log.h"
8d419f
 #include "process-util.h"
8d419f
@@ -18,9 +19,11 @@
8d419f
 #include "strv.h"
8d419f
 #include "unit-name.h"
8d419f
 #include "unit.h"
8d419f
+#include "user-util.h"
8d419f
 
8d419f
 static const UnitActiveState state_translation_table[_SCOPE_STATE_MAX] = {
8d419f
         [SCOPE_DEAD] = UNIT_INACTIVE,
8d419f
+        [SCOPE_START_CHOWN] = UNIT_ACTIVATING,
8d419f
         [SCOPE_RUNNING] = UNIT_ACTIVE,
8d419f
         [SCOPE_ABANDONED] = UNIT_ACTIVE,
8d419f
         [SCOPE_STOP_SIGTERM] = UNIT_DEACTIVATING,
8d419f
@@ -39,6 +42,7 @@ static void scope_init(Unit *u) {
8d419f
         s->runtime_max_usec = USEC_INFINITY;
8d419f
         s->timeout_stop_usec = u->manager->default_timeout_stop_usec;
8d419f
         u->ignore_on_isolate = true;
8d419f
+        s->user = s->group = NULL;
8d419f
 }
8d419f
 
8d419f
 static void scope_done(Unit *u) {
8d419f
@@ -50,6 +54,9 @@ static void scope_done(Unit *u) {
8d419f
         s->controller_track = sd_bus_track_unref(s->controller_track);
8d419f
 
8d419f
         s->timer_event_source = sd_event_source_disable_unref(s->timer_event_source);
8d419f
+
8d419f
+        s->user = mfree(s->user);
8d419f
+        s->group = mfree(s->group);
8d419f
 }
8d419f
 
8d419f
 static usec_t scope_running_timeout(Scope *s) {
8d419f
@@ -107,7 +114,7 @@ static void scope_set_state(Scope *s, ScopeState state) {
8d419f
         old_state = s->state;
8d419f
         s->state = state;
8d419f
 
8d419f
-        if (!IN_SET(state, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL))
8d419f
+        if (!IN_SET(state, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL, SCOPE_START_CHOWN))
8d419f
                 s->timer_event_source = sd_event_source_disable_unref(s->timer_event_source);
8d419f
 
8d419f
         if (IN_SET(state, SCOPE_DEAD, SCOPE_FAILED)) {
8d419f
@@ -353,26 +360,72 @@ fail:
8d419f
         scope_enter_dead(s, SCOPE_FAILURE_RESOURCES);
8d419f
 }
8d419f
 
8d419f
-static int scope_start(Unit *u) {
8d419f
-        Scope *s = SCOPE(u);
8d419f
+static int scope_enter_start_chown(Scope *s) {
8d419f
+        Unit *u = UNIT(s);
8d419f
+        pid_t pid;
8d419f
         int r;
8d419f
 
8d419f
         assert(s);
8d419f
+        assert(s->user);
8d419f
 
8d419f
-        if (unit_has_name(u, SPECIAL_INIT_SCOPE))
8d419f
-                return -EPERM;
8d419f
+        r = scope_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), u->manager->default_timeout_start_usec));
8d419f
+        if (r < 0)
8d419f
+                return r;
8d419f
 
8d419f
-        if (s->state == SCOPE_FAILED)
8d419f
-                return -EPERM;
8d419f
+        r = unit_fork_helper_process(u, "(sd-chown-cgroup)", &pid;;
8d419f
+        if (r < 0)
8d419f
+                goto fail;
8d419f
 
8d419f
-        /* We can't fulfill this right now, please try again later */
8d419f
-        if (IN_SET(s->state, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL))
8d419f
-                return -EAGAIN;
8d419f
+        if (r == 0) {
8d419f
+                uid_t uid = UID_INVALID;
8d419f
+                gid_t gid = GID_INVALID;
8d419f
 
8d419f
-        assert(s->state == SCOPE_DEAD);
8d419f
+                if (!isempty(s->user)) {
8d419f
+                        const char *user = s->user;
8d419f
 
8d419f
-        if (!u->transient && !MANAGER_IS_RELOADING(u->manager))
8d419f
-                return -ENOENT;
8d419f
+                        r = get_user_creds(&user, &uid, &gid, NULL, NULL, 0);
8d419f
+                        if (r < 0) {
8d419f
+                                log_unit_error_errno(UNIT(s), r, "Failed to resolve user \"%s\": %m", user);
8d419f
+                                _exit(EXIT_USER);
8d419f
+                        }
8d419f
+                }
8d419f
+
8d419f
+                if (!isempty(s->group)) {
8d419f
+                        const char *group = s->group;
8d419f
+
8d419f
+                        r = get_group_creds(&group, &gid, 0);
8d419f
+                        if (r < 0) {
8d419f
+                                log_unit_error_errno(UNIT(s), r, "Failed to resolve group \"%s\": %m", group);
8d419f
+                                _exit(EXIT_GROUP);
8d419f
+                        }
8d419f
+                }
8d419f
+
8d419f
+                r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, uid, gid);
8d419f
+                if (r < 0) {
8d419f
+                        log_unit_error_errno(UNIT(s), r, "Failed to adjust control group access: %m");
8d419f
+                        _exit(EXIT_CGROUP);
8d419f
+                }
8d419f
+
8d419f
+                _exit(EXIT_SUCCESS);
8d419f
+        }
8d419f
+
8d419f
+        r = unit_watch_pid(UNIT(s), pid, true);
8d419f
+        if (r < 0)
8d419f
+                goto fail;
8d419f
+
8d419f
+        scope_set_state(s, SCOPE_START_CHOWN);
8d419f
+
8d419f
+        return 1;
8d419f
+fail:
8d419f
+        s->timer_event_source = sd_event_source_disable_unref(s->timer_event_source);
8d419f
+        return r;
8d419f
+}
8d419f
+
8d419f
+static int scope_enter_running(Scope *s) {
8d419f
+        Unit *u = UNIT(s);
8d419f
+        int r;
8d419f
+
8d419f
+        assert(s);
8d419f
 
8d419f
         (void) bus_scope_track_controller(s);
8d419f
 
8d419f
@@ -380,9 +433,6 @@ static int scope_start(Unit *u) {
8d419f
         if (r < 0)
8d419f
                 return r;
8d419f
 
8d419f
-        (void) unit_realize_cgroup(u);
8d419f
-        (void) unit_reset_accounting(u);
8d419f
-
8d419f
         unit_export_state_files(u);
8d419f
 
8d419f
         r = unit_attach_pids_to_cgroup(u, u->pids, NULL);
8d419f
@@ -416,6 +466,37 @@ static int scope_start(Unit *u) {
8d419f
         return 1;
8d419f
 }
8d419f
 
8d419f
+static int scope_start(Unit *u) {
8d419f
+        Scope *s = SCOPE(u);
8d419f
+
8d419f
+        assert(s);
8d419f
+
8d419f
+        if (unit_has_name(u, SPECIAL_INIT_SCOPE))
8d419f
+                return -EPERM;
8d419f
+
8d419f
+        if (s->state == SCOPE_FAILED)
8d419f
+                return -EPERM;
8d419f
+
8d419f
+        /* We can't fulfill this right now, please try again later */
8d419f
+        if (IN_SET(s->state, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL))
8d419f
+                return -EAGAIN;
8d419f
+
8d419f
+        assert(s->state == SCOPE_DEAD);
8d419f
+
8d419f
+        if (!u->transient && !MANAGER_IS_RELOADING(u->manager))
8d419f
+                return -ENOENT;
8d419f
+
8d419f
+        (void) unit_realize_cgroup(u);
8d419f
+        (void) unit_reset_accounting(u);
8d419f
+
8d419f
+        /* We check only for User= option to keep behavior consistent with logic for service units,
8d419f
+         * i.e. having 'Delegate=true Group=foo' w/o specifing User= has no effect. */
8d419f
+        if (s->user && unit_cgroup_delegate(u))
8d419f
+                return scope_enter_start_chown(s);
8d419f
+
8d419f
+        return scope_enter_running(s);
8d419f
+}
8d419f
+
8d419f
 static int scope_stop(Unit *u) {
8d419f
         Scope *s = SCOPE(u);
8d419f
 
8d419f
@@ -547,7 +628,17 @@ static void scope_notify_cgroup_empty_event(Unit *u) {
8d419f
 }
8d419f
 
8d419f
 static void scope_sigchld_event(Unit *u, pid_t pid, int code, int status) {
8d419f
-        assert(u);
8d419f
+        Scope *s = SCOPE(u);
8d419f
+
8d419f
+        assert(s);
8d419f
+
8d419f
+        if (s->state == SCOPE_START_CHOWN) {
8d419f
+                if (!is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL))
8d419f
+                        scope_enter_dead(s, SCOPE_FAILURE_RESOURCES);
8d419f
+                else
8d419f
+                        scope_enter_running(s);
8d419f
+                return;
8d419f
+        }
8d419f
 
8d419f
         /* If we get a SIGCHLD event for one of the processes we were interested in, then we look for others to
8d419f
          * watch, under the assumption that we'll sooner or later get a SIGCHLD for them, as the original
8d419f
@@ -585,6 +676,11 @@ static int scope_dispatch_timer(sd_event_source *source, usec_t usec, void *user
8d419f
                 scope_enter_dead(s, SCOPE_FAILURE_TIMEOUT);
8d419f
                 break;
8d419f
 
8d419f
+        case SCOPE_START_CHOWN:
8d419f
+                log_unit_warning(UNIT(s), "User lookup timed out. Entering failed state.");
8d419f
+                scope_enter_dead(s, SCOPE_FAILURE_TIMEOUT);
8d419f
+                break;
8d419f
+
8d419f
         default:
8d419f
                 assert_not_reached();
8d419f
         }
8d419f
diff --git a/src/core/scope.h b/src/core/scope.h
8d419f
index 03a9ba4324..def1541652 100644
8d419f
--- a/src/core/scope.h
8d419f
+++ b/src/core/scope.h
8d419f
@@ -34,6 +34,9 @@ struct Scope {
8d419f
         bool was_abandoned;
8d419f
 
8d419f
         sd_event_source *timer_event_source;
8d419f
+
8d419f
+        char *user;
8d419f
+        char *group;
8d419f
 };
8d419f
 
8d419f
 extern const UnitVTable scope_vtable;
8d419f
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c
8d419f
index c211fe34d5..33b0b947b7 100644
8d419f
--- a/src/shared/bus-unit-util.c
8d419f
+++ b/src/shared/bus-unit-util.c
8d419f
@@ -2130,6 +2130,11 @@ static int bus_append_scope_property(sd_bus_message *m, const char *field, const
8d419f
         if (streq(field, "TimeoutStopSec"))
8d419f
                 return bus_append_parse_sec_rename(m, field, eq);
8d419f
 
8d419f
+        /* Scope units don't have execution context but we still want to allow setting these two,
8d419f
+         * so let's handle them separately. */
8d419f
+        if (STR_IN_SET(field, "User", "Group"))
8d419f
+                return bus_append_string(m, field, eq);
8d419f
+
8d419f
         return 0;
8d419f
 }
8d419f
 
8d419f
diff --git a/test/units/testsuite-19.sh b/test/units/testsuite-19.sh
8d419f
index ee4eb8431e..6ce6d3d429 100755
8d419f
--- a/test/units/testsuite-19.sh
8d419f
+++ b/test/units/testsuite-19.sh
8d419f
@@ -3,6 +3,16 @@
8d419f
 set -eux
8d419f
 set -o pipefail
8d419f
 
8d419f
+test_scope_unpriv_delegation() {
8d419f
+    useradd test ||:
8d419f
+    trap "userdel -r test" RETURN
8d419f
+
8d419f
+    systemd-run --uid=test -p User=test -p Delegate=yes --slice workload.slice --unit workload0.scope --scope \
8d419f
+            test -w /sys/fs/cgroup/workload.slice/workload0.scope -a \
8d419f
+            -w /sys/fs/cgroup/workload.slice/workload0.scope/cgroup.procs -a \
8d419f
+            -w /sys/fs/cgroup/workload.slice/workload0.scope/cgroup.subtree_control
8d419f
+}
8d419f
+
8d419f
 if grep -q cgroup2 /proc/filesystems ; then
8d419f
     systemd-run --wait --unit=test0.service -p "DynamicUser=1" -p "Delegate=" \
8d419f
                 test -w /sys/fs/cgroup/system.slice/test0.service/ -a \
8d419f
@@ -31,6 +41,10 @@ if grep -q cgroup2 /proc/filesystems ; then
8d419f
 
8d419f
     # And now check again, "io" should have vanished
8d419f
     grep -qv io /sys/fs/cgroup/system.slice/cgroup.controllers
8d419f
+
8d419f
+    # Check that unprivileged delegation works for scopes
8d419f
+    test_scope_unpriv_delegation
8d419f
+
8d419f
 else
8d419f
     echo "Skipping TEST-19-DELEGATE, as the kernel doesn't actually support cgroup v2" >&2
8d419f
 fi