b7dd4d
From 5b5571de21d1ddf9a00511a6b2f25d630a903f05 Mon Sep 17 00:00:00 2001
b7dd4d
From: Michal Sekletar <msekleta@redhat.com>
b7dd4d
Date: Wed, 1 Jun 2022 10:15:06 +0200
b7dd4d
Subject: [PATCH] scope: allow unprivileged delegation on scopes
b7dd4d
b7dd4d
Previously it was possible to set delegate property for scope, but you
b7dd4d
were not able to allow unprivileged process to manage the scope's cgroup
b7dd4d
hierarchy. This is useful when launching manager process that  will run
b7dd4d
unprivileged but is supposed to manage its own (scope) sub-hierarchy.
b7dd4d
b7dd4d
Fixes #21683
b7dd4d
b7dd4d
(cherry picked from commit 03860190fefce8bbea3a6f0e77919b882ade517c)
b7dd4d
b7dd4d
Resolves: #2068575
b7dd4d
---
b7dd4d
 src/basic/unit-def.c               |   1 +
b7dd4d
 src/basic/unit-def.h               |   1 +
b7dd4d
 src/core/dbus-scope.c              |   6 ++
b7dd4d
 src/core/scope.c                   | 135 +++++++++++++++++++++++++----
b7dd4d
 src/core/scope.h                   |   3 +
b7dd4d
 src/shared/bus-unit-util.c         |   5 ++
b7dd4d
 test/TEST-19-DELEGATE/testsuite.sh |  13 +++
b7dd4d
 7 files changed, 145 insertions(+), 19 deletions(-)
b7dd4d
b7dd4d
diff --git a/src/basic/unit-def.c b/src/basic/unit-def.c
b7dd4d
index e79cc73dd3..16c4d38d41 100644
b7dd4d
--- a/src/basic/unit-def.c
b7dd4d
+++ b/src/basic/unit-def.c
b7dd4d
@@ -160,6 +160,7 @@ DEFINE_STRING_TABLE_LOOKUP(path_state, PathState);
b7dd4d
 
b7dd4d
 static const char* const scope_state_table[_SCOPE_STATE_MAX] = {
b7dd4d
         [SCOPE_DEAD] = "dead",
b7dd4d
+        [SCOPE_START_CHOWN] = "start-chown",
b7dd4d
         [SCOPE_RUNNING] = "running",
b7dd4d
         [SCOPE_ABANDONED] = "abandoned",
b7dd4d
         [SCOPE_STOP_SIGTERM] = "stop-sigterm",
b7dd4d
diff --git a/src/basic/unit-def.h b/src/basic/unit-def.h
b7dd4d
index 8eea379a6d..03d151ec19 100644
b7dd4d
--- a/src/basic/unit-def.h
b7dd4d
+++ b/src/basic/unit-def.h
b7dd4d
@@ -99,6 +99,7 @@ typedef enum PathState {
b7dd4d
 
b7dd4d
 typedef enum ScopeState {
b7dd4d
         SCOPE_DEAD,
b7dd4d
+        SCOPE_START_CHOWN,
b7dd4d
         SCOPE_RUNNING,
b7dd4d
         SCOPE_ABANDONED,
b7dd4d
         SCOPE_STOP_SIGTERM,
b7dd4d
diff --git a/src/core/dbus-scope.c b/src/core/dbus-scope.c
b7dd4d
index 0bbf64fff1..534302d188 100644
b7dd4d
--- a/src/core/dbus-scope.c
b7dd4d
+++ b/src/core/dbus-scope.c
b7dd4d
@@ -178,6 +178,12 @@ int bus_scope_set_property(
b7dd4d
                 r = bus_kill_context_set_transient_property(u, &s->kill_context, name, message, flags, error);
b7dd4d
                 if (r != 0)
b7dd4d
                         return r;
b7dd4d
+
b7dd4d
+                if (streq(name, "User"))
b7dd4d
+                        return bus_set_transient_user_relaxed(u, name, &s->user, message, flags, error);
b7dd4d
+
b7dd4d
+                if (streq(name, "Group"))
b7dd4d
+                        return bus_set_transient_user_relaxed(u, name, &s->group, message, flags, error);
b7dd4d
         }
b7dd4d
 
b7dd4d
         return 0;
b7dd4d
diff --git a/src/core/scope.c b/src/core/scope.c
b7dd4d
index 5a595c65a6..9cc5f89099 100644
b7dd4d
--- a/src/core/scope.c
b7dd4d
+++ b/src/core/scope.c
b7dd4d
@@ -5,6 +5,8 @@
b7dd4d
 
b7dd4d
 #include "alloc-util.h"
b7dd4d
 #include "dbus-scope.h"
b7dd4d
+#include "dbus-unit.h"
b7dd4d
+#include "exit-status.h"
b7dd4d
 #include "load-dropin.h"
b7dd4d
 #include "log.h"
b7dd4d
 #include "scope.h"
b7dd4d
@@ -14,9 +16,11 @@
b7dd4d
 #include "strv.h"
b7dd4d
 #include "unit-name.h"
b7dd4d
 #include "unit.h"
b7dd4d
+#include "user-util.h"
b7dd4d
 
b7dd4d
 static const UnitActiveState state_translation_table[_SCOPE_STATE_MAX] = {
b7dd4d
         [SCOPE_DEAD] = UNIT_INACTIVE,
b7dd4d
+        [SCOPE_START_CHOWN] = UNIT_ACTIVATING,
b7dd4d
         [SCOPE_RUNNING] = UNIT_ACTIVE,
b7dd4d
         [SCOPE_ABANDONED] = UNIT_ACTIVE,
b7dd4d
         [SCOPE_STOP_SIGTERM] = UNIT_DEACTIVATING,
b7dd4d
@@ -34,6 +38,7 @@ static void scope_init(Unit *u) {
b7dd4d
 
b7dd4d
         s->timeout_stop_usec = u->manager->default_timeout_stop_usec;
b7dd4d
         u->ignore_on_isolate = true;
b7dd4d
+        s->user = s->group = NULL;
b7dd4d
 }
b7dd4d
 
b7dd4d
 static void scope_done(Unit *u) {
b7dd4d
@@ -45,6 +50,9 @@ static void scope_done(Unit *u) {
b7dd4d
         s->controller_track = sd_bus_track_unref(s->controller_track);
b7dd4d
 
b7dd4d
         s->timer_event_source = sd_event_source_unref(s->timer_event_source);
b7dd4d
+
b7dd4d
+        s->user = mfree(s->user);
b7dd4d
+        s->group = mfree(s->group);
b7dd4d
 }
b7dd4d
 
b7dd4d
 static int scope_arm_timer(Scope *s, usec_t usec) {
b7dd4d
@@ -84,7 +92,7 @@ static void scope_set_state(Scope *s, ScopeState state) {
b7dd4d
         old_state = s->state;
b7dd4d
         s->state = state;
b7dd4d
 
b7dd4d
-        if (!IN_SET(state, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL))
b7dd4d
+        if (!IN_SET(state, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL, SCOPE_START_CHOWN))
b7dd4d
                 s->timer_event_source = sd_event_source_unref(s->timer_event_source);
b7dd4d
 
b7dd4d
         if (IN_SET(state, SCOPE_DEAD, SCOPE_FAILED)) {
b7dd4d
@@ -301,26 +309,72 @@ fail:
b7dd4d
         scope_enter_dead(s, SCOPE_FAILURE_RESOURCES);
b7dd4d
 }
b7dd4d
 
b7dd4d
-static int scope_start(Unit *u) {
b7dd4d
-        Scope *s = SCOPE(u);
b7dd4d
+static int scope_enter_start_chown(Scope *s) {
b7dd4d
+        Unit *u = UNIT(s);
b7dd4d
+        pid_t pid;
b7dd4d
         int r;
b7dd4d
 
b7dd4d
         assert(s);
b7dd4d
+        assert(s->user);
b7dd4d
 
b7dd4d
-        if (unit_has_name(u, SPECIAL_INIT_SCOPE))
b7dd4d
-                return -EPERM;
b7dd4d
+        r = scope_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), u->manager->default_timeout_start_usec));
b7dd4d
+        if (r < 0)
b7dd4d
+                return r;
b7dd4d
 
b7dd4d
-        if (s->state == SCOPE_FAILED)
b7dd4d
-                return -EPERM;
b7dd4d
+        r = unit_fork_helper_process(u, "(sd-chown-cgroup)", &pid;;
b7dd4d
+        if (r < 0)
b7dd4d
+                goto fail;
b7dd4d
 
b7dd4d
-        /* We can't fulfill this right now, please try again later */
b7dd4d
-        if (IN_SET(s->state, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL))
b7dd4d
-                return -EAGAIN;
b7dd4d
+        if (r == 0) {
b7dd4d
+                uid_t uid = UID_INVALID;
b7dd4d
+                gid_t gid = GID_INVALID;
b7dd4d
 
b7dd4d
-        assert(s->state == SCOPE_DEAD);
b7dd4d
+                if (!isempty(s->user)) {
b7dd4d
+                        const char *user = s->user;
b7dd4d
 
b7dd4d
-        if (!u->transient && !MANAGER_IS_RELOADING(u->manager))
b7dd4d
-                return -ENOENT;
b7dd4d
+                        r = get_user_creds(&user, &uid, &gid, NULL, NULL);
b7dd4d
+                        if (r < 0) {
b7dd4d
+                                log_unit_error_errno(UNIT(s), r, "Failed to resolve user \"%s\": %m", user);
b7dd4d
+                                _exit(EXIT_USER);
b7dd4d
+                        }
b7dd4d
+                }
b7dd4d
+
b7dd4d
+                if (!isempty(s->group)) {
b7dd4d
+                        const char *group = s->group;
b7dd4d
+
b7dd4d
+                        r = get_group_creds(&group, &gid;;
b7dd4d
+                        if (r < 0) {
b7dd4d
+                                log_unit_error_errno(UNIT(s), r, "Failed to resolve group \"%s\": %m", group);
b7dd4d
+                                _exit(EXIT_GROUP);
b7dd4d
+                        }
b7dd4d
+                }
b7dd4d
+
b7dd4d
+                r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, uid, gid);
b7dd4d
+                if (r < 0) {
b7dd4d
+                        log_unit_error_errno(UNIT(s), r, "Failed to adjust control group access: %m");
b7dd4d
+                        _exit(EXIT_CGROUP);
b7dd4d
+                }
b7dd4d
+
b7dd4d
+                _exit(EXIT_SUCCESS);
b7dd4d
+        }
b7dd4d
+
b7dd4d
+        r = unit_watch_pid(UNIT(s), pid, true);
b7dd4d
+        if (r < 0)
b7dd4d
+                goto fail;
b7dd4d
+
b7dd4d
+        scope_set_state(s, SCOPE_START_CHOWN);
b7dd4d
+
b7dd4d
+        return 1;
b7dd4d
+fail:
b7dd4d
+        s->timer_event_source = sd_event_source_disable_unref(s->timer_event_source);
b7dd4d
+        return r;
b7dd4d
+}
b7dd4d
+
b7dd4d
+static int scope_enter_running(Scope *s) {
b7dd4d
+        Unit *u = UNIT(s);
b7dd4d
+        int r;
b7dd4d
+
b7dd4d
+        assert(s);
b7dd4d
 
b7dd4d
         (void) bus_scope_track_controller(s);
b7dd4d
 
b7dd4d
@@ -328,11 +382,7 @@ static int scope_start(Unit *u) {
b7dd4d
         if (r < 0)
b7dd4d
                 return r;
b7dd4d
 
b7dd4d
-        (void) unit_realize_cgroup(u);
b7dd4d
-        (void) unit_reset_cpu_accounting(u);
b7dd4d
-        (void) unit_reset_ip_accounting(u);
b7dd4d
-
b7dd4d
-        unit_export_state_files(UNIT(s));
b7dd4d
+        unit_export_state_files(u);
b7dd4d
 
b7dd4d
         r = unit_attach_pids_to_cgroup(u, UNIT(s)->pids, NULL);
b7dd4d
         if (r < 0) {
b7dd4d
@@ -350,6 +400,38 @@ static int scope_start(Unit *u) {
b7dd4d
         return 1;
b7dd4d
 }
b7dd4d
 
b7dd4d
+static int scope_start(Unit *u) {
b7dd4d
+        Scope *s = SCOPE(u);
b7dd4d
+
b7dd4d
+        assert(s);
b7dd4d
+
b7dd4d
+        if (unit_has_name(u, SPECIAL_INIT_SCOPE))
b7dd4d
+                return -EPERM;
b7dd4d
+
b7dd4d
+        if (s->state == SCOPE_FAILED)
b7dd4d
+                return -EPERM;
b7dd4d
+
b7dd4d
+        /* We can't fulfill this right now, please try again later */
b7dd4d
+        if (IN_SET(s->state, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL))
b7dd4d
+                return -EAGAIN;
b7dd4d
+
b7dd4d
+        assert(s->state == SCOPE_DEAD);
b7dd4d
+
b7dd4d
+        if (!u->transient && !MANAGER_IS_RELOADING(u->manager))
b7dd4d
+                return -ENOENT;
b7dd4d
+
b7dd4d
+        (void) unit_realize_cgroup(u);
b7dd4d
+        (void) unit_reset_cpu_accounting(u);
b7dd4d
+        (void) unit_reset_ip_accounting(u);
b7dd4d
+
b7dd4d
+        /* We check only for User= option to keep behavior consistent with logic for service units,
b7dd4d
+         * i.e. having 'Delegate=true Group=foo' w/o specifing User= has no effect. */
b7dd4d
+        if (s->user && unit_cgroup_delegate(u))
b7dd4d
+                return scope_enter_start_chown(s);
b7dd4d
+
b7dd4d
+        return scope_enter_running(s);
b7dd4d
+}
b7dd4d
+
b7dd4d
 static int scope_stop(Unit *u) {
b7dd4d
         Scope *s = SCOPE(u);
b7dd4d
 
b7dd4d
@@ -462,7 +544,17 @@ static void scope_notify_cgroup_empty_event(Unit *u) {
b7dd4d
 }
b7dd4d
 
b7dd4d
 static void scope_sigchld_event(Unit *u, pid_t pid, int code, int status) {
b7dd4d
-        assert(u);
b7dd4d
+        Scope *s = SCOPE(u);
b7dd4d
+
b7dd4d
+        assert(s);
b7dd4d
+
b7dd4d
+        if (s->state == SCOPE_START_CHOWN) {
b7dd4d
+                if (!is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL))
b7dd4d
+                        scope_enter_dead(s, SCOPE_FAILURE_RESOURCES);
b7dd4d
+                else
b7dd4d
+                        scope_enter_running(s);
b7dd4d
+                return;
b7dd4d
+        }
b7dd4d
 
b7dd4d
         /* If we get a SIGCHLD event for one of the processes we were interested in, then we look for others to
b7dd4d
          * watch, under the assumption that we'll sooner or later get a SIGCHLD for them, as the original
b7dd4d
@@ -495,6 +587,11 @@ static int scope_dispatch_timer(sd_event_source *source, usec_t usec, void *user
b7dd4d
                 scope_enter_dead(s, SCOPE_FAILURE_TIMEOUT);
b7dd4d
                 break;
b7dd4d
 
b7dd4d
+        case SCOPE_START_CHOWN:
b7dd4d
+                log_unit_warning(UNIT(s), "User lookup timed out. Entering failed state.");
b7dd4d
+                scope_enter_dead(s, SCOPE_FAILURE_TIMEOUT);
b7dd4d
+                break;
b7dd4d
+
b7dd4d
         default:
b7dd4d
                 assert_not_reached("Timeout at wrong time.");
b7dd4d
         }
b7dd4d
diff --git a/src/core/scope.h b/src/core/scope.h
b7dd4d
index c38afb5e5d..7bed3eed9e 100644
b7dd4d
--- a/src/core/scope.h
b7dd4d
+++ b/src/core/scope.h
b7dd4d
@@ -32,6 +32,9 @@ struct Scope {
b7dd4d
         bool was_abandoned;
b7dd4d
 
b7dd4d
         sd_event_source *timer_event_source;
b7dd4d
+
b7dd4d
+        char *user;
b7dd4d
+        char *group;
b7dd4d
 };
b7dd4d
 
b7dd4d
 extern const UnitVTable scope_vtable;
b7dd4d
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c
b7dd4d
index 3910dfa812..c475bbafe0 100644
b7dd4d
--- a/src/shared/bus-unit-util.c
b7dd4d
+++ b/src/shared/bus-unit-util.c
b7dd4d
@@ -1615,6 +1615,11 @@ static int bus_append_unit_property(sd_bus_message *m, const char *field, const
b7dd4d
 
b7dd4d
                 return bus_append_parse_sec_rename(m, field, eq);
b7dd4d
 
b7dd4d
+        /* Scope units don't have execution context but we still want to allow setting these two,
b7dd4d
+         * so let's handle them separately. */
b7dd4d
+        if (STR_IN_SET(field, "User", "Group"))
b7dd4d
+                return bus_append_string(m, field, eq);
b7dd4d
+
b7dd4d
         if (streq(field, "StartLimitBurst"))
b7dd4d
 
b7dd4d
                 return bus_append_safe_atou(m, field, eq);
b7dd4d
diff --git a/test/TEST-19-DELEGATE/testsuite.sh b/test/TEST-19-DELEGATE/testsuite.sh
b7dd4d
index c738bea10e..c4c948cc11 100755
b7dd4d
--- a/test/TEST-19-DELEGATE/testsuite.sh
b7dd4d
+++ b/test/TEST-19-DELEGATE/testsuite.sh
b7dd4d
@@ -4,6 +4,16 @@
b7dd4d
 set -ex
b7dd4d
 set -o pipefail
b7dd4d
 
b7dd4d
+test_scope_unpriv_delegation() {
b7dd4d
+    useradd test ||:
b7dd4d
+    trap "userdel -r test" RETURN
b7dd4d
+
b7dd4d
+    systemd-run --uid=test -p User=test -p Delegate=yes --slice workload.slice --unit workload0.scope --scope \
b7dd4d
+            test -w /sys/fs/cgroup/workload.slice/workload0.scope -a \
b7dd4d
+            -w /sys/fs/cgroup/workload.slice/workload0.scope/cgroup.procs -a \
b7dd4d
+            -w /sys/fs/cgroup/workload.slice/workload0.scope/cgroup.subtree_control
b7dd4d
+}
b7dd4d
+
b7dd4d
 if grep -q cgroup2 /proc/filesystems ; then
b7dd4d
         systemd-run --wait --unit=test0.service -p "DynamicUser=1" -p "Delegate=" \
b7dd4d
                     test -w /sys/fs/cgroup/system.slice/test0.service/ -a \
b7dd4d
@@ -15,6 +25,9 @@ if grep -q cgroup2 /proc/filesystems ; then
b7dd4d
 
b7dd4d
         systemd-run --wait --unit=test2.service -p "DynamicUser=1" -p "Delegate=memory pids" \
b7dd4d
                     grep pids /sys/fs/cgroup/system.slice/test2.service/cgroup.controllers
b7dd4d
+
b7dd4d
+        # Check that unprivileged delegation works for scopes
b7dd4d
+        test_scope_unpriv_delegation
b7dd4d
 else
b7dd4d
         echo "Skipping TEST-19-DELEGATE, as the kernel doesn't actually support cgroupsv2" >&2
b7dd4d
 fi