diff --git a/0001-Temporary-workaround-PrivateUsers-full-implies-Deleg.patch b/0001-Temporary-workaround-PrivateUsers-full-implies-Deleg.patch new file mode 100644 index 0000000..2167c5c --- /dev/null +++ b/0001-Temporary-workaround-PrivateUsers-full-implies-Deleg.patch @@ -0,0 +1,36 @@ +From 2641ff693f715dd5094c56c59e0e660b9b35c9e2 Mon Sep 17 00:00:00 2001 +From: Ryan Wilson +Date: Thu, 5 Dec 2024 08:31:42 -0800 +Subject: [PATCH] Temporary workaround: PrivateUsers=full implies + DelegateNamespaces=yes + +--- + src/core/exec-invoke.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/src/core/exec-invoke.c b/src/core/exec-invoke.c +index 8305bb2bcf..8c2a689d6e 100644 +--- a/src/core/exec-invoke.c ++++ b/src/core/exec-invoke.c +@@ -4061,6 +4061,9 @@ static bool exec_context_need_unprivileged_private_users( + assert(context); + assert(params); + ++ if (context->private_users == PRIVATE_USERS_FULL) ++ return true; ++ + /* These options require PrivateUsers= when used in user units, as we need to be in a user namespace + * to have permission to enable them when not running as root. If we have effective CAP_SYS_ADMIN + * (system manager) then we have privileges and don't need this. */ +@@ -5015,7 +5018,7 @@ int exec_invoke( + + /* The kernel requires /proc/pid/setgroups be set to "deny" prior to writing /proc/pid/gid_map in + * unprivileged user namespaces. */ +- r = setup_private_users(pu, saved_uid, saved_gid, uid, gid, /* allow_setgroups= */ false); ++ r = setup_private_users(pu, saved_uid, saved_gid, uid, gid, /* allow_setgroups= */ params->runtime_scope != RUNTIME_SCOPE_USER); + /* If it was requested explicitly and we can't set it up, fail early. Otherwise, continue and let + * the actual requested operations fail (or silently continue). 
*/ + if (r < 0 && context->private_users != PRIVATE_USERS_NO) { +-- +2.43.5 + diff --git a/35183.patch b/35183.patch new file mode 100644 index 0000000..f8d952a --- /dev/null +++ b/35183.patch @@ -0,0 +1,273 @@ +From 705cc82938b67fa110f2f6f5d28bfb9ec2f339c0 Mon Sep 17 00:00:00 2001 +From: Ryan Wilson +Date: Fri, 15 Nov 2024 06:56:05 -0800 +Subject: [PATCH 1/2] core: Add PrivateUsers=full + +Recently, PrivateUsers=identity was added to support mapping the first +65536 UIDs/GIDs from parent to the child namespace and mapping the other +UID/GIDs to the nobody user. + +However, there are use cases where users have UIDs/GIDs > 65536 and need +to do a similar identity mapping. Moreover, in some of those cases, users +want a full identity mapping from 0 -> UID_MAX. + +Note to differentiate ourselves from the init user namespace, we need to +set up the uid_map/gid_map like: +``` +0 0 1 +1 1 UINT32_MAX - 1 +``` + +as the init user namespace uses `0 0 UINT32_MAX` and some applications - +like systemd itself - determine if it's a non-init user namespace based on +uid_map/gid_map files. Note systemd will remove this heuristic in +running_in_userns() in version 258 and uses namespace inode. But some users +may be running a container image with older systemd < 258 so we keep this +hack until version 259. + +To support this, we add PrivateUsers=full that does identity mapping for +all available UID/GIDs. + +Fixes: #35168 +--- + man/systemd.exec.xml | 8 +++++-- + src/core/exec-invoke.c | 28 ++++++++++++++++++++++++ + src/core/namespace.c | 1 + + src/core/namespace.h | 1 + + test/units/TEST-07-PID1.private-users.sh | 2 ++ + 5 files changed, 38 insertions(+), 2 deletions(-) + +diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml +index 607c88128ded4..482dbbda80a84 100644 +--- a/man/systemd.exec.xml ++++ b/man/systemd.exec.xml +@@ -2009,8 +2009,8 @@ BindReadOnlyPaths=/var/lib/systemd + + PrivateUsers= + +- Takes a boolean argument or one of self or +- identity. Defaults to false.
If enabled, sets up a new user namespace for the ++ Takes a boolean argument or one of self, identity, ++ or full. Defaults to false. If enabled, sets up a new user namespace for the + executed processes and configures a user and group mapping. If set to a true value or + self, a minimal user and group mapping is configured that maps the + root user and group as well as the unit's own user and group to themselves and +@@ -2026,6 +2026,10 @@ BindReadOnlyPaths=/var/lib/systemd + since all UIDs/GIDs are chosen identically it does provide process capability isolation, and hence is + often a good choice if proper user namespacing with distinct UID maps is not appropriate. + ++ If the parameter is full, user namespacing is set up with an identity ++ mapping for all UIDs/GIDs. Similar to identity, this does not provide UID/GID ++ isolation, but it does provide process capability isolation. ++ + If this mode is enabled, all unit processes are run without privileges in the host user + namespace (regardless if the unit's own user/group is root or not). Specifically + this means that the process will have zero process capabilities on the host's user namespace, but +diff --git a/src/core/exec-invoke.c b/src/core/exec-invoke.c +index 9d636f552950d..682d6449d76f3 100644 +--- a/src/core/exec-invoke.c ++++ b/src/core/exec-invoke.c +@@ -2103,6 +2103,29 @@ static int setup_private_users(PrivateUsers private_users, uid_t ouid, gid_t ogi + uid_map = strdup("0 0 65536\n"); + if (!uid_map) + return -ENOMEM; ++ } else if (private_users == PRIVATE_USERS_FULL) { ++ /* Map all UID/GID from original to new user namespace. We can't use `0 0 UINT32_MAX` because ++ * this is the same UID/GID map as the init user namespace and systemd's running_in_userns() ++ * checks whether its in a user namespace by comparing uid_map/gid_map to `0 0 UINT32_MAX`. 
++ * Thus, we still map all UIDs/GIDs but do it using two extents to differentiate the new user ++ * namespace from the init namespace: ++ * 0 0 1 ++ * 1 1 UINT32_MAX - 1 ++ * ++ * systemd will remove the heuristic in running_in_userns() and use namespace inodes in version 258 ++ * (PR #35382). But some users may be running a container image with older systemd < 258 so we keep ++ * this uid_map/gid_map hack until version 259 for version N-1 compatibility. ++ * ++ * TODO: Switch to `0 0 UINT32_MAX` in systemd v259. ++ * ++ * Note the kernel defines the UID range between 0 and UINT32_MAX so we map all UIDs even though ++ * the UID range beyond INT32_MAX (e.g. i.e. the range above the signed 32-bit range) is ++ * icky. For example, setfsuid() returns the old UID as signed integer. But units can decide to ++ * use these UIDs/GIDs so we need to map them. */ ++ r = asprintf(&uid_map, "0 0 1\n" ++ "1 1 " UID_FMT "\n", (uid_t) (UINT32_MAX - 1)); ++ if (r < 0) ++ return -ENOMEM; + /* Can only set up multiple mappings with CAP_SETUID. */ + } else if (have_effective_cap(CAP_SETUID) > 0 && uid != ouid && uid_is_valid(uid)) { + r = asprintf(&uid_map, +@@ -2123,6 +2146,11 @@ static int setup_private_users(PrivateUsers private_users, uid_t ouid, gid_t ogi + gid_map = strdup("0 0 65536\n"); + if (!gid_map) + return -ENOMEM; ++ } else if (private_users == PRIVATE_USERS_FULL) { ++ r = asprintf(&gid_map, "0 0 1\n" ++ "1 1 " GID_FMT "\n", (gid_t) (UINT32_MAX - 1)); ++ if (r < 0) ++ return -ENOMEM; + /* Can only set up multiple mappings with CAP_SETGID. 
*/ + } else if (have_effective_cap(CAP_SETGID) > 0 && gid != ogid && gid_is_valid(gid)) { + r = asprintf(&gid_map, +diff --git a/src/core/namespace.c b/src/core/namespace.c +index 57dbbc4fc7dc5..c584ea35724d1 100644 +--- a/src/core/namespace.c ++++ b/src/core/namespace.c +@@ -3364,6 +3364,7 @@ static const char* const private_users_table[_PRIVATE_USERS_MAX] = { + [PRIVATE_USERS_NO] = "no", + [PRIVATE_USERS_SELF] = "self", + [PRIVATE_USERS_IDENTITY] = "identity", ++ [PRIVATE_USERS_FULL] = "full", + }; + + DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(private_users, PrivateUsers, PRIVATE_USERS_SELF); +diff --git a/src/core/namespace.h b/src/core/namespace.h +index bd48aa31da71c..5d466a8c1c724 100644 +--- a/src/core/namespace.h ++++ b/src/core/namespace.h +@@ -65,6 +65,7 @@ typedef enum PrivateUsers { + PRIVATE_USERS_NO, + PRIVATE_USERS_SELF, + PRIVATE_USERS_IDENTITY, ++ PRIVATE_USERS_FULL, + _PRIVATE_USERS_MAX, + _PRIVATE_USERS_INVALID = -EINVAL, + } PrivateUsers; +diff --git a/test/units/TEST-07-PID1.private-users.sh b/test/units/TEST-07-PID1.private-users.sh +index 2475b5d365d59..ba85248f9607e 100755 +--- a/test/units/TEST-07-PID1.private-users.sh ++++ b/test/units/TEST-07-PID1.private-users.sh +@@ -10,3 +10,5 @@ systemd-run -p PrivateUsersEx=self --wait bash -c 'test "$(cat /proc/self/uid_ma + systemd-run -p PrivateUsersEx=self --wait bash -c 'test "$(cat /proc/self/gid_map)" == " 0 0 1"' + systemd-run -p PrivateUsersEx=identity --wait bash -c 'test "$(cat /proc/self/uid_map)" == " 0 0 65536"' + systemd-run -p PrivateUsersEx=identity --wait bash -c 'test "$(cat /proc/self/gid_map)" == " 0 0 65536"' ++systemd-run -p PrivateUsersEx=full --wait bash -c 'test "$(cat /proc/self/uid_map | tr -d "\n")" == " 0 0 1 1 1 4294967294"' ++systemd-run -p PrivateUsersEx=full --wait bash -c 'test "$(cat /proc/self/gid_map | tr -d "\n")" == " 0 0 1 1 1 4294967294"' + +From 878e86f12b7184a87a9cc1ecd4f99c5d9744f931 Mon Sep 17 00:00:00 2001 +From: Ryan Wilson +Date: Sat, 30 Nov 2024 
14:14:35 -0800 +Subject: [PATCH 2/2] core: Set /proc/pid/setgroups to allow for + PrivateUsers=full + +When trying to run dbus-broker in a systemd unit with PrivateUsers=full, +we see dbus-broker fails with EPERM at `util_audit_drop_permissions`. + +The root cause is dbus-broker calls the setgroups() system call and this +is disallowed via systemd's implementation of PrivateUsers= by setting +/proc/pid/setgroups = deny. This is done to remediate potential privilege +escalation vulnerabilities in user namespaces where an attacker can remove +supplementary groups and gain access to resources where those groups are +restricted. + +However, for OS-like containers, setgroups() is a pretty common API and +disabling it is not feasible. So we allow setgroups() by setting +/proc/pid/setgroups to allow in PrivateUsers=full. Note security conscious +users can still use SystemCallFilter= to disable setgroups() if they want +to specifically prevent this system call. + +Fixes: #35425 +--- + man/systemd.exec.xml | 7 +++++-- + src/core/exec-invoke.c | 23 ++++++++++++++++------- + test/units/TEST-07-PID1.private-users.sh | 3 +++ + 3 files changed, 24 insertions(+), 9 deletions(-) + +diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml +index 482dbbda80a84..b31e64f57c844 100644 +--- a/man/systemd.exec.xml ++++ b/man/systemd.exec.xml +@@ -2027,8 +2027,11 @@ BindReadOnlyPaths=/var/lib/systemd + often a good choice if proper user namespacing with distinct UID maps is not appropriate. + + If the parameter is full, user namespacing is set up with an identity +- mapping for all UIDs/GIDs. Similar to identity, this does not provide UID/GID +- isolation, but it does provide process capability isolation. ++ mapping for all UIDs/GIDs. In addition, for system services, full allows the unit ++ to call setgroups() system calls (by setting ++ /proc/pid/setgroups to allow). ++ Similar to identity, this does not provide UID/GID isolation, but it does provide ++ process capability isolation. 
+ + If this mode is enabled, all unit processes are run without privileges in the host user + namespace (regardless if the unit's own user/group is root or not). Specifically +diff --git a/src/core/exec-invoke.c b/src/core/exec-invoke.c +index 682d6449d76f3..8305bb2bcf7da 100644 +--- a/src/core/exec-invoke.c ++++ b/src/core/exec-invoke.c +@@ -2077,7 +2077,7 @@ static int build_pass_environment(const ExecContext *c, char ***ret) { + return 0; + } + +-static int setup_private_users(PrivateUsers private_users, uid_t ouid, gid_t ogid, uid_t uid, gid_t gid) { ++static int setup_private_users(PrivateUsers private_users, uid_t ouid, gid_t ogid, uid_t uid, gid_t gid, bool allow_setgroups) { + _cleanup_free_ char *uid_map = NULL, *gid_map = NULL; + _cleanup_close_pair_ int errno_pipe[2] = EBADF_PAIR; + _cleanup_close_ int unshare_ready_fd = -EBADF; +@@ -2196,7 +2196,8 @@ static int setup_private_users(PrivateUsers private_users, uid_t ouid, gid_t ogi + if (read(unshare_ready_fd, &c, sizeof(c)) < 0) + report_errno_and_exit(errno_pipe[1], -errno); + +- /* Disable the setgroups() system call in the child user namespace, for good. */ ++ /* Disable the setgroups() system call in the child user namespace, for good, unless PrivateUsers=full ++ * and using the system service manager. */ + a = procfs_file_alloca(ppid, "setgroups"); + fd = open(a, O_WRONLY|O_CLOEXEC); + if (fd < 0) { +@@ -2207,10 +2208,15 @@ static int setup_private_users(PrivateUsers private_users, uid_t ouid, gid_t ogi + + /* If the file is missing the kernel is too old, let's continue anyway. 
*/ + } else { +- if (write(fd, "deny\n", 5) < 0) { +- r = log_debug_errno(errno, "Failed to write \"deny\" to %s: %m", a); +- report_errno_and_exit(errno_pipe[1], r); ++ if (allow_setgroups) { ++ if (write(fd, "allow\n", 6) < 0) ++ r = log_debug_errno(errno, "Failed to write \"allow\" to %s: %m", a); ++ } else { ++ if (write(fd, "deny\n", 5) < 0) ++ r = log_debug_errno(errno, "Failed to write \"deny\" to %s: %m", a); + } ++ if (r < 0) ++ report_errno_and_exit(errno_pipe[1], r); + + fd = safe_close(fd); + } +@@ -5007,7 +5013,9 @@ int exec_invoke( + if (pu == PRIVATE_USERS_NO) + pu = PRIVATE_USERS_SELF; + +- r = setup_private_users(pu, saved_uid, saved_gid, uid, gid); ++ /* The kernel requires /proc/pid/setgroups be set to "deny" prior to writing /proc/pid/gid_map in ++ * unprivileged user namespaces. */ ++ r = setup_private_users(pu, saved_uid, saved_gid, uid, gid, /* allow_setgroups= */ false); + /* If it was requested explicitly and we can't set it up, fail early. Otherwise, continue and let + * the actual requested operations fail (or silently continue). */ + if (r < 0 && context->private_users != PRIVATE_USERS_NO) { +@@ -5177,7 +5185,8 @@ int exec_invoke( + * different user namespace). 
*/ + + if (needs_sandboxing && !userns_set_up) { +- r = setup_private_users(context->private_users, saved_uid, saved_gid, uid, gid); ++ r = setup_private_users(context->private_users, saved_uid, saved_gid, uid, gid, ++ /* allow_setgroups= */ context->private_users == PRIVATE_USERS_FULL); + if (r < 0) { + *exit_status = EXIT_USER; + return log_exec_error_errno(context, params, r, "Failed to set up user namespacing: %m"); +diff --git a/test/units/TEST-07-PID1.private-users.sh b/test/units/TEST-07-PID1.private-users.sh +index ba85248f9607e..e788f52a2f73f 100755 +--- a/test/units/TEST-07-PID1.private-users.sh ++++ b/test/units/TEST-07-PID1.private-users.sh +@@ -6,9 +6,12 @@ set -o pipefail + + systemd-run -p PrivateUsers=yes --wait bash -c 'test "$(cat /proc/self/uid_map)" == " 0 0 1"' + systemd-run -p PrivateUsers=yes --wait bash -c 'test "$(cat /proc/self/gid_map)" == " 0 0 1"' ++systemd-run -p PrivateUsersEx=yes --wait bash -c 'test "$(cat /proc/self/setgroups)" == "deny"' + systemd-run -p PrivateUsersEx=self --wait bash -c 'test "$(cat /proc/self/uid_map)" == " 0 0 1"' + systemd-run -p PrivateUsersEx=self --wait bash -c 'test "$(cat /proc/self/gid_map)" == " 0 0 1"' ++systemd-run -p PrivateUsersEx=self --wait bash -c 'test "$(cat /proc/self/setgroups)" == "deny"' + systemd-run -p PrivateUsersEx=identity --wait bash -c 'test "$(cat /proc/self/uid_map)" == " 0 0 65536"' + systemd-run -p PrivateUsersEx=identity --wait bash -c 'test "$(cat /proc/self/gid_map)" == " 0 0 65536"' + systemd-run -p PrivateUsersEx=full --wait bash -c 'test "$(cat /proc/self/uid_map | tr -d "\n")" == " 0 0 1 1 1 4294967294"' + systemd-run -p PrivateUsersEx=full --wait bash -c 'test "$(cat /proc/self/gid_map | tr -d "\n")" == " 0 0 1 1 1 4294967294"' ++systemd-run -p PrivateUsersEx=full --wait bash -c 'test "$(cat /proc/self/setgroups)" == "allow"' diff --git a/35447.patch b/35447.patch new file mode 100644 index 0000000..8513e47 --- /dev/null +++ b/35447.patch @@ -0,0 +1,679 @@ +From 
186eb0d3dc17b700a7709ebb23012ed9e3e41d6a Mon Sep 17 00:00:00 2001 +From: Ryan Wilson +Date: Mon, 2 Dec 2024 07:38:06 -0800 +Subject: [PATCH 1/2] core: Migrate ProtectHostname to use enum vs boolean + +Migrating ProtectHostname to enum will set the stage for adding more +properties like ProtectHostname=private in future commits. + +In addition, we add ProtectHostnameEx property to dbus API which uses +string instead of boolean. +--- + man/org.freedesktop.systemd1.xml | 34 +++++++++++---- + src/core/dbus-execute.c | 59 +++++++++++++++++++++++++-- + src/core/exec-invoke.c | 8 ++-- + src/core/execute-serialize.c | 9 ++-- + src/core/execute.c | 2 +- + src/core/execute.h | 2 +- + src/core/load-fragment-gperf.gperf.in | 2 +- + src/core/load-fragment.c | 1 + + src/core/load-fragment.h | 1 + + src/core/namespace.c | 13 ++++-- + src/core/namespace.h | 10 +++++ + src/shared/bus-unit-util.c | 1 + + 12 files changed, 115 insertions(+), 27 deletions(-) + +diff --git a/man/org.freedesktop.systemd1.xml b/man/org.freedesktop.systemd1.xml +index 9cd6a69311a97..d196f4767cea2 100644 +--- a/man/org.freedesktop.systemd1.xml ++++ b/man/org.freedesktop.systemd1.xml +@@ -3359,6 +3359,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly b ProtectHostname = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") ++ readonly s ProtectHostnameEx = '...'; ++ @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly b MemoryKSM = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly s NetworkNamespacePath = '...'; +@@ -3958,8 +3960,6 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { + + + +- +- + + + +@@ -4682,6 +4682,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { + + + ++ ++ + + + +@@ -4879,6 +4881,12 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { + unit file setting PrivatePIDs= listed in + systemd.exec5.
+ Note PrivatePIDs is a string type to allow adding more values in the future. ++ ++ ProtectHostnameEx implement the destination parameter of the ++ unit file setting ProtectHostname= listed in ++ systemd.exec5. ++ Unlike boolean ProtectHostname, ProtectHostnameEx ++ is a string type. + + + +@@ -5544,6 +5552,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly b ProtectHostname = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") ++ readonly s ProtectHostnameEx = '...'; ++ @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly b MemoryKSM = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly s NetworkNamespacePath = '...'; +@@ -6155,8 +6165,6 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { + + + +- +- + + + +@@ -6851,6 +6859,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { + + + ++ ++ + + + +@@ -7551,6 +7561,8 @@ node /org/freedesktop/systemd1/unit/home_2emount { + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly b ProtectHostname = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") ++ readonly s ProtectHostnameEx = '...'; ++ @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly b MemoryKSM = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly s NetworkNamespacePath = '...'; +@@ -8088,8 +8100,6 @@ node /org/freedesktop/systemd1/unit/home_2emount { + + + +- +- + + + +@@ -8696,6 +8706,8 @@ node /org/freedesktop/systemd1/unit/home_2emount { + + + ++ ++ + + + +@@ -9525,6 +9537,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly b ProtectHostname = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") ++ readonly s ProtectHostnameEx = '...'; ++ @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly b MemoryKSM = 
...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly s NetworkNamespacePath = '...'; +@@ -10048,8 +10062,6 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { + + + +- +- + + + +@@ -10642,6 +10654,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { + + + ++ ++ + + + +@@ -12305,6 +12319,7 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ + ProtectControlGroupsEx, + PrivateUsersEx, and + PrivatePIDs were added in version 257. ++ ProtectHostnameEx was added in version 258. + + + Socket Unit Objects +@@ -12348,6 +12363,7 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ + ManagedOOMMemoryPressureDurationUSec, + ProtectControlGroupsEx, and + PrivatePIDs were added in version 257. ++ ProtectHostnameEx was added in version 258. + + + Mount Unit Objects +@@ -12388,6 +12404,7 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ + ManagedOOMMemoryPressureDurationUSec, + ProtectControlGroupsEx, and + PrivatePIDs were added in version 257. ++ ProtectHostnameEx was added in version 258. + + + Swap Unit Objects +@@ -12428,6 +12445,7 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ + ManagedOOMMemoryPressureDurationUSec, + ProtectControlGroupsEx, and + PrivatePIDs were added in version 257. ++ ProtectHostnameEx was added in version 258. 
+ + + Slice Unit Objects +diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c +index e297323f1d3e7..bfd6694683cf1 100644 +--- a/src/core/dbus-execute.c ++++ b/src/core/dbus-execute.c +@@ -64,6 +64,7 @@ static BUS_DEFINE_PROPERTY_GET_REF(property_get_private_tmp_ex, "s", PrivateTmp, + static BUS_DEFINE_PROPERTY_GET_REF(property_get_private_users_ex, "s", PrivateUsers, private_users_to_string); + static BUS_DEFINE_PROPERTY_GET_REF(property_get_protect_control_groups_ex, "s", ProtectControlGroups, protect_control_groups_to_string); + static BUS_DEFINE_PROPERTY_GET_REF(property_get_private_pids, "s", PrivatePIDs, private_pids_to_string); ++static BUS_DEFINE_PROPERTY_GET_REF(property_get_protect_hostname_ex, "s", ProtectHostname, protect_hostname_to_string); + static BUS_DEFINE_PROPERTY_GET_REF(property_get_syslog_level, "i", int, LOG_PRI); + static BUS_DEFINE_PROPERTY_GET_REF(property_get_syslog_facility, "i", int, LOG_FAC); + static BUS_DEFINE_PROPERTY_GET(property_get_cpu_affinity_from_numa, "b", ExecContext, exec_context_get_cpu_affinity_from_numa); +@@ -1068,6 +1069,21 @@ static int property_get_protect_control_groups( + return sd_bus_message_append_basic(reply, 'b', &b); + } + ++static int property_get_protect_hostname( ++ sd_bus *bus, ++ const char *path, ++ const char *interface, ++ const char *property, ++ sd_bus_message *reply, ++ void *userdata, ++ sd_bus_error *error) { ++ ++ ProtectHostname *p = ASSERT_PTR(userdata); ++ int b = *p != PROTECT_HOSTNAME_NO; ++ ++ return sd_bus_message_append_basic(reply, 'b', &b); ++} ++ + const sd_bus_vtable bus_exec_vtable[] = { + SD_BUS_VTABLE_START(0), + SD_BUS_PROPERTY("Environment", "as", NULL, offsetof(ExecContext, environment), SD_BUS_VTABLE_PROPERTY_CONST), +@@ -1242,7 +1258,8 @@ const sd_bus_vtable bus_exec_vtable[] = { + SD_BUS_PROPERTY("KeyringMode", "s", property_get_exec_keyring_mode, offsetof(ExecContext, keyring_mode), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("ProtectProc", "s", 
property_get_protect_proc, offsetof(ExecContext, protect_proc), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("ProcSubset", "s", property_get_proc_subset, offsetof(ExecContext, proc_subset), SD_BUS_VTABLE_PROPERTY_CONST), +- SD_BUS_PROPERTY("ProtectHostname", "b", bus_property_get_bool, offsetof(ExecContext, protect_hostname), SD_BUS_VTABLE_PROPERTY_CONST), ++ SD_BUS_PROPERTY("ProtectHostname", "b", property_get_protect_hostname, offsetof(ExecContext, protect_hostname), SD_BUS_VTABLE_PROPERTY_CONST), ++ SD_BUS_PROPERTY("ProtectHostnameEx", "s", property_get_protect_hostname_ex, offsetof(ExecContext, protect_hostname), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("MemoryKSM", "b", bus_property_get_tristate, offsetof(ExecContext, memory_ksm), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("NetworkNamespacePath", "s", NULL, offsetof(ExecContext, network_namespace_path), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("IPCNamespacePath", "s", NULL, offsetof(ExecContext, ipc_namespace_path), SD_BUS_VTABLE_PROPERTY_CONST), +@@ -1993,6 +2010,43 @@ int bus_exec_context_set_transient_property( + return 1; + } + ++ if (streq(name, "ProtectHostname")) { ++ int v; ++ ++ r = sd_bus_message_read(message, "b", &v); ++ if (r < 0) ++ return r; ++ ++ if (!UNIT_WRITE_FLAGS_NOOP(flags)) { ++ c->protect_hostname = v ? 
PROTECT_HOSTNAME_YES : PROTECT_HOSTNAME_NO; ++ (void) unit_write_settingf(u, flags, name, "%s=%s", name, yes_no(v)); ++ } ++ ++ return 1; ++ ++ } ++ ++ if (streq(name, "ProtectHostnameEx")) { ++ const char *s; ++ ProtectHostname t; ++ ++ r = sd_bus_message_read(message, "s", &s); ++ if (r < 0) ++ return r; ++ ++ t = protect_hostname_from_string(s); ++ if (t < 0) ++ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid %s setting: %s", name, s); ++ ++ if (!UNIT_WRITE_FLAGS_NOOP(flags)) { ++ c->protect_hostname = t; ++ (void) unit_write_settingf(u, flags, name, "ProtectHostname=%s", ++ protect_hostname_to_string(c->protect_hostname)); ++ } ++ ++ return 1; ++ } ++ + if (streq(name, "PrivateDevices")) + return bus_set_transient_bool(u, name, &c->private_devices, message, flags, error); + +@@ -2053,9 +2107,6 @@ int bus_exec_context_set_transient_property( + if (streq(name, "LockPersonality")) + return bus_set_transient_bool(u, name, &c->lock_personality, message, flags, error); + +- if (streq(name, "ProtectHostname")) +- return bus_set_transient_bool(u, name, &c->protect_hostname, message, flags, error); +- + if (streq(name, "MemoryKSM")) + return bus_set_transient_tristate(u, name, &c->memory_ksm, message, flags, error); + +diff --git a/src/core/exec-invoke.c b/src/core/exec-invoke.c +index 9d636f552950d..f4aacb55b22bd 100644 +--- a/src/core/exec-invoke.c ++++ b/src/core/exec-invoke.c +@@ -1341,7 +1341,7 @@ static bool context_has_seccomp(const ExecContext *c) { + c->memory_deny_write_execute || + c->private_devices || + c->protect_clock || +- c->protect_hostname || ++ c->protect_hostname == PROTECT_HOSTNAME_YES || + c->protect_kernel_tunables || + c->protect_kernel_modules || + c->protect_kernel_logs || +@@ -1701,7 +1701,7 @@ static int apply_protect_hostname(const ExecContext *c, const ExecParameters *p, + assert(c); + assert(p); + +- if (!c->protect_hostname) ++ if (c->protect_hostname == PROTECT_HOSTNAME_NO) + return 0; + + if 
(ns_type_supported(NAMESPACE_UTS)) { +@@ -3417,7 +3417,7 @@ static int apply_mount_namespace( + .protect_kernel_tunables = needs_sandboxing && context->protect_kernel_tunables, + .protect_kernel_modules = needs_sandboxing && context->protect_kernel_modules, + .protect_kernel_logs = needs_sandboxing && context->protect_kernel_logs, +- .protect_hostname = needs_sandboxing && context->protect_hostname, ++ .protect_hostname = needs_sandboxing && context->protect_hostname == PROTECT_HOSTNAME_YES, + + .private_dev = needs_sandboxing && context->private_devices, + .private_network = needs_sandboxing && exec_needs_network_namespace(context), +@@ -4055,7 +4055,7 @@ static bool exec_context_need_unprivileged_private_users( + context->protect_kernel_logs || + exec_needs_cgroup_mount(context, params) || + context->protect_clock || +- context->protect_hostname || ++ context->protect_hostname != PROTECT_HOSTNAME_NO || + !strv_isempty(context->read_write_paths) || + !strv_isempty(context->read_only_paths) || + !strv_isempty(context->inaccessible_paths) || +diff --git a/src/core/execute-serialize.c b/src/core/execute-serialize.c +index bf6592faedcd2..9dce5a9c2587e 100644 +--- a/src/core/execute-serialize.c ++++ b/src/core/execute-serialize.c +@@ -1978,7 +1978,7 @@ static int exec_context_serialize(const ExecContext *c, FILE *f) { + if (r < 0) + return r; + +- r = serialize_bool_elide(f, "exec-context-protect-hostname", c->protect_hostname); ++ r = serialize_item(f, "exec-context-protect-hostname", protect_hostname_to_string(c->protect_hostname)); + if (r < 0) + return r; + +@@ -2881,10 +2881,9 @@ static int exec_context_deserialize(ExecContext *c, FILE *f) { + if (c->keyring_mode < 0) + return -EINVAL; + } else if ((val = startswith(l, "exec-context-protect-hostname="))) { +- r = parse_boolean(val); +- if (r < 0) +- return r; +- c->protect_hostname = r; ++ c->protect_hostname = protect_hostname_from_string(val); ++ if (c->protect_hostname < 0) ++ return -EINVAL; + } else if ((val 
= startswith(l, "exec-context-protect-proc="))) { + c->protect_proc = protect_proc_from_string(val); + if (c->protect_proc < 0) +diff --git a/src/core/execute.c b/src/core/execute.c +index 3d55b0b772ece..40ab0ad1c53a9 100644 +--- a/src/core/execute.c ++++ b/src/core/execute.c +@@ -1071,7 +1071,7 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) { + prefix, yes_no(c->restrict_realtime), + prefix, yes_no(c->restrict_suid_sgid), + prefix, exec_keyring_mode_to_string(c->keyring_mode), +- prefix, yes_no(c->protect_hostname), ++ prefix, protect_hostname_to_string(c->protect_hostname), + prefix, protect_proc_to_string(c->protect_proc), + prefix, proc_subset_to_string(c->proc_subset)); + +diff --git a/src/core/execute.h b/src/core/execute.h +index 32dabf177f44a..63a56a900cb8c 100644 +--- a/src/core/execute.h ++++ b/src/core/execute.h +@@ -336,7 +336,7 @@ struct ExecContext { + ProtectSystem protect_system; + ProtectHome protect_home; + PrivatePIDs private_pids; +- bool protect_hostname; ++ ProtectHostname protect_hostname; + + bool dynamic_user; + bool remove_ipc; +diff --git a/src/core/load-fragment-gperf.gperf.in b/src/core/load-fragment-gperf.gperf.in +index d7564b3767a06..fa12580ae1113 100644 +--- a/src/core/load-fragment-gperf.gperf.in ++++ b/src/core/load-fragment-gperf.gperf.in +@@ -180,7 +180,7 @@ + {% else %} + {{type}}.SmackProcessLabel, config_parse_warn_compat, DISABLED_CONFIGURATION, 0 + {% endif %} +-{{type}}.ProtectHostname, config_parse_bool, 0, offsetof({{type}}, exec_context.protect_hostname) ++{{type}}.ProtectHostname, config_parse_protect_hostname, 0, offsetof({{type}}, exec_context.protect_hostname) + {{type}}.MemoryKSM, config_parse_tristate, 0, offsetof({{type}}, exec_context.memory_ksm) + {%- endmacro -%} + +diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c +index f34c930f4e4e0..a108216a96045 100644 +--- a/src/core/load-fragment.c ++++ b/src/core/load-fragment.c +@@ -141,6 +141,7 @@ 
DEFINE_CONFIG_PARSE_ENUM(config_parse_exec_utmp_mode, exec_utmp_mode, ExecUtmpMo + DEFINE_CONFIG_PARSE_ENUM(config_parse_job_mode, job_mode, JobMode); + DEFINE_CONFIG_PARSE_ENUM(config_parse_notify_access, notify_access, NotifyAccess); + DEFINE_CONFIG_PARSE_ENUM(config_parse_protect_home, protect_home, ProtectHome); ++DEFINE_CONFIG_PARSE_ENUM(config_parse_protect_hostname, protect_hostname, ProtectHostname); + DEFINE_CONFIG_PARSE_ENUM(config_parse_protect_system, protect_system, ProtectSystem); + DEFINE_CONFIG_PARSE_ENUM(config_parse_exec_preserve_mode, exec_preserve_mode, ExecPreserveMode); + DEFINE_CONFIG_PARSE_ENUM(config_parse_service_type, service_type, ServiceType); +diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h +index 8ac962a94bd14..881ce152d550b 100644 +--- a/src/core/load-fragment.h ++++ b/src/core/load-fragment.h +@@ -119,6 +119,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_protect_control_groups); + CONFIG_PARSER_PROTOTYPE(config_parse_cpu_quota); + CONFIG_PARSER_PROTOTYPE(config_parse_allowed_cpuset); + CONFIG_PARSER_PROTOTYPE(config_parse_protect_home); ++CONFIG_PARSER_PROTOTYPE(config_parse_protect_hostname); + CONFIG_PARSER_PROTOTYPE(config_parse_protect_system); + CONFIG_PARSER_PROTOTYPE(config_parse_bus_name); + CONFIG_PARSER_PROTOTYPE(config_parse_exec_utmp_mode); +diff --git a/src/core/namespace.c b/src/core/namespace.c +index 57dbbc4fc7dc5..c327c9a3ca488 100644 +--- a/src/core/namespace.c ++++ b/src/core/namespace.c +@@ -250,7 +250,7 @@ static const MountEntry protect_system_strict_table[] = { + }; + + /* ProtectHostname=yes able */ +-static const MountEntry protect_hostname_table[] = { ++static const MountEntry protect_hostname_yes_table[] = { + { "/proc/sys/kernel/hostname", MOUNT_READ_ONLY, false }, + { "/proc/sys/kernel/domainname", MOUNT_READ_ONLY, false }, + }; +@@ -2642,8 +2642,8 @@ int setup_namespace(const NamespaceParameters *p, char **reterr_path) { + if (p->protect_hostname) { + r = append_static_mounts( + &ml, +- 
protect_hostname_table, +- ELEMENTSOF(protect_hostname_table), ++ protect_hostname_yes_table, ++ ELEMENTSOF(protect_hostname_yes_table), + ignore_protect_proc); + if (r < 0) + return r; +@@ -3305,6 +3305,13 @@ static const char *const protect_home_table[_PROTECT_HOME_MAX] = { + + DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(protect_home, ProtectHome, PROTECT_HOME_YES); + ++static const char *const protect_hostname_table[_PROTECT_HOSTNAME_MAX] = { ++ [PROTECT_HOSTNAME_NO] = "no", ++ [PROTECT_HOSTNAME_YES] = "yes", ++}; ++ ++DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(protect_hostname, ProtectHostname, PROTECT_HOSTNAME_YES); ++ + static const char *const protect_system_table[_PROTECT_SYSTEM_MAX] = { + [PROTECT_SYSTEM_NO] = "no", + [PROTECT_SYSTEM_YES] = "yes", +diff --git a/src/core/namespace.h b/src/core/namespace.h +index bd48aa31da71c..8df91e3bdf906 100644 +--- a/src/core/namespace.h ++++ b/src/core/namespace.h +@@ -28,6 +28,13 @@ typedef enum ProtectHome { + _PROTECT_HOME_INVALID = -EINVAL, + } ProtectHome; + ++typedef enum ProtectHostname { ++ PROTECT_HOSTNAME_NO, ++ PROTECT_HOSTNAME_YES, ++ _PROTECT_HOSTNAME_MAX, ++ _PROTECT_HOSTNAME_INVALID = -EINVAL, ++} ProtectHostname; ++ + typedef enum ProtectSystem { + PROTECT_SYSTEM_NO, + PROTECT_SYSTEM_YES, +@@ -215,6 +222,9 @@ int open_shareable_ns_path(int netns_storage_socket[static 2], const char *path, + const char* protect_home_to_string(ProtectHome p) _const_; + ProtectHome protect_home_from_string(const char *s) _pure_; + ++const char* protect_hostname_to_string(ProtectHostname p) _const_; ++ProtectHostname protect_hostname_from_string(const char *s) _pure_; ++ + const char* protect_system_to_string(ProtectSystem p) _const_; + ProtectSystem protect_system_from_string(const char *s) _pure_; + +diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c +index 06bfb90c8fa5d..4e623036d0353 100644 +--- a/src/shared/bus-unit-util.c ++++ b/src/shared/bus-unit-util.c +@@ -1045,6 +1045,7 @@ static int 
bus_append_execute_property(sd_bus_message *m, const char *field, con + "SyslogIdentifier", + "ProtectSystem", + "ProtectHome", ++ "ProtectHostnameEx", + "PrivateTmpEx", + "PrivateUsersEx", + "ProtectControlGroupsEx", + +From 0ca5c9a361732b6b43a8ee9d981539aa24d83623 Mon Sep 17 00:00:00 2001 +From: Ryan Wilson +Date: Mon, 2 Dec 2024 08:10:05 -0800 +Subject: [PATCH 2/2] core: Add ProtectHostname=private + +This allows an option for systemd exec units to enable UTS namespaces +but not restrict changing hostname via seccomp. Thus, units can change +hostname without affecting the host. + +Fixes: #30348 +--- + man/systemd.exec.xml | 13 +++++- + mkosi.conf | 1 + + src/core/exec-invoke.c | 19 +++++---- + src/core/namespace.c | 1 + + src/core/namespace.h | 1 + + test/TEST-07-PID1/test.sh | 2 +- + test/units/TEST-07-PID1.protect-hostname.sh | 44 +++++++++++++++++++++ + 7 files changed, 71 insertions(+), 10 deletions(-) + create mode 100755 test/units/TEST-07-PID1.protect-hostname.sh + +diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml +index 607c88128ded4..5759874741815 100644 +--- a/man/systemd.exec.xml ++++ b/man/systemd.exec.xml +@@ -2055,8 +2055,11 @@ BindReadOnlyPaths=/var/lib/systemd + + ProtectHostname= + +- Takes a boolean argument. When set, sets up a new UTS namespace for the executed +- processes. In addition, changing hostname or domainname is prevented. Defaults to off. ++ Takes a boolean argument or private. If enabled, sets up a new UTS namespace ++ for the executed processes. If set to a true value, changing hostname or domainname via ++ sethostname() and setdomainname() system calls is prevented. If set to ++ private, changing hostname or domainname is allowed but only affects the unit's UTS namespace. ++ Defaults to off. 
+ + Note that the implementation of this setting might be impossible (for example if UTS namespaces + are not available), and the unit should be written in a way that does not solely rely on this setting +@@ -2066,6 +2069,12 @@ BindReadOnlyPaths=/var/lib/systemd + the system into the service, it is hence not suitable for services that need to take notice of system + hostname changes dynamically. + ++ Note that this option does not prevent changing system hostname via hostnamectl. ++ However, User= and Group= may be used to run as an unprivileged user ++ to disallow changing system hostname. See SetHostname() in ++ org.freedesktop.hostname15 ++ for more details. ++ + + + +diff --git a/mkosi.conf b/mkosi.conf +index 35a19a27aad39..535e2bd79bf43 100644 +--- a/mkosi.conf ++++ b/mkosi.conf +@@ -101,6 +101,7 @@ Packages= + gdb + grep + gzip ++ hostname + jq + kbd + kexec-tools +diff --git a/src/core/exec-invoke.c b/src/core/exec-invoke.c +index f4aacb55b22bd..fd306f1143125 100644 +--- a/src/core/exec-invoke.c ++++ b/src/core/exec-invoke.c +@@ -1726,15 +1726,17 @@ static int apply_protect_hostname(const ExecContext *c, const ExecParameters *p, + "support UTS namespaces, ignoring namespace setup."); + + #if HAVE_SECCOMP +- int r; ++ if (c->protect_hostname == PROTECT_HOSTNAME_YES) { ++ int r; + +- if (skip_seccomp_unavailable(c, p, "ProtectHostname=")) +- return 0; ++ if (skip_seccomp_unavailable(c, p, "ProtectHostname=")) ++ return 0; + +- r = seccomp_protect_hostname(); +- if (r < 0) { +- *ret_exit_status = EXIT_SECCOMP; +- return log_exec_error_errno(c, p, r, "Failed to apply hostname restrictions: %m"); ++ r = seccomp_protect_hostname(); ++ if (r < 0) { ++ *ret_exit_status = EXIT_SECCOMP; ++ return log_exec_error_errno(c, p, r, "Failed to apply hostname restrictions: %m"); ++ } + } + #endif + +@@ -3417,6 +3419,9 @@ static int apply_mount_namespace( + .protect_kernel_tunables = needs_sandboxing && context->protect_kernel_tunables, + .protect_kernel_modules = 
needs_sandboxing && context->protect_kernel_modules, + .protect_kernel_logs = needs_sandboxing && context->protect_kernel_logs, ++ /* Only mount /proc/sys/kernel/hostname and domainname read-only if ProtectHostname=yes. Otherwise, ProtectHostname=no ++ * allows changing hostname for the host and ProtectHostname=private allows changing the hostname in the unit's UTS ++ * namespace. */ + .protect_hostname = needs_sandboxing && context->protect_hostname == PROTECT_HOSTNAME_YES, + + .private_dev = needs_sandboxing && context->private_devices, +diff --git a/src/core/namespace.c b/src/core/namespace.c +index c327c9a3ca488..2f3b8f03d1308 100644 +--- a/src/core/namespace.c ++++ b/src/core/namespace.c +@@ -3308,6 +3308,7 @@ DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(protect_home, ProtectHome, PROTECT_HOME_ + static const char *const protect_hostname_table[_PROTECT_HOSTNAME_MAX] = { + [PROTECT_HOSTNAME_NO] = "no", + [PROTECT_HOSTNAME_YES] = "yes", ++ [PROTECT_HOSTNAME_PRIVATE] = "private", + }; + + DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(protect_hostname, ProtectHostname, PROTECT_HOSTNAME_YES); +diff --git a/src/core/namespace.h b/src/core/namespace.h +index 8df91e3bdf906..96f62be30a269 100644 +--- a/src/core/namespace.h ++++ b/src/core/namespace.h +@@ -31,6 +31,7 @@ typedef enum ProtectHome { + typedef enum ProtectHostname { + PROTECT_HOSTNAME_NO, + PROTECT_HOSTNAME_YES, ++ PROTECT_HOSTNAME_PRIVATE, + _PROTECT_HOSTNAME_MAX, + _PROTECT_HOSTNAME_INVALID = -EINVAL, + } ProtectHostname; +diff --git a/test/TEST-07-PID1/test.sh b/test/TEST-07-PID1/test.sh +index 66e1b684ea8a3..8e8a799a7150c 100755 +--- a/test/TEST-07-PID1/test.sh ++++ b/test/TEST-07-PID1/test.sh +@@ -13,7 +13,7 @@ TEST_INSTALL_VERITY_MINIMAL=1 + .
"${TEST_BASE_DIR:?}/test-functions" + + test_append_files() { +- image_install logger socat ++ image_install logger socat hostname + inst_binary mksquashfs + inst_binary unsquashfs + install_verity_minimal +diff --git a/test/units/TEST-07-PID1.protect-hostname.sh b/test/units/TEST-07-PID1.protect-hostname.sh +new file mode 100755 +index 0000000000000..c2ede395535f5 +--- /dev/null ++++ b/test/units/TEST-07-PID1.protect-hostname.sh +@@ -0,0 +1,44 @@ ++#!/usr/bin/env bash ++# SPDX-License-Identifier: LGPL-2.1-or-later ++# shellcheck disable=SC2016 ++set -eux ++set -o pipefail ++ ++# shellcheck source=test/units/test-control.sh ++. "$(dirname "$0")"/test-control.sh ++# shellcheck source=test/units/util.sh ++. "$(dirname "$0")"/util.sh ++ ++LEGACY_HOSTNAME="$(hostname)" ++HOSTNAME_FROM_SYSTEMD="$(hostnamectl hostname)" ++ ++testcase_yes() { ++ # hostnamectl calls SetHostname method via dbus socket which executes in hostnamed ++ # in the init namespace. So hostnamectl is not affected by ProtectHostname=yes or ++ # private since sethostname() system call is executed in the init namespace. ++ # ++ # hostnamed does authentication based on UID via polkit so this guarantees only ++ # admins can set hostname. ++ (! systemd-run --wait -p ProtectHostname=yes hostname foo) ++ ++ systemd-run --wait -p ProtectHostname=yes -p PrivateMounts=yes \ ++ findmnt --mountpoint /proc/sys/kernel/hostname ++} ++ ++testcase_private() { ++ systemd-run --wait -p ProtectHostnameEx=private \ ++ -P bash -xec ' ++ hostname foo ++ test "$(hostname)" = "foo" ++ ' ++ ++ # Verify host hostname is unchanged. ++ test "$(hostname)" = "$LEGACY_HOSTNAME" ++ test "$(hostnamectl hostname)" = "$HOSTNAME_FROM_SYSTEMD" ++ ++ # Verify /proc/sys/kernel/hostname is not bind mounted from host read-only. ++ (!
systemd-run --wait -p ProtectHostnameEx=private -p PrivateMounts=yes \ ++ findmnt --mountpoint /proc/sys/kernel/hostname) ++} ++ ++run_testcases diff --git a/systemd.spec b/systemd.spec index f2067c6..828ba91 100644 --- a/systemd.spec +++ b/systemd.spec @@ -160,6 +160,22 @@ Patch0906: 0001-networkctl-Make-lldp-status-backwards-compatible-wit.patch # Revert "network/lldp: do not save LLDP neighbors under /run/systemd" Patch0907: 0001-Revert-network-lldp-do-not-save-LLDP-neighbors-under.patch +# Meta specific patches for builds from git main (1001-1100) +# TODO: These should be removed once they are either merged into git main +# or upstreamed +%if %{with upstream} + +# core: Add ProtectHostname=private +Patch1001: https://github.com/systemd/systemd/pull/35447.patch + +# core: Add PrivateUsers=full +Patch1002: https://github.com/systemd/systemd/pull/35183.patch + +# Temporary workaround: PrivateUsers=full implies DelegateNamespaces=yes +Patch1003: 0001-Temporary-workaround-PrivateUsers-full-implies-Deleg.patch + +%endif + %endif %ifarch %{ix86} x86_64 aarch64 riscv64 @@ -722,6 +738,29 @@ library or other libraries from systemd-libs. This package conflicts with the main systemd package and is meant for use in exitrds. 
%prep +%if 0%{?facebook} && %{with upstream} + +# Call autosetup but disable patch management, we'll do that with autopatch below +%if %{defined branch} +%autosetup -N -n %{name}-%{branch} +%elif %{defined commit} +%autosetup -N -n %{name}-%{commit} +%else +%autosetup -N -n %{name}-%{version_no_tilde} +%endif + +# systemd-cd build defines autopatch as true to disable autopatch so undo this +# https://gitlab.com/CentOS/Hyperscale/releng/systemd-releng +%if 0%{?autopatch} +%undefine autopatch +%endif + +# Now install only patches in the specific Meta-only range +%autopatch -m 1001 -M 1100 -p1 + +%else + +# Use standard autosetup with automatic patch management %if %{defined branch} %autosetup -n %{name}-%{branch} -p1 %elif %{defined commit} @@ -730,6 +769,8 @@ main systemd package and is meant for use in exitrds. %autosetup -n %{name}-%{version_no_tilde} -p1 %endif +%endif + %build %global ntpvendor %(source /etc/os-release; echo ${ID}) %{!?ntpvendor: echo 'NTP vendor zone is not set!'; exit 1}