|
Ryan Wilson |
61c859 |
From 705cc82938b67fa110f2f6f5d28bfb9ec2f339c0 Mon Sep 17 00:00:00 2001
|
|
Ryan Wilson |
61c859 |
From: Ryan Wilson <ryantimwilson@meta.com>
|
|
Ryan Wilson |
61c859 |
Date: Fri, 15 Nov 2024 06:56:05 -0800
|
|
Ryan Wilson |
61c859 |
Subject: [PATCH 1/2] core: Add PrivateUsers=full
|
|
Ryan Wilson |
61c859 |
|
|
Ryan Wilson |
61c859 |
Recently, PrivateUsers=identity was added to support mapping the first
|
|
Ryan Wilson |
61c859 |
65536 UIDs/GIDs from parent to the child namespace and mapping the other
|
|
Ryan Wilson |
61c859 |
UID/GIDs to the nobody user.
|
|
Ryan Wilson |
61c859 |
|
|
Ryan Wilson |
61c859 |
However, there are use cases where users have UIDs/GIDs > 65536 and need
|
|
Ryan Wilson |
61c859 |
to do a similar identity mapping. Moreover, in some of those cases, users
|
|
Ryan Wilson |
61c859 |
want a full identity mapping from 0 -> UID_MAX.
|
|
Ryan Wilson |
61c859 |
|
|
Ryan Wilson |
61c859 |
Note to differentiate ourselves from the init user namespace, we need to
|
|
Ryan Wilson |
61c859 |
set up the uid_map/gid_map like:
|
|
Ryan Wilson |
61c859 |
```
|
|
Ryan Wilson |
61c859 |
0 0 1
|
|
Ryan Wilson |
61c859 |
1 1 UINT32_MAX - 1
|
|
Ryan Wilson |
61c859 |
```
|
|
Ryan Wilson |
61c859 |
|
|
Ryan Wilson |
61c859 |
as the init user namespace uses `0 0 UINT32_MAX` and some applications -
|
|
Ryan Wilson |
61c859 |
like systemd itself - determine if it's a non-init user namespace based on
|
|
Ryan Wilson |
61c859 |
uid_map/gid_map files. Note systemd will remove this heuristic in
|
|
Ryan Wilson |
61c859 |
running_in_userns() in version 258 and use namespace inodes. But some users
|
|
Ryan Wilson |
61c859 |
may be running a container image with older systemd < 258 so we keep this
|
|
Ryan Wilson |
61c859 |
hack until version 259.
|
|
Ryan Wilson |
61c859 |
|
|
Ryan Wilson |
61c859 |
To support this, we add PrivateUsers=full that does identity mapping for
|
|
Ryan Wilson |
61c859 |
all available UID/GIDs.
|
|
Ryan Wilson |
61c859 |
|
|
Ryan Wilson |
61c859 |
Fixes: #35168
|
|
Ryan Wilson |
61c859 |
---
|
|
Ryan Wilson |
61c859 |
man/systemd.exec.xml | 8 +++++--
|
|
Ryan Wilson |
61c859 |
src/core/exec-invoke.c | 28 ++++++++++++++++++++++++
|
|
Ryan Wilson |
61c859 |
src/core/namespace.c | 1 +
|
|
Ryan Wilson |
61c859 |
src/core/namespace.h | 1 +
|
|
Ryan Wilson |
61c859 |
test/units/TEST-07-PID1.private-users.sh | 2 ++
|
|
Ryan Wilson |
61c859 |
5 files changed, 38 insertions(+), 2 deletions(-)
|
|
Ryan Wilson |
61c859 |
|
|
Ryan Wilson |
61c859 |
diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
|
|
Ryan Wilson |
61c859 |
index 607c88128ded4..482dbbda80a84 100644
|
|
Ryan Wilson |
61c859 |
--- a/man/systemd.exec.xml
|
|
Ryan Wilson |
61c859 |
+++ b/man/systemd.exec.xml
|
|
Ryan Wilson |
61c859 |
@@ -2009,8 +2009,8 @@ BindReadOnlyPaths=/var/lib/systemd</programlisting>
|
|
Ryan Wilson |
61c859 |
<varlistentry>
|
|
Ryan Wilson |
61c859 |
<term><varname>PrivateUsers=</varname></term>
|
|
Ryan Wilson |
61c859 |
|
|
Ryan Wilson |
61c859 |
- <listitem><para>Takes a boolean argument or one of <literal>self</literal> or
|
|
Ryan Wilson |
61c859 |
- <literal>identity</literal>. Defaults to false. If enabled, sets up a new user namespace for the
|
|
Ryan Wilson |
61c859 |
+ <listitem><para>Takes a boolean argument or one of <literal>self</literal>, <literal>identity</literal>,
|
|
Ryan Wilson |
61c859 |
+ or <literal>full</literal>. Defaults to false. If enabled, sets up a new user namespace for the
|
|
Ryan Wilson |
61c859 |
executed processes and configures a user and group mapping. If set to a true value or
|
|
Ryan Wilson |
61c859 |
<literal>self</literal>, a minimal user and group mapping is configured that maps the
|
|
Ryan Wilson |
61c859 |
<literal>root</literal> user and group as well as the unit's own user and group to themselves and
|
|
Ryan Wilson |
61c859 |
@@ -2026,6 +2026,10 @@ BindReadOnlyPaths=/var/lib/systemd</programlisting>
|
|
Ryan Wilson |
61c859 |
since all UIDs/GIDs are chosen identically it does provide process capability isolation, and hence is
|
|
Ryan Wilson |
61c859 |
often a good choice if proper user namespacing with distinct UID maps is not appropriate.</para>
|
|
Ryan Wilson |
61c859 |
|
|
Ryan Wilson |
61c859 |
+ <para>If the parameter is <literal>full</literal>, user namespacing is set up with an identity
|
|
Ryan Wilson |
61c859 |
+ mapping for all UIDs/GIDs. Similar to <literal>identity</literal>, this does not provide UID/GID
|
|
Ryan Wilson |
61c859 |
+ isolation, but it does provide process capability isolation.</para>
|
|
Ryan Wilson |
61c859 |
+
|
|
Ryan Wilson |
61c859 |
<para>If this mode is enabled, all unit processes are run without privileges in the host user
|
|
Ryan Wilson |
61c859 |
namespace (regardless if the unit's own user/group is <literal>root</literal> or not). Specifically
|
|
Ryan Wilson |
61c859 |
this means that the process will have zero process capabilities on the host's user namespace, but
|
|
Ryan Wilson |
61c859 |
diff --git a/src/core/exec-invoke.c b/src/core/exec-invoke.c
|
|
Ryan Wilson |
61c859 |
index 9d636f552950d..682d6449d76f3 100644
|
|
Ryan Wilson |
61c859 |
--- a/src/core/exec-invoke.c
|
|
Ryan Wilson |
61c859 |
+++ b/src/core/exec-invoke.c
|
|
Ryan Wilson |
61c859 |
@@ -2103,6 +2103,29 @@ static int setup_private_users(PrivateUsers private_users, uid_t ouid, gid_t ogi
|
|
Ryan Wilson |
61c859 |
uid_map = strdup("0 0 65536\n");
|
|
Ryan Wilson |
61c859 |
if (!uid_map)
|
|
Ryan Wilson |
61c859 |
return -ENOMEM;
|
|
Ryan Wilson |
61c859 |
+ } else if (private_users == PRIVATE_USERS_FULL) {
|
|
Ryan Wilson |
61c859 |
+ /* Map all UID/GID from original to new user namespace. We can't use `0 0 UINT32_MAX` because
|
|
Ryan Wilson |
61c859 |
+ * this is the same UID/GID map as the init user namespace and systemd's running_in_userns()
|
|
Ryan Wilson |
61c859 |
+ * checks whether it's in a user namespace by comparing uid_map/gid_map to `0 0 UINT32_MAX`.
|
|
Ryan Wilson |
61c859 |
+ * Thus, we still map all UIDs/GIDs but do it using two extents to differentiate the new user
|
|
Ryan Wilson |
61c859 |
+ * namespace from the init namespace:
|
|
Ryan Wilson |
61c859 |
+ * 0 0 1
|
|
Ryan Wilson |
61c859 |
+ * 1 1 UINT32_MAX - 1
|
|
Ryan Wilson |
61c859 |
+ *
|
|
Ryan Wilson |
61c859 |
+ * systemd will remove the heuristic in running_in_userns() and use namespace inodes in version 258
|
|
Ryan Wilson |
61c859 |
+ * (PR #35382). But some users may be running a container image with older systemd < 258 so we keep
|
|
Ryan Wilson |
61c859 |
+ * this uid_map/gid_map hack until version 259 for version N-1 compatibility.
|
|
Ryan Wilson |
61c859 |
+ *
|
|
Ryan Wilson |
61c859 |
+ * TODO: Switch to `0 0 UINT32_MAX` in systemd v259.
|
|
Ryan Wilson |
61c859 |
+ *
|
|
Ryan Wilson |
61c859 |
+ * Note the kernel defines the UID range between 0 and UINT32_MAX so we map all UIDs even though
|
|
Ryan Wilson |
61c859 |
+ * the UID range beyond INT32_MAX (i.e. the range above the signed 32-bit range) is
|
|
Ryan Wilson |
61c859 |
+ * icky. For example, setfsuid() returns the old UID as signed integer. But units can decide to
|
|
Ryan Wilson |
61c859 |
+ * use these UIDs/GIDs so we need to map them. */
|
|
Ryan Wilson |
61c859 |
+ r = asprintf(&uid_map, "0 0 1\n"
|
|
Ryan Wilson |
61c859 |
+ "1 1 " UID_FMT "\n", (uid_t) (UINT32_MAX - 1));
|
|
Ryan Wilson |
61c859 |
+ if (r < 0)
|
|
Ryan Wilson |
61c859 |
+ return -ENOMEM;
|
|
Ryan Wilson |
61c859 |
/* Can only set up multiple mappings with CAP_SETUID. */
|
|
Ryan Wilson |
61c859 |
} else if (have_effective_cap(CAP_SETUID) > 0 && uid != ouid && uid_is_valid(uid)) {
|
|
Ryan Wilson |
61c859 |
r = asprintf(&uid_map,
|
|
Ryan Wilson |
61c859 |
@@ -2123,6 +2146,11 @@ static int setup_private_users(PrivateUsers private_users, uid_t ouid, gid_t ogi
|
|
Ryan Wilson |
61c859 |
gid_map = strdup("0 0 65536\n");
|
|
Ryan Wilson |
61c859 |
if (!gid_map)
|
|
Ryan Wilson |
61c859 |
return -ENOMEM;
|
|
Ryan Wilson |
61c859 |
+ } else if (private_users == PRIVATE_USERS_FULL) {
|
|
Ryan Wilson |
61c859 |
+ r = asprintf(&gid_map, "0 0 1\n"
|
|
Ryan Wilson |
61c859 |
+ "1 1 " GID_FMT "\n", (gid_t) (UINT32_MAX - 1));
|
|
Ryan Wilson |
61c859 |
+ if (r < 0)
|
|
Ryan Wilson |
61c859 |
+ return -ENOMEM;
|
|
Ryan Wilson |
61c859 |
/* Can only set up multiple mappings with CAP_SETGID. */
|
|
Ryan Wilson |
61c859 |
} else if (have_effective_cap(CAP_SETGID) > 0 && gid != ogid && gid_is_valid(gid)) {
|
|
Ryan Wilson |
61c859 |
r = asprintf(&gid_map,
|
|
Ryan Wilson |
61c859 |
diff --git a/src/core/namespace.c b/src/core/namespace.c
|
|
Ryan Wilson |
61c859 |
index 57dbbc4fc7dc5..c584ea35724d1 100644
|
|
Ryan Wilson |
61c859 |
--- a/src/core/namespace.c
|
|
Ryan Wilson |
61c859 |
+++ b/src/core/namespace.c
|
|
Ryan Wilson |
61c859 |
@@ -3364,6 +3364,7 @@ static const char* const private_users_table[_PRIVATE_USERS_MAX] = {
|
|
Ryan Wilson |
61c859 |
[PRIVATE_USERS_NO] = "no",
|
|
Ryan Wilson |
61c859 |
[PRIVATE_USERS_SELF] = "self",
|
|
Ryan Wilson |
61c859 |
[PRIVATE_USERS_IDENTITY] = "identity",
|
|
Ryan Wilson |
61c859 |
+ [PRIVATE_USERS_FULL] = "full",
|
|
Ryan Wilson |
61c859 |
};
|
|
Ryan Wilson |
61c859 |
|
|
Ryan Wilson |
61c859 |
DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(private_users, PrivateUsers, PRIVATE_USERS_SELF);
|
|
Ryan Wilson |
61c859 |
diff --git a/src/core/namespace.h b/src/core/namespace.h
|
|
Ryan Wilson |
61c859 |
index bd48aa31da71c..5d466a8c1c724 100644
|
|
Ryan Wilson |
61c859 |
--- a/src/core/namespace.h
|
|
Ryan Wilson |
61c859 |
+++ b/src/core/namespace.h
|
|
Ryan Wilson |
61c859 |
@@ -65,6 +65,7 @@ typedef enum PrivateUsers {
|
|
Ryan Wilson |
61c859 |
PRIVATE_USERS_NO,
|
|
Ryan Wilson |
61c859 |
PRIVATE_USERS_SELF,
|
|
Ryan Wilson |
61c859 |
PRIVATE_USERS_IDENTITY,
|
|
Ryan Wilson |
61c859 |
+ PRIVATE_USERS_FULL,
|
|
Ryan Wilson |
61c859 |
_PRIVATE_USERS_MAX,
|
|
Ryan Wilson |
61c859 |
_PRIVATE_USERS_INVALID = -EINVAL,
|
|
Ryan Wilson |
61c859 |
} PrivateUsers;
|
|
Ryan Wilson |
61c859 |
diff --git a/test/units/TEST-07-PID1.private-users.sh b/test/units/TEST-07-PID1.private-users.sh
|
|
Ryan Wilson |
61c859 |
index 2475b5d365d59..ba85248f9607e 100755
|
|
Ryan Wilson |
61c859 |
--- a/test/units/TEST-07-PID1.private-users.sh
|
|
Ryan Wilson |
61c859 |
+++ b/test/units/TEST-07-PID1.private-users.sh
|
|
Ryan Wilson |
61c859 |
@@ -10,3 +10,5 @@ systemd-run -p PrivateUsersEx=self --wait bash -c 'test "$(cat /proc/self/uid_ma
|
|
Ryan Wilson |
61c859 |
systemd-run -p PrivateUsersEx=self --wait bash -c 'test "$(cat /proc/self/gid_map)" == " 0 0 1"'
|
|
Ryan Wilson |
61c859 |
systemd-run -p PrivateUsersEx=identity --wait bash -c 'test "$(cat /proc/self/uid_map)" == " 0 0 65536"'
|
|
Ryan Wilson |
61c859 |
systemd-run -p PrivateUsersEx=identity --wait bash -c 'test "$(cat /proc/self/gid_map)" == " 0 0 65536"'
|
|
Ryan Wilson |
61c859 |
+systemd-run -p PrivateUsersEx=full --wait bash -c 'test "$(cat /proc/self/uid_map | tr -d "\n")" == " 0 0 1 1 1 4294967294"'
|
|
Ryan Wilson |
61c859 |
+systemd-run -p PrivateUsersEx=full --wait bash -c 'test "$(cat /proc/self/gid_map | tr -d "\n")" == " 0 0 1 1 1 4294967294"'
|
|
Ryan Wilson |
61c859 |
|
|
Ryan Wilson |
61c859 |
From 878e86f12b7184a87a9cc1ecd4f99c5d9744f931 Mon Sep 17 00:00:00 2001
|
|
Ryan Wilson |
61c859 |
From: Ryan Wilson <ryantimwilson@meta.com>
|
|
Ryan Wilson |
61c859 |
Date: Sat, 30 Nov 2024 14:14:35 -0800
|
|
Ryan Wilson |
61c859 |
Subject: [PATCH 2/2] core: Set /proc/pid/setgroups to allow for
|
|
Ryan Wilson |
61c859 |
PrivateUsers=full
|
|
Ryan Wilson |
61c859 |
|
|
Ryan Wilson |
61c859 |
When trying to run dbus-broker in a systemd unit with PrivateUsers=full,
|
|
Ryan Wilson |
61c859 |
we see dbus-broker fails with EPERM at `util_audit_drop_permissions`.
|
|
Ryan Wilson |
61c859 |
|
|
Ryan Wilson |
61c859 |
The root cause is dbus-broker calls the setgroups() system call and this
|
|
Ryan Wilson |
61c859 |
is disallowed via systemd's implementation of PrivateUsers= by setting
|
|
Ryan Wilson |
61c859 |
/proc/pid/setgroups = deny. This is done to remediate potential privilege
|
|
Ryan Wilson |
61c859 |
escalation vulnerabilities in user namespaces where an attacker can remove
|
|
Ryan Wilson |
61c859 |
supplementary groups and gain access to resources where those groups are
|
|
Ryan Wilson |
61c859 |
restricted.
|
|
Ryan Wilson |
61c859 |
|
|
Ryan Wilson |
61c859 |
However, for OS-like containers, setgroups() is a pretty common API and
|
|
Ryan Wilson |
61c859 |
disabling it is not feasible. So we allow setgroups() by setting
|
|
Ryan Wilson |
61c859 |
/proc/pid/setgroups to allow in PrivateUsers=full. Note security conscious
|
|
Ryan Wilson |
61c859 |
users can still use SystemCallFilter= to disable setgroups() if they want
|
|
Ryan Wilson |
61c859 |
to specifically prevent this system call.
|
|
Ryan Wilson |
61c859 |
|
|
Ryan Wilson |
61c859 |
Fixes: #35425
|
|
Ryan Wilson |
61c859 |
---
|
|
Ryan Wilson |
61c859 |
man/systemd.exec.xml | 7 +++++--
|
|
Ryan Wilson |
61c859 |
src/core/exec-invoke.c | 23 ++++++++++++++++-------
|
|
Ryan Wilson |
61c859 |
test/units/TEST-07-PID1.private-users.sh | 3 +++
|
|
Ryan Wilson |
61c859 |
3 files changed, 24 insertions(+), 9 deletions(-)
|
|
Ryan Wilson |
61c859 |
|
|
Ryan Wilson |
61c859 |
diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
|
|
Ryan Wilson |
61c859 |
index 482dbbda80a84..b31e64f57c844 100644
|
|
Ryan Wilson |
61c859 |
--- a/man/systemd.exec.xml
|
|
Ryan Wilson |
61c859 |
+++ b/man/systemd.exec.xml
|
|
Ryan Wilson |
61c859 |
@@ -2027,8 +2027,11 @@ BindReadOnlyPaths=/var/lib/systemd</programlisting>
|
|
Ryan Wilson |
61c859 |
often a good choice if proper user namespacing with distinct UID maps is not appropriate.</para>
|
|
Ryan Wilson |
61c859 |
|
|
Ryan Wilson |
61c859 |
<para>If the parameter is <literal>full</literal>, user namespacing is set up with an identity
|
|
Ryan Wilson |
61c859 |
- mapping for all UIDs/GIDs. Similar to <literal>identity</literal>, this does not provide UID/GID
|
|
Ryan Wilson |
61c859 |
- isolation, but it does provide process capability isolation.</para>
|
|
Ryan Wilson |
61c859 |
+ mapping for all UIDs/GIDs. In addition, for system services, <literal>full</literal> allows the unit
|
|
Ryan Wilson |
61c859 |
+ to call the <function>setgroups()</function> system call (by setting
|
|
Ryan Wilson |
61c859 |
+ <filename>/proc/<replaceable>pid</replaceable>/setgroups</filename> to <literal>allow</literal>).
|
|
Ryan Wilson |
61c859 |
+ Similar to <literal>identity</literal>, this does not provide UID/GID isolation, but it does provide
|
|
Ryan Wilson |
61c859 |
+ process capability isolation.</para>
|
|
Ryan Wilson |
61c859 |
|
|
Ryan Wilson |
61c859 |
<para>If this mode is enabled, all unit processes are run without privileges in the host user
|
|
Ryan Wilson |
61c859 |
namespace (regardless if the unit's own user/group is <literal>root</literal> or not). Specifically
|
|
Ryan Wilson |
61c859 |
diff --git a/src/core/exec-invoke.c b/src/core/exec-invoke.c
|
|
Ryan Wilson |
61c859 |
index 682d6449d76f3..8305bb2bcf7da 100644
|
|
Ryan Wilson |
61c859 |
--- a/src/core/exec-invoke.c
|
|
Ryan Wilson |
61c859 |
+++ b/src/core/exec-invoke.c
|
|
Ryan Wilson |
61c859 |
@@ -2077,7 +2077,7 @@ static int build_pass_environment(const ExecContext *c, char ***ret) {
|
|
Ryan Wilson |
61c859 |
return 0;
|
|
Ryan Wilson |
61c859 |
}
|
|
Ryan Wilson |
61c859 |
|
|
Ryan Wilson |
61c859 |
-static int setup_private_users(PrivateUsers private_users, uid_t ouid, gid_t ogid, uid_t uid, gid_t gid) {
|
|
Ryan Wilson |
61c859 |
+static int setup_private_users(PrivateUsers private_users, uid_t ouid, gid_t ogid, uid_t uid, gid_t gid, bool allow_setgroups) {
|
|
Ryan Wilson |
61c859 |
_cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
|
|
Ryan Wilson |
61c859 |
_cleanup_close_pair_ int errno_pipe[2] = EBADF_PAIR;
|
|
Ryan Wilson |
61c859 |
_cleanup_close_ int unshare_ready_fd = -EBADF;
|
|
Ryan Wilson |
61c859 |
@@ -2196,7 +2196,8 @@ static int setup_private_users(PrivateUsers private_users, uid_t ouid, gid_t ogi
|
|
Ryan Wilson |
61c859 |
if (read(unshare_ready_fd, &c, sizeof(c)) < 0)
|
|
Ryan Wilson |
61c859 |
report_errno_and_exit(errno_pipe[1], -errno);
|
|
Ryan Wilson |
61c859 |
|
|
Ryan Wilson |
61c859 |
- /* Disable the setgroups() system call in the child user namespace, for good. */
|
|
Ryan Wilson |
61c859 |
+ /* Disable the setgroups() system call in the child user namespace, for good, unless PrivateUsers=full
|
|
Ryan Wilson |
61c859 |
+ * and using the system service manager. */
|
|
Ryan Wilson |
61c859 |
a = procfs_file_alloca(ppid, "setgroups");
|
|
Ryan Wilson |
61c859 |
fd = open(a, O_WRONLY|O_CLOEXEC);
|
|
Ryan Wilson |
61c859 |
if (fd < 0) {
|
|
Ryan Wilson |
61c859 |
@@ -2207,10 +2208,15 @@ static int setup_private_users(PrivateUsers private_users, uid_t ouid, gid_t ogi
|
|
Ryan Wilson |
61c859 |
|
|
Ryan Wilson |
61c859 |
/* If the file is missing the kernel is too old, let's continue anyway. */
|
|
Ryan Wilson |
61c859 |
} else {
|
|
Ryan Wilson |
61c859 |
- if (write(fd, "deny\n", 5) < 0) {
|
|
Ryan Wilson |
61c859 |
- r = log_debug_errno(errno, "Failed to write \"deny\" to %s: %m", a);
|
|
Ryan Wilson |
61c859 |
- report_errno_and_exit(errno_pipe[1], r);
|
|
Ryan Wilson |
61c859 |
+ if (allow_setgroups) {
|
|
Ryan Wilson |
61c859 |
+ if (write(fd, "allow\n", 6) < 0)
|
|
Ryan Wilson |
61c859 |
+ r = log_debug_errno(errno, "Failed to write \"allow\" to %s: %m", a);
|
|
Ryan Wilson |
61c859 |
+ } else {
|
|
Ryan Wilson |
61c859 |
+ if (write(fd, "deny\n", 5) < 0)
|
|
Ryan Wilson |
61c859 |
+ r = log_debug_errno(errno, "Failed to write \"deny\" to %s: %m", a);
|
|
Ryan Wilson |
61c859 |
}
|
|
Ryan Wilson |
61c859 |
+ if (r < 0)
|
|
Ryan Wilson |
61c859 |
+ report_errno_and_exit(errno_pipe[1], r);
|
|
Ryan Wilson |
61c859 |
|
|
Ryan Wilson |
61c859 |
fd = safe_close(fd);
|
|
Ryan Wilson |
61c859 |
}
|
|
Ryan Wilson |
61c859 |
@@ -5007,7 +5013,9 @@ int exec_invoke(
|
|
Ryan Wilson |
61c859 |
if (pu == PRIVATE_USERS_NO)
|
|
Ryan Wilson |
61c859 |
pu = PRIVATE_USERS_SELF;
|
|
Ryan Wilson |
61c859 |
|
|
Ryan Wilson |
61c859 |
- r = setup_private_users(pu, saved_uid, saved_gid, uid, gid);
|
|
Ryan Wilson |
61c859 |
+ /* The kernel requires /proc/pid/setgroups be set to "deny" prior to writing /proc/pid/gid_map in
|
|
Ryan Wilson |
61c859 |
+ * unprivileged user namespaces. */
|
|
Ryan Wilson |
61c859 |
+ r = setup_private_users(pu, saved_uid, saved_gid, uid, gid, /* allow_setgroups= */ false);
|
|
Ryan Wilson |
61c859 |
/* If it was requested explicitly and we can't set it up, fail early. Otherwise, continue and let
|
|
Ryan Wilson |
61c859 |
* the actual requested operations fail (or silently continue). */
|
|
Ryan Wilson |
61c859 |
if (r < 0 && context->private_users != PRIVATE_USERS_NO) {
|
|
Ryan Wilson |
61c859 |
@@ -5177,7 +5185,8 @@ int exec_invoke(
|
|
Ryan Wilson |
61c859 |
* different user namespace). */
|
|
Ryan Wilson |
61c859 |
|
|
Ryan Wilson |
61c859 |
if (needs_sandboxing && !userns_set_up) {
|
|
Ryan Wilson |
61c859 |
- r = setup_private_users(context->private_users, saved_uid, saved_gid, uid, gid);
|
|
Ryan Wilson |
61c859 |
+ r = setup_private_users(context->private_users, saved_uid, saved_gid, uid, gid,
|
|
Ryan Wilson |
61c859 |
+ /* allow_setgroups= */ context->private_users == PRIVATE_USERS_FULL);
|
|
Ryan Wilson |
61c859 |
if (r < 0) {
|
|
Ryan Wilson |
61c859 |
*exit_status = EXIT_USER;
|
|
Ryan Wilson |
61c859 |
return log_exec_error_errno(context, params, r, "Failed to set up user namespacing: %m");
|
|
Ryan Wilson |
61c859 |
diff --git a/test/units/TEST-07-PID1.private-users.sh b/test/units/TEST-07-PID1.private-users.sh
|
|
Ryan Wilson |
61c859 |
index ba85248f9607e..e788f52a2f73f 100755
|
|
Ryan Wilson |
61c859 |
--- a/test/units/TEST-07-PID1.private-users.sh
|
|
Ryan Wilson |
61c859 |
+++ b/test/units/TEST-07-PID1.private-users.sh
|
|
Ryan Wilson |
61c859 |
@@ -6,9 +6,12 @@ set -o pipefail
|
|
Ryan Wilson |
61c859 |
|
|
Ryan Wilson |
61c859 |
systemd-run -p PrivateUsers=yes --wait bash -c 'test "$(cat /proc/self/uid_map)" == " 0 0 1"'
|
|
Ryan Wilson |
61c859 |
systemd-run -p PrivateUsers=yes --wait bash -c 'test "$(cat /proc/self/gid_map)" == " 0 0 1"'
|
|
Ryan Wilson |
61c859 |
+systemd-run -p PrivateUsersEx=yes --wait bash -c 'test "$(cat /proc/self/setgroups)" == "deny"'
|
|
Ryan Wilson |
61c859 |
systemd-run -p PrivateUsersEx=self --wait bash -c 'test "$(cat /proc/self/uid_map)" == " 0 0 1"'
|
|
Ryan Wilson |
61c859 |
systemd-run -p PrivateUsersEx=self --wait bash -c 'test "$(cat /proc/self/gid_map)" == " 0 0 1"'
|
|
Ryan Wilson |
61c859 |
+systemd-run -p PrivateUsersEx=self --wait bash -c 'test "$(cat /proc/self/setgroups)" == "deny"'
|
|
Ryan Wilson |
61c859 |
systemd-run -p PrivateUsersEx=identity --wait bash -c 'test "$(cat /proc/self/uid_map)" == " 0 0 65536"'
|
|
Ryan Wilson |
61c859 |
systemd-run -p PrivateUsersEx=identity --wait bash -c 'test "$(cat /proc/self/gid_map)" == " 0 0 65536"'
|
|
Ryan Wilson |
61c859 |
systemd-run -p PrivateUsersEx=full --wait bash -c 'test "$(cat /proc/self/uid_map | tr -d "\n")" == " 0 0 1 1 1 4294967294"'
|
|
Ryan Wilson |
61c859 |
systemd-run -p PrivateUsersEx=full --wait bash -c 'test "$(cat /proc/self/gid_map | tr -d "\n")" == " 0 0 1 1 1 4294967294"'
|
|
Ryan Wilson |
61c859 |
+systemd-run -p PrivateUsersEx=full --wait bash -c 'test "$(cat /proc/self/setgroups)" == "allow"'
|