From 5b98412c6f0cb9e63a7c8f795064d2043cc0baaa Mon Sep 17 00:00:00 2001
From: Yu Watanabe <watanabe.yu+github@gmail.com>
Date: Sun, 6 Dec 2020 22:29:43 +0900
Subject: [PATCH] core/namespace: use existing /proc when not enough privileges
Fixes #17860.
---
src/core/namespace.c | 61 ++++++++++++++++++++++++--------------------
1 file changed, 34 insertions(+), 27 deletions(-)
diff --git a/src/core/namespace.c b/src/core/namespace.c
index cdf427a6ea9..8560ad9a754 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -859,25 +859,15 @@ static int mount_sysfs(const MountEntry *m) {
}
static int mount_procfs(const MountEntry *m, const NamespaceInfo *ns_info) {
+ _cleanup_free_ char *opts = NULL;
const char *entry_path;
- int r;
+ int r, n;
assert(m);
assert(ns_info);
- entry_path = mount_entry_path(m);
-
- /* Mount a new instance, so that we get the one that matches our user namespace, if we are running in
- * one. i.e we don't reuse existing mounts here under any condition, we want a new instance owned by
- * our user namespace and with our hidepid= settings applied. Hence, let's get rid of everything
- * mounted on /proc/ first. */
-
- (void) mkdir_p_label(entry_path, 0755);
- (void) umount_recursive(entry_path, 0);
-
if (ns_info->protect_proc != PROTECT_PROC_DEFAULT ||
ns_info->proc_subset != PROC_SUBSET_ALL) {
- _cleanup_free_ char *opts = NULL;
/* Starting with kernel 5.8 procfs' hidepid= logic is truly per-instance (previously it
* pretended to be per-instance but actually was per-namespace), hence let's make use of it
@@ -891,23 +881,40 @@ static int mount_procfs(const MountEntry *m, const NamespaceInfo *ns_info) {
ns_info->proc_subset == PROC_SUBSET_PID ? ",subset=pid" : "");
if (!opts)
return -ENOMEM;
-
- r = mount_nofollow_verbose(LOG_DEBUG, "proc", entry_path, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, opts);
- if (r < 0) {
- if (r != -EINVAL)
- return r;
-
- /* If this failed with EINVAL then this likely means the textual hidepid= stuff is
- * not supported by the kernel, and thus the per-instance hidepid= neither, which
- * means we really don't want to use it, since it would affect our host's /proc
- * mount. Hence let's gracefully fallback to a classic, unrestricted version. */
- } else
- return 1;
}
- r = mount_nofollow_verbose(LOG_DEBUG, "proc", entry_path, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL);
- if (r < 0)
- return r;
+ entry_path = mount_entry_path(m);
+ (void) mkdir_p_label(entry_path, 0755);
+
+ /* Mount a new instance, so that we get the one that matches our user namespace, if we are running in
+ * one. I.e. we normally don't reuse existing mounts here (the only exception is the EPERM fallback
+ * below), we want a new instance owned by our user namespace and with our hidepid= settings applied.
+ * Hence, let's get rid of everything mounted on /proc/ first. */
+
+ n = umount_recursive(entry_path, 0);
+
+ r = mount_nofollow_verbose(LOG_DEBUG, "proc", entry_path, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, opts);
+ if (r == -EINVAL && opts)
+ /* If this failed with EINVAL then this likely means the textual hidepid= stuff is
+ * not supported by the kernel, and thus the per-instance hidepid= neither, which
+ * means we really don't want to use it, since it would affect our host's /proc
+ * mount. Hence let's gracefully fall back to a classic, unrestricted version. */
+ r = mount_nofollow_verbose(LOG_DEBUG, "proc", entry_path, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL);
+ if (r == -EPERM) {
+ /* When we do not have enough privileges to mount /proc, fall back to using the existing /proc. */
+
+ if (n > 0)
+ /* /proc or some of its sub-mounts were unmounted above. Refuse to use an incomplete tree.
+ * Propagate the original error code returned by mount() above. */
+ return -EPERM;
+
+ r = path_is_mount_point(entry_path, NULL, 0);
+ if (r < 0)
+ return log_debug_errno(r, "Unable to determine whether /proc is already mounted: %m");
+ if (r == 0)
+ /* /proc is not mounted. Propagate the original error code. */
+ return -EPERM;
+ }
return 1;
}