Blob Blame History Raw
From 5b98412c6f0cb9e63a7c8f795064d2043cc0baaa Mon Sep 17 00:00:00 2001
From: Yu Watanabe <watanabe.yu+github@gmail.com>
Date: Sun, 6 Dec 2020 22:29:43 +0900
Subject: [PATCH] core/namespace: use existing /proc when not enough privilege

Fixes #17860.
---
 src/core/namespace.c | 61 ++++++++++++++++++++++++--------------------
 1 file changed, 34 insertions(+), 27 deletions(-)

diff --git a/src/core/namespace.c b/src/core/namespace.c
index cdf427a6ea9..8560ad9a754 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -859,25 +859,15 @@ static int mount_sysfs(const MountEntry *m) {
 }
 
 static int mount_procfs(const MountEntry *m, const NamespaceInfo *ns_info) {
+        _cleanup_free_ char *opts = NULL;
         const char *entry_path;
-        int r;
+        int r, n;
 
         assert(m);
         assert(ns_info);
 
-        entry_path = mount_entry_path(m);
-
-        /* Mount a new instance, so that we get the one that matches our user namespace, if we are running in
-         * one. i.e we don't reuse existing mounts here under any condition, we want a new instance owned by
-         * our user namespace and with our hidepid= settings applied. Hence, let's get rid of everything
-         * mounted on /proc/ first. */
-
-        (void) mkdir_p_label(entry_path, 0755);
-        (void) umount_recursive(entry_path, 0);
-
         if (ns_info->protect_proc != PROTECT_PROC_DEFAULT ||
             ns_info->proc_subset != PROC_SUBSET_ALL) {
-                _cleanup_free_ char *opts = NULL;
 
                 /* Starting with kernel 5.8 procfs' hidepid= logic is truly per-instance (previously it
                  * pretended to be per-instance but actually was per-namespace), hence let's make use of it
@@ -891,23 +881,40 @@ static int mount_procfs(const MountEntry *m, const NamespaceInfo *ns_info) {
                                ns_info->proc_subset == PROC_SUBSET_PID ? ",subset=pid" : "");
                 if (!opts)
                         return -ENOMEM;
-
-                r = mount_nofollow_verbose(LOG_DEBUG, "proc", entry_path, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, opts);
-                if (r < 0) {
-                        if (r != -EINVAL)
-                                return r;
-
-                        /* If this failed with EINVAL then this likely means the textual hidepid= stuff is
-                         * not supported by the kernel, and thus the per-instance hidepid= neither, which
-                         * means we really don't want to use it, since it would affect our host's /proc
-                         * mount. Hence let's gracefully fallback to a classic, unrestricted version. */
-                } else
-                        return 1;
         }
 
-        r = mount_nofollow_verbose(LOG_DEBUG, "proc", entry_path, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL);
-        if (r < 0)
-                return r;
+        entry_path = mount_entry_path(m);
+        (void) mkdir_p_label(entry_path, 0755);
+
+        /* Mount a new instance, so that we get the one that matches our user namespace, if we are running in
+         * one. i.e we don't reuse existing mounts here under any condition, we want a new instance owned by
+         * our user namespace and with our hidepid= settings applied. Hence, let's get rid of everything
+         * mounted on /proc/ first. */
+
+        n = umount_recursive(entry_path, 0);
+
+        r = mount_nofollow_verbose(LOG_DEBUG, "proc", entry_path, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, opts);
+        if (r == -EINVAL && opts)
+                /* If this failed with EINVAL then this likely means the textual hidepid= stuff is
+                 * not supported by the kernel, and thus the per-instance hidepid= neither, which
+                 * means we really don't want to use it, since it would affect our host's /proc
+                 * mount. Hence let's gracefully fall back to a classic, unrestricted version. */
+                r = mount_nofollow_verbose(LOG_DEBUG, "proc", entry_path, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL);
+        if (r == -EPERM) {
+                /* When we do not have enough privilege to mount /proc, fall back to the existing /proc. */
+
+                if (n > 0)
+                        /* /proc or some of its sub-mounts were unmounted above. Refuse the incomplete
+                         * tree and propagate the original error code returned by mount() above. */
+                        return -EPERM;
+
+                r = path_is_mount_point(entry_path, NULL, 0);
+                if (r < 0)
+                        return log_debug_errno(r, "Unable to determine whether /proc is already mounted: %m");
+                if (r == 0)
+                        /* /proc is not mounted. Propagate the original error code. */
+                        return -EPERM;
+        }
 
         return 1;
 }