6c7d9c
From 5b98412c6f0cb9e63a7c8f795064d2043cc0baaa Mon Sep 17 00:00:00 2001
505ca8
From: Yu Watanabe <watanabe.yu+github@gmail.com>
505ca8
Date: Sun, 6 Dec 2020 22:29:43 +0900
6c7d9c
Subject: [PATCH] core/namespace: use existing /proc when not enough privilege
505ca8
505ca8
Fixes #17860.
505ca8
---
505ca8
 src/core/namespace.c | 61 ++++++++++++++++++++++++--------------------
505ca8
 1 file changed, 34 insertions(+), 27 deletions(-)
505ca8
505ca8
diff --git a/src/core/namespace.c b/src/core/namespace.c
6c7d9c
index cdf427a6ea9..8560ad9a754 100644
505ca8
--- a/src/core/namespace.c
505ca8
+++ b/src/core/namespace.c
505ca8
@@ -859,25 +859,15 @@ static int mount_sysfs(const MountEntry *m) {
505ca8
 }
505ca8
 
505ca8
 static int mount_procfs(const MountEntry *m, const NamespaceInfo *ns_info) {
505ca8
+        _cleanup_free_ char *opts = NULL;
505ca8
         const char *entry_path;
505ca8
-        int r;
505ca8
+        int r, n;
505ca8
 
505ca8
         assert(m);
505ca8
         assert(ns_info);
505ca8
 
505ca8
-        entry_path = mount_entry_path(m);
505ca8
-
505ca8
-        /* Mount a new instance, so that we get the one that matches our user namespace, if we are running in
505ca8
-         * one. i.e we don't reuse existing mounts here under any condition, we want a new instance owned by
505ca8
-         * our user namespace and with our hidepid= settings applied. Hence, let's get rid of everything
505ca8
-         * mounted on /proc/ first. */
505ca8
-
505ca8
-        (void) mkdir_p_label(entry_path, 0755);
505ca8
-        (void) umount_recursive(entry_path, 0);
505ca8
-
505ca8
         if (ns_info->protect_proc != PROTECT_PROC_DEFAULT ||
505ca8
             ns_info->proc_subset != PROC_SUBSET_ALL) {
505ca8
-                _cleanup_free_ char *opts = NULL;
505ca8
 
505ca8
                 /* Starting with kernel 5.8 procfs' hidepid= logic is truly per-instance (previously it
505ca8
                  * pretended to be per-instance but actually was per-namespace), hence let's make use of it
505ca8
@@ -891,23 +881,40 @@ static int mount_procfs(const MountEntry *m, const NamespaceInfo *ns_info) {
505ca8
                                ns_info->proc_subset == PROC_SUBSET_PID ? ",subset=pid" : "");
505ca8
                 if (!opts)
505ca8
                         return -ENOMEM;
505ca8
-
505ca8
-                r = mount_nofollow_verbose(LOG_DEBUG, "proc", entry_path, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, opts);
505ca8
-                if (r < 0) {
505ca8
-                        if (r != -EINVAL)
505ca8
-                                return r;
505ca8
-
505ca8
-                        /* If this failed with EINVAL then this likely means the textual hidepid= stuff is
505ca8
-                         * not supported by the kernel, and thus the per-instance hidepid= neither, which
505ca8
-                         * means we really don't want to use it, since it would affect our host's /proc
505ca8
-                         * mount. Hence let's gracefully fallback to a classic, unrestricted version. */
505ca8
-                } else
505ca8
-                        return 1;
505ca8
         }
505ca8
 
505ca8
-        r = mount_nofollow_verbose(LOG_DEBUG, "proc", entry_path, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL);
505ca8
-        if (r < 0)
505ca8
-                return r;
505ca8
+        entry_path = mount_entry_path(m);
505ca8
+        (void) mkdir_p_label(entry_path, 0755);
505ca8
+
505ca8
+        /* Mount a new instance, so that we get the one that matches our user namespace, if we are running in
505ca8
+         * one. i.e we don't reuse existing mounts here under any condition, we want a new instance owned by
505ca8
+         * our user namespace and with our hidepid= settings applied. Hence, let's get rid of everything
505ca8
+         * mounted on /proc/ first. */
505ca8
+
505ca8
+        n = umount_recursive(entry_path, 0);
505ca8
+
505ca8
+        r = mount_nofollow_verbose(LOG_DEBUG, "proc", entry_path, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, opts);
505ca8
+        if (r == -EINVAL && opts)
505ca8
+                /* If this failed with EINVAL then this likely means the textual hidepid= stuff is
505ca8
+                 * not supported by the kernel, and thus the per-instance hidepid= neither, which
505ca8
+                 * means we really don't want to use it, since it would affect our host's /proc
505ca8
+                 * mount. Hence let's gracefully fallback to a classic, unrestricted version. */
505ca8
+                r = mount_nofollow_verbose(LOG_DEBUG, "proc", entry_path, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL);
505ca8
+        if (r == -EPERM) {
505ca8
+                /* When we do not have enough privilege to mount /proc, fall back to the existing /proc. */
505ca8
+
505ca8
+                if (n > 0)
505ca8
+                        /* /proc or some of its sub-mounts were unmounted above. Refuse an incomplete tree.
505ca8
+                         * Propagate the original error code returned by mount() above. */
505ca8
+                        return -EPERM;
505ca8
+
505ca8
+                r = path_is_mount_point(entry_path, NULL, 0);
505ca8
+                if (r < 0)
505ca8
+                        return log_debug_errno(r, "Unable to determine whether /proc is already mounted: %m");
505ca8
+                if (r == 0)
505ca8
+                        /* /proc is not mounted. Propagate the original error code. */
505ca8
+                        return -EPERM;
505ca8
+        }
505ca8
 
505ca8
         return 1;
505ca8
 }