dd65c9
From e5ac7ba7a16445f3ad23d9931979c20214eae913 Mon Sep 17 00:00:00 2001
dd65c9
From: Jan Synacek <jsynacek@redhat.com>
dd65c9
Date: Thu, 14 Sep 2017 16:27:08 +0200
dd65c9
Subject: [PATCH] path-util: make use of "mnt_id" field exported in
dd65c9
 /proc/self/fdinfo/<fd>
dd65c9
dd65c9
This commit is not a backport of a specific commit. It includes parts of
dd65c9
several upstream commits (3f72b427b44f39a1aec6806dad6f6b57103ae9ed,
dd65c9
5d409034017e9f9f8c4392157d95511fc2e05d87 and others).
dd65c9
dd65c9
The main goal was to bring path_is_mount_point() up to date, which meant
dd65c9
introducing fd_fdinfo_mnt_id() and fd_is_mount_point(). These were
dd65c9
needed mainly because we need to determine mount points based on
dd65c9
/proc/self/fdinfo/<fd> in containers. Also, there are more places in the
dd65c9
code where checks for mount points are performed, which would benefit from
dd65c9
this fix as well. Additionally, corresponding tests has been added.
dd65c9
dd65c9
Resolves: #1472439
dd65c9
---
dd65c9
 src/nspawn/nspawn.c       |   2 +-
23b3cf
 src/shared/path-util.c    | 219 +++++++++++++++++++++++++++++---------
dd65c9
 src/shared/path-util.h    |   1 +
23b3cf
 src/test/test-path-util.c |  62 +++++++++++
dd65c9
 4 files changed, 235 insertions(+), 49 deletions(-)
dd65c9
dd65c9
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
dd65c9
index ea365b3f9..ee2e1832f 100644
dd65c9
--- a/src/nspawn/nspawn.c
dd65c9
+++ b/src/nspawn/nspawn.c
dd65c9
@@ -990,7 +990,7 @@ static int mount_cgroup_hierarchy(const char *dest, const char *controller, cons
dd65c9
         to = strjoina(dest, "/sys/fs/cgroup/", hierarchy);
dd65c9
 
dd65c9
         r = path_is_mount_point(to, false);
dd65c9
-        if (r < 0)
dd65c9
+        if (r < 0 && r != -ENOENT)
dd65c9
                 return log_error_errno(r, "Failed to determine if %s is mounted already: %m", to);
dd65c9
         if (r > 0)
dd65c9
                 return 0;
dd65c9
diff --git a/src/shared/path-util.c b/src/shared/path-util.c
dd65c9
index 1181ffb9d..5d4de9ec4 100644
dd65c9
--- a/src/shared/path-util.c
dd65c9
+++ b/src/shared/path-util.c
dd65c9
@@ -36,6 +36,7 @@
dd65c9
 #include "strv.h"
dd65c9
 #include "path-util.h"
dd65c9
 #include "missing.h"
dd65c9
+#include "fileio.h"
dd65c9
 
dd65c9
 bool path_is_absolute(const char *p) {
dd65c9
         return p[0] == '/';
dd65c9
@@ -473,87 +474,209 @@ char* path_join(const char *root, const char *path, const char *rest) {
dd65c9
                                NULL);
dd65c9
 }
dd65c9
 
dd65c9
-int path_is_mount_point(const char *t, bool allow_symlink) {
dd65c9
+static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) {
dd65c9
+        char path[strlen("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
dd65c9
+        _cleanup_free_ char *fdinfo = NULL;
dd65c9
+        _cleanup_close_ int subfd = -1;
dd65c9
+        char *p;
dd65c9
+        int r;
dd65c9
+
dd65c9
+        if ((flags & AT_EMPTY_PATH) && isempty(filename))
dd65c9
+                xsprintf(path, "/proc/self/fdinfo/%i", fd);
dd65c9
+        else {
dd65c9
+                subfd = openat(fd, filename, O_CLOEXEC|O_PATH);
dd65c9
+                if (subfd < 0)
dd65c9
+                        return -errno;
dd65c9
 
dd65c9
-        union file_handle_union h = FILE_HANDLE_INIT;
dd65c9
+                xsprintf(path, "/proc/self/fdinfo/%i", subfd);
dd65c9
+        }
dd65c9
+
dd65c9
+        r = read_full_file(path, &fdinfo, NULL);
dd65c9
+        if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */
dd65c9
+                return -EOPNOTSUPP;
dd65c9
+        if (r < 0)
dd65c9
+                return -errno;
dd65c9
+
dd65c9
+        p = startswith(fdinfo, "mnt_id:");
dd65c9
+        if (!p) {
dd65c9
+                p = strstr(fdinfo, "\nmnt_id:");
dd65c9
+                if (!p) /* The mnt_id field is a relatively new addition */
dd65c9
+                        return -EOPNOTSUPP;
dd65c9
+
dd65c9
+                p += 8;
dd65c9
+        }
dd65c9
+
dd65c9
+        p += strspn(p, WHITESPACE);
dd65c9
+        p[strcspn(p, WHITESPACE)] = 0;
dd65c9
+
dd65c9
+        return safe_atoi(p, mnt_id);
dd65c9
+}
dd65c9
+
dd65c9
+int fd_is_mount_point(int fd, const char *filename, int flags) {
dd65c9
+        union file_handle_union h = FILE_HANDLE_INIT, h_parent = FILE_HANDLE_INIT;
dd65c9
         int mount_id = -1, mount_id_parent = -1;
dd65c9
-        _cleanup_free_ char *parent = NULL;
dd65c9
+        bool nosupp = false, check_st_dev = true;
dd65c9
         struct stat a, b;
dd65c9
         int r;
dd65c9
-        bool nosupp = false;
dd65c9
 
dd65c9
-        /* We are not actually interested in the file handles, but
dd65c9
-         * name_to_handle_at() also passes us the mount ID, hence use
dd65c9
-         * it but throw the handle away */
dd65c9
+        assert(fd >= 0);
dd65c9
+        assert(filename);
dd65c9
 
dd65c9
-        if (path_equal(t, "/"))
dd65c9
-                return 1;
dd65c9
-
dd65c9
-        r = name_to_handle_at(AT_FDCWD, t, &h.handle, &mount_id, allow_symlink ? AT_SYMLINK_FOLLOW : 0);
dd65c9
+        /* First we will try the name_to_handle_at() syscall, which
dd65c9
+         * tells us the mount id and an opaque file "handle". It is
dd65c9
+         * not supported everywhere though (kernel compile-time
dd65c9
+         * option, not all file systems are hooked up). If it works
dd65c9
+         * the mount id is usually good enough to tell us whether
dd65c9
+         * something is a mount point.
dd65c9
+         *
dd65c9
+         * If that didn't work we will try to read the mount id from
dd65c9
+         * /proc/self/fdinfo/<fd>. This is almost as good as
dd65c9
+         * name_to_handle_at(), however, does not return the
dd65c9
+         * opaque file handle. The opaque file handle is pretty useful
dd65c9
+         * to detect the root directory, which we should always
dd65c9
+         * consider a mount point. Hence we use this only as
dd65c9
+         * fallback. Exporting the mnt_id in fdinfo is a pretty recent
dd65c9
+         * kernel addition.
dd65c9
+         *
dd65c9
+         * As last fallback we do traditional fstat() based st_dev
dd65c9
+         * comparisons. This is how things were traditionally done,
dd65c9
+         * but unionfs breaks this since it exposes file
dd65c9
+         * systems with a variety of st_dev reported. Also, btrfs
dd65c9
+         * subvolumes have different st_dev, even though they aren't
dd65c9
+         * real mounts of their own. */
dd65c9
+
dd65c9
+        r = name_to_handle_at(fd, filename, &h.handle, &mount_id, flags);
dd65c9
         if (r < 0) {
dd65c9
-                if (errno == ENOSYS)
dd65c9
-                        /* This kernel does not support name_to_handle_at()
dd65c9
-                         * fall back to the traditional stat() logic. */
dd65c9
-                        goto fallback;
dd65c9
+                if (IN_SET(errno, ENOSYS, EACCES, EPERM))
dd65c9
+                        /* This kernel does not support name_to_handle_at() at all, or the syscall was blocked (maybe
dd65c9
+                         * through seccomp, because we are running inside of a container?): fall back to simpler
dd65c9
+                         * logic. */
dd65c9
+                        goto fallback_fdinfo;
dd65c9
                 else if (errno == EOPNOTSUPP)
dd65c9
                         /* This kernel or file system does not support
dd65c9
-                         * name_to_handle_at(), hence fallback to the
dd65c9
+                         * name_to_handle_at(), hence let's see if the
dd65c9
+                         * upper fs supports it (in which case it is a
dd65c9
+                         * mount point), otherwise fallback to the
dd65c9
                          * traditional stat() logic */
dd65c9
                         nosupp = true;
dd65c9
-                else if (errno == ENOENT)
dd65c9
-                        return 0;
dd65c9
                 else
dd65c9
                         return -errno;
dd65c9
         }
dd65c9
 
dd65c9
-        r = path_get_parent(t, &parent);
dd65c9
-        if (r < 0)
dd65c9
-                return r;
dd65c9
-
dd65c9
-        h.handle.handle_bytes = MAX_HANDLE_SZ;
dd65c9
-        r = name_to_handle_at(AT_FDCWD, parent, &h.handle, &mount_id_parent, AT_SYMLINK_FOLLOW);
dd65c9
-        if (r < 0)
dd65c9
-                if (errno == EOPNOTSUPP)
dd65c9
+        r = name_to_handle_at(fd, "", &h_parent.handle, &mount_id_parent, AT_EMPTY_PATH);
dd65c9
+        if (r < 0) {
dd65c9
+                if (errno == EOPNOTSUPP) {
dd65c9
                         if (nosupp)
dd65c9
                                 /* Neither parent nor child do name_to_handle_at()?
dd65c9
                                    We have no choice but to fall back. */
dd65c9
-                                goto fallback;
dd65c9
+                                goto fallback_fdinfo;
dd65c9
                         else
dd65c9
-                                /* The parent can't do name_to_handle_at() but
dd65c9
-                                 * the directory we are interested in can?
dd65c9
-                                 * Or the other way around?
dd65c9
+                                /* The parent can't do name_to_handle_at() but the
dd65c9
+                                 * directory we are interested in can?
dd65c9
                                  * If so, it must be a mount point. */
dd65c9
                                 return 1;
dd65c9
-                else
dd65c9
+                } else
dd65c9
                         return -errno;
dd65c9
-        else
dd65c9
-                return mount_id != mount_id_parent;
dd65c9
+        }
dd65c9
 
dd65c9
-fallback:
dd65c9
-        if (allow_symlink)
dd65c9
-                r = stat(t, &a);
dd65c9
-        else
dd65c9
-                r = lstat(t, &a);
dd65c9
+        /* The parent can do name_to_handle_at() but the
dd65c9
+         * directory we are interested in can't? If so, it
dd65c9
+         * must be a mount point. */
dd65c9
+        if (nosupp)
dd65c9
+                return 1;
dd65c9
 
dd65c9
-        if (r < 0) {
dd65c9
-                if (errno == ENOENT)
dd65c9
-                        return 0;
dd65c9
+        /* If the file handle for the directory we are
dd65c9
+         * interested in and its parent are identical, we
dd65c9
+         * assume this is the root directory, which is a mount
dd65c9
+         * point. */
dd65c9
 
dd65c9
-                return -errno;
dd65c9
-        }
dd65c9
+        if (h.handle.handle_bytes == h_parent.handle.handle_bytes &&
dd65c9
+            h.handle.handle_type == h_parent.handle.handle_type &&
dd65c9
+            memcmp(h.handle.f_handle, h_parent.handle.f_handle, h.handle.handle_bytes) == 0)
dd65c9
+                return 1;
dd65c9
 
dd65c9
-        free(parent);
dd65c9
-        parent = NULL;
dd65c9
+        return mount_id != mount_id_parent;
dd65c9
 
dd65c9
-        r = path_get_parent(t, &parent);
dd65c9
+fallback_fdinfo:
dd65c9
+        r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
dd65c9
+        if (IN_SET(r, -EOPNOTSUPP, -EACCES, -EPERM))
dd65c9
+                goto fallback_fstat;
dd65c9
         if (r < 0)
dd65c9
                 return r;
dd65c9
 
dd65c9
-        r = stat(parent, &b);
dd65c9
+        r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
dd65c9
         if (r < 0)
dd65c9
+                return r;
dd65c9
+
dd65c9
+        if (mount_id != mount_id_parent)
dd65c9
+                return 1;
dd65c9
+
dd65c9
+        /* Hmm, so, the mount ids are the same. This leaves one
dd65c9
+         * special case though for the root file system. For that,
dd65c9
+         * let's see if the parent directory has the same inode as we
dd65c9
+         * are interested in. Hence, let's also do fstat() checks now,
dd65c9
+         * too, but avoid the st_dev comparisons, since they aren't
dd65c9
+         * that useful on unionfs mounts. */
dd65c9
+        check_st_dev = false;
dd65c9
+
dd65c9
+fallback_fstat:
dd65c9
+        /* yay for fstatat() taking a different set of flags than the other
dd65c9
+         * _at() above */
dd65c9
+        if (flags & AT_SYMLINK_FOLLOW)
dd65c9
+                flags &= ~AT_SYMLINK_FOLLOW;
dd65c9
+        else
dd65c9
+                flags |= AT_SYMLINK_NOFOLLOW;
dd65c9
+        if (fstatat(fd, filename, &a, flags) < 0)
dd65c9
+                return -errno;
dd65c9
+
dd65c9
+        if (fstatat(fd, "", &b, AT_EMPTY_PATH) < 0)
dd65c9
+                return -errno;
dd65c9
+
dd65c9
+        /* A directory with same device and inode as its parent? Must
dd65c9
+         * be the root directory */
dd65c9
+        if (a.st_dev == b.st_dev &&
dd65c9
+            a.st_ino == b.st_ino)
dd65c9
+                return 1;
dd65c9
+
dd65c9
+        return check_st_dev && (a.st_dev != b.st_dev);
dd65c9
+}
dd65c9
+
dd65c9
+
dd65c9
+
dd65c9
+int path_is_mount_point(const char *t, bool allow_symlink) {
dd65c9
+        _cleanup_free_ char *canonical = NULL, *parent = NULL;
dd65c9
+        _cleanup_close_ int fd = -1;
dd65c9
+        int flags = allow_symlink ? AT_SYMLINK_FOLLOW : 0;
dd65c9
+
dd65c9
+        assert(t);
dd65c9
+
dd65c9
+        if (path_equal(t, "/"))
dd65c9
+                return 1;
dd65c9
+
dd65c9
+        /* we need to resolve symlinks manually, we can't just rely on
dd65c9
+         * fd_is_mount_point() to do that for us; if we have a structure like
dd65c9
+         * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
dd65c9
+         * look at needs to be /usr, not /. */
dd65c9
+        if (flags & AT_SYMLINK_FOLLOW) {
dd65c9
+                canonical = canonicalize_file_name(t);
dd65c9
+                if (!canonical) {
dd65c9
+                        if (errno == ENOENT)
dd65c9
+                                return 0;
dd65c9
+                        else
dd65c9
+                                return -errno;
dd65c9
+                }
dd65c9
+                t = canonical;
dd65c9
+        }
dd65c9
+
dd65c9
+        parent = dirname_malloc(t);
dd65c9
+        if (!parent)
dd65c9
+                return -ENOMEM;
dd65c9
+
dd65c9
+        fd = openat(AT_FDCWD, parent, O_DIRECTORY|O_CLOEXEC|O_PATH);
dd65c9
+        if (fd < 0)
dd65c9
                 return -errno;
dd65c9
 
dd65c9
-        return a.st_dev != b.st_dev;
dd65c9
+        return fd_is_mount_point(fd, basename(t), flags);
dd65c9
 }
dd65c9
 
dd65c9
 int path_is_read_only_fs(const char *path) {
dd65c9
diff --git a/src/shared/path-util.h b/src/shared/path-util.h
dd65c9
index 71bb740e9..34c016229 100644
dd65c9
--- a/src/shared/path-util.h
dd65c9
+++ b/src/shared/path-util.h
dd65c9
@@ -53,6 +53,7 @@ char** path_strv_make_absolute_cwd(char **l);
dd65c9
 char** path_strv_resolve(char **l, const char *prefix);
dd65c9
 char** path_strv_resolve_uniq(char **l, const char *prefix);
dd65c9
 
dd65c9
+int fd_is_mount_point(int fd, const char *filename, int flags);
dd65c9
 int path_is_mount_point(const char *path, bool allow_symlink);
dd65c9
 int path_is_read_only_fs(const char *path);
dd65c9
 int path_is_os_tree(const char *path);
dd65c9
diff --git a/src/test/test-path-util.c b/src/test/test-path-util.c
dd65c9
index 6396fcb39..a4fec07e7 100644
dd65c9
--- a/src/test/test-path-util.c
dd65c9
+++ b/src/test/test-path-util.c
dd65c9
@@ -21,6 +21,7 @@
dd65c9
 
dd65c9
 #include <stdio.h>
dd65c9
 #include <unistd.h>
dd65c9
+#include <sys/mount.h>
dd65c9
 
dd65c9
 #include "path-util.h"
dd65c9
 #include "util.h"
dd65c9
@@ -99,6 +100,66 @@ static void test_path(void) {
dd65c9
         }
dd65c9
 }
dd65c9
 
dd65c9
+static void test_path_is_mount_point(void) {
dd65c9
+        int fd, rt, rf, rlt, rlf;
dd65c9
+        char tmp_dir[] = "/tmp/test-path-is-mount-point-XXXXXX";
dd65c9
+        _cleanup_free_ char *file1 = NULL, *file2 = NULL, *link1 = NULL, *link2 = NULL;
dd65c9
+
dd65c9
+        assert_se(path_is_mount_point("/", true) > 0);
dd65c9
+        assert_se(path_is_mount_point("/", false) > 0);
dd65c9
+
dd65c9
+        assert_se(path_is_mount_point("/proc", true) > 0);
dd65c9
+        assert_se(path_is_mount_point("/proc", false) > 0);
dd65c9
+
dd65c9
+        assert_se(path_is_mount_point("/proc/1", true) == 0);
dd65c9
+        assert_se(path_is_mount_point("/proc/1", false) == 0);
dd65c9
+
dd65c9
+        assert_se(path_is_mount_point("/sys", true) > 0);
dd65c9
+        assert_se(path_is_mount_point("/sys", false) > 0);
dd65c9
+
dd65c9
+        /* file mountpoints */
dd65c9
+        assert_se(mkdtemp(tmp_dir) != NULL);
dd65c9
+        file1 = path_join(NULL, tmp_dir, "file1");
dd65c9
+        assert_se(file1);
dd65c9
+        file2 = path_join(NULL, tmp_dir, "file2");
dd65c9
+        assert_se(file2);
dd65c9
+        fd = open(file1, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC, 0664);
dd65c9
+        assert_se(fd > 0);
dd65c9
+        close(fd);
dd65c9
+        fd = open(file2, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC, 0664);
dd65c9
+        assert_se(fd > 0);
dd65c9
+        close(fd);
dd65c9
+        link1 = path_join(NULL, tmp_dir, "link1");
dd65c9
+        assert_se(link1);
dd65c9
+        assert_se(symlink("file1", link1) == 0);
dd65c9
+        link2 = path_join(NULL, tmp_dir, "link2");
dd65c9
+        assert_se(link1);
dd65c9
+        assert_se(symlink("file2", link2) == 0);
dd65c9
+
dd65c9
+        assert_se(path_is_mount_point(file1, true) == 0);
dd65c9
+        assert_se(path_is_mount_point(file1, false) == 0);
dd65c9
+        assert_se(path_is_mount_point(link1, true) == 0);
dd65c9
+        assert_se(path_is_mount_point(link1, false) == 0);
dd65c9
+
dd65c9
+        /* this test will only work as root */
dd65c9
+        if (mount(file1, file2, NULL, MS_BIND, NULL) >= 0) {
dd65c9
+                rf = path_is_mount_point(file2, false);
dd65c9
+                rt = path_is_mount_point(file2, true);
dd65c9
+                rlf = path_is_mount_point(link2, false);
dd65c9
+                rlt = path_is_mount_point(link2, true);
dd65c9
+
dd65c9
+                assert_se(umount(file2) == 0);
dd65c9
+
dd65c9
+                assert_se(rf == 1);
dd65c9
+                assert_se(rt == 1);
dd65c9
+                assert_se(rlf == 0);
dd65c9
+                assert_se(rlt == 1);
dd65c9
+        } else
dd65c9
+                printf("Skipping bind mount file test: %m\n");
dd65c9
+
dd65c9
+        assert_se(rm_rf(tmp_dir, false, true, false) == 0);
dd65c9
+}
dd65c9
+
dd65c9
 static void test_find_binary(const char *self, bool local) {
dd65c9
         char *p;
dd65c9
 
dd65c9
@@ -288,6 +349,7 @@ int main(int argc, char **argv) {
dd65c9
         test_make_relative();
dd65c9
         test_strv_resolve();
dd65c9
         test_path_startswith();
dd65c9
+        test_path_is_mount_point();
dd65c9
 
dd65c9
         return 0;
dd65c9
 }