923a60
From e5ac7ba7a16445f3ad23d9931979c20214eae913 Mon Sep 17 00:00:00 2001
923a60
From: Jan Synacek <jsynacek@redhat.com>
923a60
Date: Thu, 14 Sep 2017 16:27:08 +0200
923a60
Subject: [PATCH] path-util: make use of "mnt_id" field exported in
923a60
 /proc/self/fdinfo/<fd>
923a60
923a60
This commit is not a backport of a specific commit. It includes parts of
923a60
several upstream commits (3f72b427b44f39a1aec6806dad6f6b57103ae9ed,
923a60
5d409034017e9f9f8c4392157d95511fc2e05d87 and others).
923a60
923a60
The main goal was to bring path_is_mount_point() up to date, which meant
923a60
introducing fd_fdinfo_mnt_id() and fd_is_mount_point(). These were
923a60
needed mainly because we need to determine mount points based on
923a60
/proc/self/fdinfo/<fd> in containers. Also, there are more places in the
923a60
code where checks for mount points are performed, which would benefit from
923a60
this fix as well. Additionally, corresponding tests have been added.
923a60
923a60
Resolves: #1472439
923a60
---
923a60
 src/nspawn/nspawn.c       |   2 +-
923a60
 src/shared/path-util.c    | 219 +++++++++++++++++++++++++++++---------
923a60
 src/shared/path-util.h    |   1 +
923a60
 src/test/test-path-util.c |  62 +++++++++++
923a60
 4 files changed, 235 insertions(+), 49 deletions(-)
923a60
923a60
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
923a60
index ea365b3f9b..ee2e1832f1 100644
923a60
--- a/src/nspawn/nspawn.c
923a60
+++ b/src/nspawn/nspawn.c
923a60
@@ -990,7 +990,7 @@ static int mount_cgroup_hierarchy(const char *dest, const char *controller, cons
923a60
         to = strjoina(dest, "/sys/fs/cgroup/", hierarchy);
923a60
 
923a60
         r = path_is_mount_point(to, false);
923a60
-        if (r < 0)
923a60
+        if (r < 0 && r != -ENOENT)
923a60
                 return log_error_errno(r, "Failed to determine if %s is mounted already: %m", to);
923a60
         if (r > 0)
923a60
                 return 0;
923a60
diff --git a/src/shared/path-util.c b/src/shared/path-util.c
923a60
index 1181ffb9d4..5d4de9ec4d 100644
923a60
--- a/src/shared/path-util.c
923a60
+++ b/src/shared/path-util.c
923a60
@@ -36,6 +36,7 @@
923a60
 #include "strv.h"
923a60
 #include "path-util.h"
923a60
 #include "missing.h"
923a60
+#include "fileio.h"
923a60
 
923a60
 bool path_is_absolute(const char *p) {
923a60
         return p[0] == '/';
923a60
@@ -473,87 +474,209 @@ char* path_join(const char *root, const char *path, const char *rest) {
923a60
                                NULL);
923a60
 }
923a60
 
923a60
-int path_is_mount_point(const char *t, bool allow_symlink) {
923a60
+static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) { /* Reads the "mnt_id:" field of /proc/self/fdinfo/<fd> for fd itself or for filename relative to fd */
923a60
+        char path[strlen("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
923a60
+        _cleanup_free_ char *fdinfo = NULL;
923a60
+        _cleanup_close_ int subfd = -1;
923a60
+        char *p;
923a60
+        int r;
923a60
+        /* Returns safe_atoi()'s result, -EOPNOTSUPP if fdinfo or its mnt_id field is missing, or another negative errno-style value. */
923a60
+        if ((flags & AT_EMPTY_PATH) && isempty(filename))
923a60
+                xsprintf(path, "/proc/self/fdinfo/%i", fd);
923a60
+        else {
923a60
+                subfd = openat(fd, filename, O_CLOEXEC|O_PATH|(flags & AT_SYMLINK_FOLLOW ? 0 : O_NOFOLLOW)); /* follow a trailing symlink only if the caller asked for it */
923a60
+                if (subfd < 0)
923a60
+                        return -errno;
923a60
 
923a60
-        union file_handle_union h = FILE_HANDLE_INIT;
923a60
+                xsprintf(path, "/proc/self/fdinfo/%i", subfd);
923a60
+        }
923a60
+
923a60
+        r = read_full_file(path, &fdinfo, NULL);
923a60
+        if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */
923a60
+                return -EOPNOTSUPP;
923a60
+        if (r < 0)
923a60
+                return r; /* the error is carried in r; errno may be stale at this point */
923a60
+
923a60
+        p = startswith(fdinfo, "mnt_id:");
923a60
+        if (!p) {
923a60
+                p = strstr(fdinfo, "\nmnt_id:");
923a60
+                if (!p) /* The mnt_id field is a relatively new addition */
923a60
+                        return -EOPNOTSUPP;
923a60
+
923a60
+                p += 8; /* skip over "\nmnt_id:" */
923a60
+        }
923a60
+        /* Isolate the value: skip leading whitespace, then cut at the next whitespace character. */
923a60
+        p += strspn(p, WHITESPACE);
923a60
+        p[strcspn(p, WHITESPACE)] = 0;
923a60
+
923a60
+        return safe_atoi(p, mnt_id);
923a60
+}
923a60
+
923a60
+int fd_is_mount_point(int fd, const char *filename, int flags) { /* Checks whether filename, looked up relative to fd, is a mount point */
923a60
+        union file_handle_union h = FILE_HANDLE_INIT, h_parent = FILE_HANDLE_INIT;
923a60
         int mount_id = -1, mount_id_parent = -1;
923a60
-        _cleanup_free_ char *parent = NULL;
923a60
+        bool nosupp = false, check_st_dev = true;
923a60
         struct stat a, b;
923a60
         int r;
923a60
-        bool nosupp = false;
923a60
 
923a60
-        /* We are not actually interested in the file handles, but
923a60
-         * name_to_handle_at() also passes us the mount ID, hence use
923a60
-         * it but throw the handle away */
923a60
+        assert(fd >= 0);
923a60
+        assert(filename);
923a60
 
923a60
-        if (path_equal(t, "/"))
923a60
-                return 1;
923a60
-
923a60
-        r = name_to_handle_at(AT_FDCWD, t, &h.handle, &mount_id, allow_symlink ? AT_SYMLINK_FOLLOW : 0);
923a60
+        /* First we will try the name_to_handle_at() syscall, which
923a60
+         * tells us the mount id and an opaque file "handle". It is
923a60
+         * not supported everywhere though (kernel compile-time
923a60
+         * option, not all file systems are hooked up). If it works
923a60
+         * the mount id is usually good enough to tell us whether
923a60
+         * something is a mount point.
923a60
+         *
923a60
+         * If that didn't work we will try to read the mount id from
923a60
+         * /proc/self/fdinfo/<fd>. This is almost as good as
923a60
+         * name_to_handle_at(), however, does not return the
923a60
+         * opaque file handle. The opaque file handle is pretty useful
923a60
+         * to detect the root directory, which we should always
923a60
+         * consider a mount point. Hence we use this only as
923a60
+         * fallback. Exporting the mnt_id in fdinfo is a pretty recent
923a60
+         * kernel addition.
923a60
+         *
923a60
+         * As last fallback we do traditional fstat() based st_dev
923a60
+         * comparisons. This is how things were traditionally done,
923a60
+         * but unionfs breaks this since it exposes file
923a60
+         * systems with a variety of st_dev reported. Also, btrfs
923a60
+         * subvolumes have different st_dev, even though they aren't
923a60
+         * real mounts of their own. */
923a60
+        /* Returns 1 if it is a mount point, 0 if it is not, or a negative errno-style value on failure. */
923a60
+        r = name_to_handle_at(fd, filename, &h.handle, &mount_id, flags);
923a60
         if (r < 0) {
923a60
-                if (errno == ENOSYS)
923a60
-                        /* This kernel does not support name_to_handle_at()
923a60
-                         * fall back to the traditional stat() logic. */
923a60
-                        goto fallback;
923a60
+                if (IN_SET(errno, ENOSYS, EACCES, EPERM))
923a60
+                        /* This kernel does not support name_to_handle_at() at all, or the syscall was blocked (maybe
923a60
+                         * through seccomp, because we are running inside of a container?): fall back to simpler
923a60
+                         * logic. */
923a60
+                        goto fallback_fdinfo;
923a60
                 else if (errno == EOPNOTSUPP)
923a60
                         /* This kernel or file system does not support
923a60
-                         * name_to_handle_at(), hence fallback to the
923a60
+                         * name_to_handle_at(), hence let's see if the
923a60
+                         * upper fs supports it (in which case it is a
923a60
+                         * mount point), otherwise fallback to the
923a60
                          * traditional stat() logic */
923a60
                         nosupp = true;
923a60
-                else if (errno == ENOENT)
923a60
-                        return 0;
923a60
                 else
923a60
                         return -errno;
923a60
         }
923a60
 
923a60
-        r = path_get_parent(t, &parent);
923a60
-        if (r < 0)
923a60
-                return r;
923a60
-
923a60
-        h.handle.handle_bytes = MAX_HANDLE_SZ;
923a60
-        r = name_to_handle_at(AT_FDCWD, parent, &h.handle, &mount_id_parent, AT_SYMLINK_FOLLOW);
923a60
-        if (r < 0)
923a60
-                if (errno == EOPNOTSUPP)
923a60
+        r = name_to_handle_at(fd, "", &h_parent.handle, &mount_id_parent, AT_EMPTY_PATH); /* "" with AT_EMPTY_PATH: query fd itself, which plays the role of the parent */
923a60
+        if (r < 0) {
923a60
+                if (errno == EOPNOTSUPP) {
923a60
                         if (nosupp)
923a60
                                 /* Neither parent nor child do name_to_handle_at()?
923a60
                                    We have no choice but to fall back. */
923a60
-                                goto fallback;
923a60
+                                goto fallback_fdinfo;
923a60
                         else
923a60
-                                /* The parent can't do name_to_handle_at() but
923a60
-                                 * the directory we are interested in can?
923a60
-                                 * Or the other way around?
923a60
+                                /* The parent can't do name_to_handle_at() but the
923a60
+                                 * directory we are interested in can?
923a60
                                  * If so, it must be a mount point. */
923a60
                                 return 1;
923a60
-                else
923a60
+                } else
923a60
                         return -errno;
923a60
-        else
923a60
-                return mount_id != mount_id_parent;
923a60
+        }
923a60
 
923a60
-fallback:
923a60
-        if (allow_symlink)
923a60
-                r = stat(t, &a);
923a60
-        else
923a60
-                r = lstat(t, &a);
923a60
+        /* The parent can do name_to_handle_at() but the
923a60
+         * directory we are interested in can't? If so, it
923a60
+         * must be a mount point. */
923a60
+        if (nosupp)
923a60
+                return 1;
923a60
 
923a60
-        if (r < 0) {
923a60
-                if (errno == ENOENT)
923a60
-                        return 0;
923a60
+        /* If the file handle for the directory we are
923a60
+         * interested in and its parent are identical, we
923a60
+         * assume this is the root directory, which is a mount
923a60
+         * point. */
923a60
 
923a60
-                return -errno;
923a60
-        }
923a60
+        if (h.handle.handle_bytes == h_parent.handle.handle_bytes &&
923a60
+            h.handle.handle_type == h_parent.handle.handle_type &&
923a60
+            memcmp(h.handle.f_handle, h_parent.handle.f_handle, h.handle.handle_bytes) == 0)
923a60
+                return 1;
923a60
 
923a60
-        free(parent);
923a60
-        parent = NULL;
923a60
+        return mount_id != mount_id_parent; /* both lookups succeeded: differing mount ids mean a mount point */
923a60
 
923a60
-        r = path_get_parent(t, &parent);
923a60
+fallback_fdinfo:
923a60
+        r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
923a60
+        if (IN_SET(r, -EOPNOTSUPP, -EACCES, -EPERM))
923a60
+                goto fallback_fstat;
923a60
         if (r < 0)
923a60
                 return r;
923a60
 
923a60
-        r = stat(parent, &b);
923a60
+        r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
923a60
         if (r < 0)
923a60
+                return r;
923a60
+
923a60
+        if (mount_id != mount_id_parent)
923a60
+                return 1;
923a60
+
923a60
+        /* Hmm, so, the mount ids are the same. This leaves one
923a60
+         * special case though for the root file system. For that,
923a60
+         * let's see if the parent directory has the same inode as we
923a60
+         * are interested in. Hence, let's also do fstat() checks now,
923a60
+         * too, but avoid the st_dev comparisons, since they aren't
923a60
+         * that useful on unionfs mounts. */
923a60
+        check_st_dev = false;
923a60
+
923a60
+fallback_fstat:
923a60
+        /* yay for fstatat() taking a different set of flags than the other
923a60
+         * _at() above */
923a60
+        if (flags & AT_SYMLINK_FOLLOW)
923a60
+                flags &= ~AT_SYMLINK_FOLLOW; /* fstatat() follows symlinks unless told otherwise */
923a60
+        else
923a60
+                flags |= AT_SYMLINK_NOFOLLOW;
923a60
+        if (fstatat(fd, filename, &a, flags) < 0)
923a60
+                return -errno;
923a60
+
923a60
+        if (fstatat(fd, "", &b, AT_EMPTY_PATH) < 0)
923a60
+                return -errno;
923a60
+
923a60
+        /* A directory with same device and inode as its parent? Must
923a60
+         * be the root directory */
923a60
+        if (a.st_dev == b.st_dev &&
923a60
+            a.st_ino == b.st_ino)
923a60
+                return 1;
923a60
+
923a60
+        return check_st_dev && (a.st_dev != b.st_dev); /* st_dev only decides when no mount ids could be compared above */
923a60
+}
923a60
+
923a60
+
923a60
+
923a60
+int path_is_mount_point(const char *t, bool allow_symlink) { /* Path-based front end for fd_is_mount_point(); allow_symlink requests symlink resolution of t */
923a60
+        _cleanup_free_ char *canonical = NULL, *parent = NULL;
923a60
+        _cleanup_close_ int fd = -1;
923a60
+        int flags = allow_symlink ? AT_SYMLINK_FOLLOW : 0;
923a60
+        /* Returns 1 for "/", 0 if a path to canonicalize does not exist, a negative errno-style value on other failures, else fd_is_mount_point()'s result. */
923a60
+        assert(t);
923a60
+
923a60
+        if (path_equal(t, "/"))
923a60
+                return 1;
923a60
+
923a60
+        /* we need to resolve symlinks manually, we can't just rely on
923a60
+         * fd_is_mount_point() to do that for us; if we have a structure like
923a60
+         * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
923a60
+         * look at needs to be /usr, not /. */
923a60
+        if (flags & AT_SYMLINK_FOLLOW) {
923a60
+                canonical = canonicalize_file_name(t);
923a60
+                if (!canonical) {
923a60
+                        if (errno == ENOENT)
923a60
+                                return 0; /* a path that does not exist is reported as "not a mount point" */
923a60
+                        else
923a60
+                                return -errno;
923a60
+                }
923a60
+                t = canonical;
923a60
+        }
923a60
+        /* Open the containing directory and test the last path component relative to it. */
923a60
+        parent = dirname_malloc(t);
923a60
+        if (!parent)
923a60
+                return -ENOMEM;
923a60
+
923a60
+        fd = openat(AT_FDCWD, parent, O_DIRECTORY|O_CLOEXEC|O_PATH);
923a60
+        if (fd < 0)
923a60
                 return -errno;
923a60
 
923a60
-        return a.st_dev != b.st_dev;
923a60
+        return fd_is_mount_point(fd, basename(t), flags);
923a60
 }
923a60
 
923a60
 int path_is_read_only_fs(const char *path) {
923a60
diff --git a/src/shared/path-util.h b/src/shared/path-util.h
923a60
index 71bb740e98..34c016229c 100644
923a60
--- a/src/shared/path-util.h
923a60
+++ b/src/shared/path-util.h
923a60
@@ -53,6 +53,7 @@ char** path_strv_make_absolute_cwd(char **l);
923a60
 char** path_strv_resolve(char **l, const char *prefix);
923a60
 char** path_strv_resolve_uniq(char **l, const char *prefix);
923a60
 
923a60
+int fd_is_mount_point(int fd, const char *filename, int flags);
923a60
 int path_is_mount_point(const char *path, bool allow_symlink);
923a60
 int path_is_read_only_fs(const char *path);
923a60
 int path_is_os_tree(const char *path);
923a60
diff --git a/src/test/test-path-util.c b/src/test/test-path-util.c
923a60
index 6396fcb398..a4fec07e7c 100644
923a60
--- a/src/test/test-path-util.c
923a60
+++ b/src/test/test-path-util.c
923a60
@@ -21,6 +21,7 @@
923a60
 
923a60
 #include <stdio.h>
923a60
 #include <unistd.h>
923a60
+#include <sys/mount.h>
923a60
 
923a60
 #include "path-util.h"
923a60
 #include "util.h"
923a60
@@ -99,6 +100,66 @@ static void test_path(void) {
923a60
         }
923a60
 }
923a60
 
923a60
+static void test_path_is_mount_point(void) { /* Exercises path_is_mount_point() on "/", /proc, /sys, plain files, symlinks and (if permitted) a bind-mounted file */
923a60
+        int fd, rt, rf, rlt, rlf;
923a60
+        char tmp_dir[] = "/tmp/test-path-is-mount-point-XXXXXX";
923a60
+        _cleanup_free_ char *file1 = NULL, *file2 = NULL, *link1 = NULL, *link2 = NULL;
923a60
+
923a60
+        assert_se(path_is_mount_point("/", true) > 0);
923a60
+        assert_se(path_is_mount_point("/", false) > 0);
923a60
+
923a60
+        assert_se(path_is_mount_point("/proc", true) > 0);
923a60
+        assert_se(path_is_mount_point("/proc", false) > 0);
923a60
+
923a60
+        assert_se(path_is_mount_point("/proc/1", true) == 0);
923a60
+        assert_se(path_is_mount_point("/proc/1", false) == 0);
923a60
+
923a60
+        assert_se(path_is_mount_point("/sys", true) > 0);
923a60
+        assert_se(path_is_mount_point("/sys", false) > 0);
923a60
+
923a60
+        /* file mountpoints */
923a60
+        assert_se(mkdtemp(tmp_dir) != NULL);
923a60
+        file1 = path_join(NULL, tmp_dir, "file1");
923a60
+        assert_se(file1);
923a60
+        file2 = path_join(NULL, tmp_dir, "file2");
923a60
+        assert_se(file2);
923a60
+        fd = open(file1, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC, 0664);
923a60
+        assert_se(fd >= 0); /* 0 is a valid descriptor, only negative values signal failure */
923a60
+        close(fd);
923a60
+        fd = open(file2, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC, 0664);
923a60
+        assert_se(fd >= 0);
923a60
+        close(fd);
923a60
+        link1 = path_join(NULL, tmp_dir, "link1");
923a60
+        assert_se(link1);
923a60
+        assert_se(symlink("file1", link1) == 0);
923a60
+        link2 = path_join(NULL, tmp_dir, "link2");
923a60
+        assert_se(link2); /* check the pointer that was just allocated */
923a60
+        assert_se(symlink("file2", link2) == 0);
923a60
+        /* Nothing is bind-mounted yet: neither the file nor the symlink is a mount point. */
923a60
+        assert_se(path_is_mount_point(file1, true) == 0);
923a60
+        assert_se(path_is_mount_point(file1, false) == 0);
923a60
+        assert_se(path_is_mount_point(link1, true) == 0);
923a60
+        assert_se(path_is_mount_point(link1, false) == 0);
923a60
+
923a60
+        /* this test will only work as root */
923a60
+        if (mount(file1, file2, NULL, MS_BIND, NULL) >= 0) {
923a60
+                rf = path_is_mount_point(file2, false);
923a60
+                rt = path_is_mount_point(file2, true);
923a60
+                rlf = path_is_mount_point(link2, false);
923a60
+                rlt = path_is_mount_point(link2, true);
923a60
+                /* Unmount first; the collected results are checked afterwards. */
923a60
+                assert_se(umount(file2) == 0);
923a60
+
923a60
+                assert_se(rf == 1);
923a60
+                assert_se(rt == 1);
923a60
+                assert_se(rlf == 0);
923a60
+                assert_se(rlt == 1);
923a60
+        } else
923a60
+                printf("Skipping bind mount file test: %m\n");
923a60
+
923a60
+        assert_se(rm_rf(tmp_dir, false, true, false) == 0);
923a60
+}
923a60
+
923a60
 static void test_find_binary(const char *self, bool local) {
923a60
         char *p;
923a60
 
923a60
@@ -288,6 +349,7 @@ int main(int argc, char **argv) {
923a60
         test_make_relative();
923a60
         test_strv_resolve();
923a60
         test_path_startswith();
923a60
+        test_path_is_mount_point();
923a60
 
923a60
         return 0;
923a60
 }