ddf19c
From 8721796f22a8a61d82974088e542377ee6db209e Mon Sep 17 00:00:00 2001
ddf19c
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
ddf19c
Date: Tue, 3 Mar 2020 18:43:14 +0000
ddf19c
Subject: [PATCH 18/18] virtiofsd: Fix xattr operations
ddf19c
MIME-Version: 1.0
ddf19c
Content-Type: text/plain; charset=UTF-8
ddf19c
Content-Transfer-Encoding: 8bit
ddf19c
ddf19c
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
ddf19c
Message-id: <20200303184314.155564-8-dgilbert@redhat.com>
ddf19c
Patchwork-id: 94123
ddf19c
O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 7/7] virtiofsd: Fix xattr operations
ddf19c
Bugzilla: 1797064
ddf19c
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
ddf19c
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
ddf19c
RH-Acked-by: Ján Tomko <jtomko@redhat.com>
ddf19c
ddf19c
From: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
ddf19c
ddf19c
Current virtiofsd has problems about xattr operations and
ddf19c
they does not work properly for directory/symlink/special file.
ddf19c
ddf19c
The fundamental cause is that virtiofsd uses openat() + f...xattr()
ddf19c
systemcalls for xattr operation but we should not open symlink/special
ddf19c
file in the daemon. Therefore the function is restricted.
ddf19c
ddf19c
Fix this problem by:
ddf19c
 1. during setup of each thread, call unshare(CLONE_FS)
ddf19c
 2. in xattr operations (i.e. lo_getxattr), if inode is not a regular
ddf19c
    file or directory, use fchdir(proc_loot_fd) + ...xattr() +
ddf19c
    fchdir(root.fd) instead of openat() + f...xattr()
ddf19c
ddf19c
    (Note: for a regular file/directory openat() + f...xattr()
ddf19c
     is still used for performance reason)
ddf19c
ddf19c
With this patch, xfstests generic/062 passes on virtiofs.
ddf19c
ddf19c
This fix is suggested by Miklos Szeredi and Stefan Hajnoczi.
ddf19c
The original discussion can be found here:
ddf19c
  https://www.redhat.com/archives/virtio-fs/2019-October/msg00046.html
ddf19c
ddf19c
Signed-off-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
ddf19c
Message-Id: <20200227055927.24566-3-misono.tomohiro@jp.fujitsu.com>
ddf19c
Acked-by: Vivek Goyal <vgoyal@redhat.com>
ddf19c
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
ddf19c
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
ddf19c
(cherry picked from commit bdfd66788349acc43cd3f1298718ad491663cfcc)
ddf19c
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
ddf19c
---
ddf19c
 tools/virtiofsd/fuse_virtio.c    |  13 +++++
ddf19c
 tools/virtiofsd/passthrough_ll.c | 105 +++++++++++++++++++++------------------
ddf19c
 tools/virtiofsd/seccomp.c        |   6 +++
ddf19c
 3 files changed, 77 insertions(+), 47 deletions(-)
ddf19c
ddf19c
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
ddf19c
index dd1c605..3b6d16a 100644
ddf19c
--- a/tools/virtiofsd/fuse_virtio.c
ddf19c
+++ b/tools/virtiofsd/fuse_virtio.c
ddf19c
@@ -426,6 +426,8 @@ err:
ddf19c
     return ret;
ddf19c
 }
ddf19c
 
ddf19c
+static __thread bool clone_fs_called;
ddf19c
+
ddf19c
 /* Process one FVRequest in a thread pool */
ddf19c
 static void fv_queue_worker(gpointer data, gpointer user_data)
ddf19c
 {
ddf19c
@@ -441,6 +443,17 @@ static void fv_queue_worker(gpointer data, gpointer user_data)
ddf19c
 
ddf19c
     assert(se->bufsize > sizeof(struct fuse_in_header));
ddf19c
 
ddf19c
+    if (!clone_fs_called) {
ddf19c
+        int ret;
ddf19c
+
ddf19c
+        /* unshare FS for xattr operation */
ddf19c
+        ret = unshare(CLONE_FS);
ddf19c
+        /* should not fail */
ddf19c
+        assert(ret == 0);
ddf19c
+
ddf19c
+        clone_fs_called = true;
ddf19c
+    }
ddf19c
+
ddf19c
     /*
ddf19c
      * An element contains one request and the space to send our response
ddf19c
      * They're spread over multiple descriptors in a scatter/gather set
ddf19c
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
ddf19c
index 50c7273..9cba3f1 100644
ddf19c
--- a/tools/virtiofsd/passthrough_ll.c
ddf19c
+++ b/tools/virtiofsd/passthrough_ll.c
ddf19c
@@ -123,7 +123,7 @@ struct lo_inode {
ddf19c
     pthread_mutex_t plock_mutex;
ddf19c
     GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */
ddf19c
 
ddf19c
-    bool is_symlink;
ddf19c
+    mode_t filetype;
ddf19c
 };
ddf19c
 
ddf19c
 struct lo_cred {
ddf19c
@@ -695,7 +695,7 @@ static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode,
ddf19c
     struct lo_inode *parent;
ddf19c
     char path[PATH_MAX];
ddf19c
 
ddf19c
-    if (inode->is_symlink) {
ddf19c
+    if (S_ISLNK(inode->filetype)) {
ddf19c
         res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH);
ddf19c
         if (res == -1 && errno == EINVAL) {
ddf19c
             /* Sorry, no race free way to set times on symlink. */
ddf19c
@@ -929,7 +929,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
ddf19c
             goto out_err;
ddf19c
         }
ddf19c
 
ddf19c
-        inode->is_symlink = S_ISLNK(e->attr.st_mode);
ddf19c
+        /* cache only filetype */
ddf19c
+        inode->filetype = (e->attr.st_mode & S_IFMT);
ddf19c
 
ddf19c
         /*
ddf19c
          * One for the caller and one for nlookup (released in
ddf19c
@@ -1139,7 +1140,7 @@ static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode,
ddf19c
     struct lo_inode *parent;
ddf19c
     char path[PATH_MAX];
ddf19c
 
ddf19c
-    if (inode->is_symlink) {
ddf19c
+    if (S_ISLNK(inode->filetype)) {
ddf19c
         res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH);
ddf19c
         if (res == -1 && (errno == ENOENT || errno == EINVAL)) {
ddf19c
             /* Sorry, no race free way to hard-link a symlink. */
ddf19c
@@ -2193,12 +2194,6 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name,
ddf19c
     fuse_log(FUSE_LOG_DEBUG, "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n",
ddf19c
              ino, name, size);
ddf19c
 
ddf19c
-    if (inode->is_symlink) {
ddf19c
-        /* Sorry, no race free way to getxattr on symlink. */
ddf19c
-        saverr = EPERM;
ddf19c
-        goto out;
ddf19c
-    }
ddf19c
-
ddf19c
     if (size) {
ddf19c
         value = malloc(size);
ddf19c
         if (!value) {
ddf19c
@@ -2207,12 +2202,25 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name,
ddf19c
     }
ddf19c
 
ddf19c
     sprintf(procname, "%i", inode->fd);
ddf19c
-    fd = openat(lo->proc_self_fd, procname, O_RDONLY);
ddf19c
-    if (fd < 0) {
ddf19c
-        goto out_err;
ddf19c
+    /*
ddf19c
+     * It is not safe to open() non-regular/non-dir files in file server
ddf19c
+     * unless O_PATH is used, so use that method for regular files/dir
ddf19c
+     * only (as it seems giving less performance overhead).
ddf19c
+     * Otherwise, call fchdir() to avoid open().
ddf19c
+     */
ddf19c
+    if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) {
ddf19c
+        fd = openat(lo->proc_self_fd, procname, O_RDONLY);
ddf19c
+        if (fd < 0) {
ddf19c
+            goto out_err;
ddf19c
+        }
ddf19c
+        ret = fgetxattr(fd, name, value, size);
ddf19c
+    } else {
ddf19c
+        /* fchdir should not fail here */
ddf19c
+        assert(fchdir(lo->proc_self_fd) == 0);
ddf19c
+        ret = getxattr(procname, name, value, size);
ddf19c
+        assert(fchdir(lo->root.fd) == 0);
ddf19c
     }
ddf19c
 
ddf19c
-    ret = fgetxattr(fd, name, value, size);
ddf19c
     if (ret == -1) {
ddf19c
         goto out_err;
ddf19c
     }
ddf19c
@@ -2266,12 +2274,6 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size)
ddf19c
     fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", ino,
ddf19c
              size);
ddf19c
 
ddf19c
-    if (inode->is_symlink) {
ddf19c
-        /* Sorry, no race free way to listxattr on symlink. */
ddf19c
-        saverr = EPERM;
ddf19c
-        goto out;
ddf19c
-    }
ddf19c
-
ddf19c
     if (size) {
ddf19c
         value = malloc(size);
ddf19c
         if (!value) {
ddf19c
@@ -2280,12 +2282,19 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size)
ddf19c
     }
ddf19c
 
ddf19c
     sprintf(procname, "%i", inode->fd);
ddf19c
-    fd = openat(lo->proc_self_fd, procname, O_RDONLY);
ddf19c
-    if (fd < 0) {
ddf19c
-        goto out_err;
ddf19c
+    if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) {
ddf19c
+        fd = openat(lo->proc_self_fd, procname, O_RDONLY);
ddf19c
+        if (fd < 0) {
ddf19c
+            goto out_err;
ddf19c
+        }
ddf19c
+        ret = flistxattr(fd, value, size);
ddf19c
+    } else {
ddf19c
+        /* fchdir should not fail here */
ddf19c
+        assert(fchdir(lo->proc_self_fd) == 0);
ddf19c
+        ret = listxattr(procname, value, size);
ddf19c
+        assert(fchdir(lo->root.fd) == 0);
ddf19c
     }
ddf19c
 
ddf19c
-    ret = flistxattr(fd, value, size);
ddf19c
     if (ret == -1) {
ddf19c
         goto out_err;
ddf19c
     }
ddf19c
@@ -2339,20 +2348,21 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name,
ddf19c
     fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64
ddf19c
              ", name=%s value=%s size=%zd)\n", ino, name, value, size);
ddf19c
 
ddf19c
-    if (inode->is_symlink) {
ddf19c
-        /* Sorry, no race free way to setxattr on symlink. */
ddf19c
-        saverr = EPERM;
ddf19c
-        goto out;
ddf19c
-    }
ddf19c
-
ddf19c
     sprintf(procname, "%i", inode->fd);
ddf19c
-    fd = openat(lo->proc_self_fd, procname, O_RDWR);
ddf19c
-    if (fd < 0) {
ddf19c
-        saverr = errno;
ddf19c
-        goto out;
ddf19c
+    if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) {
ddf19c
+        fd = openat(lo->proc_self_fd, procname, O_RDONLY);
ddf19c
+        if (fd < 0) {
ddf19c
+            saverr = errno;
ddf19c
+            goto out;
ddf19c
+        }
ddf19c
+        ret = fsetxattr(fd, name, value, size, flags);
ddf19c
+    } else {
ddf19c
+        /* fchdir should not fail here */
ddf19c
+        assert(fchdir(lo->proc_self_fd) == 0);
ddf19c
+        ret = setxattr(procname, name, value, size, flags);
ddf19c
+        assert(fchdir(lo->root.fd) == 0);
ddf19c
     }
ddf19c
 
ddf19c
-    ret = fsetxattr(fd, name, value, size, flags);
ddf19c
     saverr = ret == -1 ? errno : 0;
ddf19c
 
ddf19c
 out:
ddf19c
@@ -2387,20 +2397,21 @@ static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name)
ddf19c
     fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", ino,
ddf19c
              name);
ddf19c
 
ddf19c
-    if (inode->is_symlink) {
ddf19c
-        /* Sorry, no race free way to setxattr on symlink. */
ddf19c
-        saverr = EPERM;
ddf19c
-        goto out;
ddf19c
-    }
ddf19c
-
ddf19c
     sprintf(procname, "%i", inode->fd);
ddf19c
-    fd = openat(lo->proc_self_fd, procname, O_RDWR);
ddf19c
-    if (fd < 0) {
ddf19c
-        saverr = errno;
ddf19c
-        goto out;
ddf19c
+    if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) {
ddf19c
+        fd = openat(lo->proc_self_fd, procname, O_RDONLY);
ddf19c
+        if (fd < 0) {
ddf19c
+            saverr = errno;
ddf19c
+            goto out;
ddf19c
+        }
ddf19c
+        ret = fremovexattr(fd, name);
ddf19c
+    } else {
ddf19c
+        /* fchdir should not fail here */
ddf19c
+        assert(fchdir(lo->proc_self_fd) == 0);
ddf19c
+        ret = removexattr(procname, name);
ddf19c
+        assert(fchdir(lo->root.fd) == 0);
ddf19c
     }
ddf19c
 
ddf19c
-    ret = fremovexattr(fd, name);
ddf19c
     saverr = ret == -1 ? errno : 0;
ddf19c
 
ddf19c
 out:
ddf19c
@@ -2800,7 +2811,7 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root)
ddf19c
         exit(1);
ddf19c
     }
ddf19c
 
ddf19c
-    root->is_symlink = false;
ddf19c
+    root->filetype = S_IFDIR;
ddf19c
     root->fd = fd;
ddf19c
     root->key.ino = stat.st_ino;
ddf19c
     root->key.dev = stat.st_dev;
ddf19c
diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c
ddf19c
index 2d9d4a7..bd9e7b0 100644
ddf19c
--- a/tools/virtiofsd/seccomp.c
ddf19c
+++ b/tools/virtiofsd/seccomp.c
ddf19c
@@ -41,6 +41,7 @@ static const int syscall_whitelist[] = {
ddf19c
     SCMP_SYS(exit),
ddf19c
     SCMP_SYS(exit_group),
ddf19c
     SCMP_SYS(fallocate),
ddf19c
+    SCMP_SYS(fchdir),
ddf19c
     SCMP_SYS(fchmodat),
ddf19c
     SCMP_SYS(fchownat),
ddf19c
     SCMP_SYS(fcntl),
ddf19c
@@ -62,7 +63,9 @@ static const int syscall_whitelist[] = {
ddf19c
     SCMP_SYS(getpid),
ddf19c
     SCMP_SYS(gettid),
ddf19c
     SCMP_SYS(gettimeofday),
ddf19c
+    SCMP_SYS(getxattr),
ddf19c
     SCMP_SYS(linkat),
ddf19c
+    SCMP_SYS(listxattr),
ddf19c
     SCMP_SYS(lseek),
ddf19c
     SCMP_SYS(madvise),
ddf19c
     SCMP_SYS(mkdirat),
ddf19c
@@ -85,6 +88,7 @@ static const int syscall_whitelist[] = {
ddf19c
     SCMP_SYS(recvmsg),
ddf19c
     SCMP_SYS(renameat),
ddf19c
     SCMP_SYS(renameat2),
ddf19c
+    SCMP_SYS(removexattr),
ddf19c
     SCMP_SYS(rt_sigaction),
ddf19c
     SCMP_SYS(rt_sigprocmask),
ddf19c
     SCMP_SYS(rt_sigreturn),
ddf19c
@@ -98,10 +102,12 @@ static const int syscall_whitelist[] = {
ddf19c
     SCMP_SYS(setresuid32),
ddf19c
 #endif
ddf19c
     SCMP_SYS(set_robust_list),
ddf19c
+    SCMP_SYS(setxattr),
ddf19c
     SCMP_SYS(symlinkat),
ddf19c
     SCMP_SYS(time), /* Rarely needed, except on static builds */
ddf19c
     SCMP_SYS(tgkill),
ddf19c
     SCMP_SYS(unlinkat),
ddf19c
+    SCMP_SYS(unshare),
ddf19c
     SCMP_SYS(utimensat),
ddf19c
     SCMP_SYS(write),
ddf19c
     SCMP_SYS(writev),
ddf19c
-- 
ddf19c
1.8.3.1
ddf19c