Blame SOURCES/kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch

ddf19c
From 03effbc021064bb77d231ae5ca02d1a579c71ee1 Mon Sep 17 00:00:00 2001
ddf19c
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
ddf19c
Date: Mon, 27 Jan 2020 19:01:17 +0100
ddf19c
Subject: [PATCH 046/116] virtiofsd: passthrough_ll: add fallback for racy ops
ddf19c
MIME-Version: 1.0
ddf19c
Content-Type: text/plain; charset=UTF-8
ddf19c
Content-Transfer-Encoding: 8bit
ddf19c
ddf19c
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
ddf19c
Message-id: <20200127190227.40942-43-dgilbert@redhat.com>
ddf19c
Patchwork-id: 93496
ddf19c
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 042/112] virtiofsd: passthrough_ll: add fallback for racy ops
ddf19c
Bugzilla: 1694164
ddf19c
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
ddf19c
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
ddf19c
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
ddf19c
ddf19c
From: Miklos Szeredi <mszeredi@redhat.com>
ddf19c
ddf19c
We have two operations that cannot be done race-free on a symlink in
ddf19c
certain cases: utimes and link.
ddf19c
ddf19c
Add racy fallback for these if the race-free method doesn't work.  We do
ddf19c
our best to avoid races even in this case:
ddf19c
ddf19c
  - get absolute path by reading /proc/self/fd/NN symlink
ddf19c
ddf19c
  - lookup parent directory: after this we are safe against renames in
ddf19c
    ancestors
ddf19c
ddf19c
  - lookup name in parent directory, and verify that we got to the original
ddf19c
    inode; if not, retry the whole thing
ddf19c
ddf19c
Both utimes(2) and link(2) hold i_lock on the inode across the operation,
ddf19c
so a racing rename/delete by this fuse instance is not possible, only from
ddf19c
other entities changing the filesystem.
ddf19c
ddf19c
If the "norace" option is given, then disable the racy fallbacks.
ddf19c
ddf19c
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
ddf19c
Reviewed-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
ddf19c
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
ddf19c
(cherry picked from commit 5fe319a7b19c9c328e6e061bffcf1ff6cc8b89ce)
ddf19c
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
ddf19c
---
ddf19c
 tools/virtiofsd/helper.c         |   5 +-
ddf19c
 tools/virtiofsd/passthrough_ll.c | 157 +++++++++++++++++++++++++++++++++++----
ddf19c
 2 files changed, 145 insertions(+), 17 deletions(-)
ddf19c
ddf19c
diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c
ddf19c
index b8ec5ac..5531425 100644
ddf19c
--- a/tools/virtiofsd/helper.c
ddf19c
+++ b/tools/virtiofsd/helper.c
ddf19c
@@ -142,7 +142,10 @@ void fuse_cmdline_help(void)
ddf19c
            "    --daemonize                run in background\n"
ddf19c
            "    -o max_idle_threads        the maximum number of idle worker "
ddf19c
            "threads\n"
ddf19c
-           "                               allowed (default: 10)\n");
ddf19c
+           "                               allowed (default: 10)\n"
ddf19c
+           "    -o norace                  disable racy fallback\n"
ddf19c
+           "                               default: false\n"
ddf19c
+          );
ddf19c
 }
ddf19c
 
ddf19c
 static int fuse_helper_opt_proc(void *data, const char *arg, int key,
ddf19c
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
ddf19c
index 9815bfa..ac380ef 100644
ddf19c
--- a/tools/virtiofsd/passthrough_ll.c
ddf19c
+++ b/tools/virtiofsd/passthrough_ll.c
ddf19c
@@ -98,6 +98,7 @@ enum {
ddf19c
 struct lo_data {
ddf19c
     pthread_mutex_t mutex;
ddf19c
     int debug;
ddf19c
+    int norace;
ddf19c
     int writeback;
ddf19c
     int flock;
ddf19c
     int xattr;
ddf19c
@@ -124,10 +125,15 @@ static const struct fuse_opt lo_opts[] = {
ddf19c
     { "cache=never", offsetof(struct lo_data, cache), CACHE_NEVER },
ddf19c
     { "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL },
ddf19c
     { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS },
ddf19c
-
ddf19c
+    { "norace", offsetof(struct lo_data, norace), 1 },
ddf19c
     FUSE_OPT_END
ddf19c
 };
ddf19c
 
ddf19c
+static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n);
ddf19c
+
ddf19c
+static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st);
ddf19c
+
ddf19c
+
ddf19c
 static struct lo_data *lo_data(fuse_req_t req)
ddf19c
 {
ddf19c
     return (struct lo_data *)fuse_req_userdata(req);
ddf19c
@@ -347,23 +353,127 @@ static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
ddf19c
     fuse_reply_attr(req, &buf, lo->timeout);
ddf19c
 }
ddf19c
 
ddf19c
-static int utimensat_empty_nofollow(struct lo_inode *inode,
ddf19c
-                                    const struct timespec *tv)
ddf19c
+static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode,
ddf19c
+                              char path[PATH_MAX], struct lo_inode **parent)
ddf19c
 {
ddf19c
-    int res;
ddf19c
     char procname[64];
ddf19c
+    char *last;
ddf19c
+    struct stat stat;
ddf19c
+    struct lo_inode *p;
ddf19c
+    int retries = 2;
ddf19c
+    int res;
ddf19c
+
ddf19c
+retry:
ddf19c
+    sprintf(procname, "/proc/self/fd/%i", inode->fd);
ddf19c
+
ddf19c
+    res = readlink(procname, path, PATH_MAX);
ddf19c
+    if (res < 0) {
ddf19c
+        fuse_log(FUSE_LOG_WARNING, "%s: readlink failed: %m\n", __func__);
ddf19c
+        goto fail_noretry;
ddf19c
+    }
ddf19c
+
ddf19c
+    if (res >= PATH_MAX) {
ddf19c
+        fuse_log(FUSE_LOG_WARNING, "%s: readlink overflowed\n", __func__);
ddf19c
+        goto fail_noretry;
ddf19c
+    }
ddf19c
+    path[res] = '\0';
ddf19c
+
ddf19c
+    last = strrchr(path, '/');
ddf19c
+    if (last == NULL) {
ddf19c
+        /* Shouldn't happen */
ddf19c
+        fuse_log(
ddf19c
+            FUSE_LOG_WARNING,
ddf19c
+            "%s: INTERNAL ERROR: bad path read from proc\n", __func__);
ddf19c
+        goto fail_noretry;
ddf19c
+    }
ddf19c
+    if (last == path) {
ddf19c
+        p = &lo->root;
ddf19c
+        pthread_mutex_lock(&lo->mutex);
ddf19c
+        p->refcount++;
ddf19c
+        pthread_mutex_unlock(&lo->mutex);
ddf19c
+    } else {
ddf19c
+        *last = '\0';
ddf19c
+        res = fstatat(AT_FDCWD, last == path ? "/" : path, &stat, 0);
ddf19c
+        if (res == -1) {
ddf19c
+            if (!retries) {
ddf19c
+                fuse_log(FUSE_LOG_WARNING,
ddf19c
+                         "%s: failed to stat parent: %m\n", __func__);
ddf19c
+            }
ddf19c
+            goto fail;
ddf19c
+        }
ddf19c
+        p = lo_find(lo, &stat);
ddf19c
+        if (p == NULL) {
ddf19c
+            if (!retries) {
ddf19c
+                fuse_log(FUSE_LOG_WARNING,
ddf19c
+                         "%s: failed to find parent\n", __func__);
ddf19c
+            }
ddf19c
+            goto fail;
ddf19c
+        }
ddf19c
+    }
ddf19c
+    last++;
ddf19c
+    res = fstatat(p->fd, last, &stat, AT_SYMLINK_NOFOLLOW);
ddf19c
+    if (res == -1) {
ddf19c
+        if (!retries) {
ddf19c
+            fuse_log(FUSE_LOG_WARNING,
ddf19c
+                     "%s: failed to stat last\n", __func__);
ddf19c
+        }
ddf19c
+        goto fail_unref;
ddf19c
+    }
ddf19c
+    if (stat.st_dev != inode->dev || stat.st_ino != inode->ino) {
ddf19c
+        if (!retries) {
ddf19c
+            fuse_log(FUSE_LOG_WARNING,
ddf19c
+                     "%s: failed to match last\n", __func__);
ddf19c
+        }
ddf19c
+        goto fail_unref;
ddf19c
+    }
ddf19c
+    *parent = p;
ddf19c
+    memmove(path, last, strlen(last) + 1);
ddf19c
+
ddf19c
+    return 0;
ddf19c
+
ddf19c
+fail_unref:
ddf19c
+    unref_inode(lo, p, 1);
ddf19c
+fail:
ddf19c
+    if (retries) {
ddf19c
+        retries--;
ddf19c
+        goto retry;
ddf19c
+    }
ddf19c
+fail_noretry:
ddf19c
+    errno = EIO;
ddf19c
+    return -1;
ddf19c
+}
ddf19c
+
ddf19c
+static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode,
ddf19c
+                           const struct timespec *tv)
ddf19c
+{
ddf19c
+    int res;
ddf19c
+    struct lo_inode *parent;
ddf19c
+    char path[PATH_MAX];
ddf19c
 
ddf19c
     if (inode->is_symlink) {
ddf19c
-        res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
ddf19c
+        res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH);
ddf19c
         if (res == -1 && errno == EINVAL) {
ddf19c
             /* Sorry, no race free way to set times on symlink. */
ddf19c
-            errno = EPERM;
ddf19c
+            if (lo->norace) {
ddf19c
+                errno = EPERM;
ddf19c
+            } else {
ddf19c
+                goto fallback;
ddf19c
+            }
ddf19c
         }
ddf19c
         return res;
ddf19c
     }
ddf19c
-    sprintf(procname, "/proc/self/fd/%i", inode->fd);
ddf19c
+    sprintf(path, "/proc/self/fd/%i", inode->fd);
ddf19c
 
ddf19c
-    return utimensat(AT_FDCWD, procname, tv, 0);
ddf19c
+    return utimensat(AT_FDCWD, path, tv, 0);
ddf19c
+
ddf19c
+fallback:
ddf19c
+    res = lo_parent_and_name(lo, inode, path, &parent);
ddf19c
+    if (res != -1) {
ddf19c
+        res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW);
ddf19c
+        unref_inode(lo, parent, 1);
ddf19c
+    }
ddf19c
+
ddf19c
+    return res;
ddf19c
 }
ddf19c
 
ddf19c
 static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi)
ddf19c
@@ -387,6 +497,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
ddf19c
 {
ddf19c
     int saverr;
ddf19c
     char procname[64];
ddf19c
+    struct lo_data *lo = lo_data(req);
ddf19c
     struct lo_inode *inode;
ddf19c
     int ifd;
ddf19c
     int res;
ddf19c
@@ -459,7 +570,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
ddf19c
         if (fi) {
ddf19c
             res = futimens(fd, tv);
ddf19c
         } else {
ddf19c
-            res = utimensat_empty_nofollow(inode, tv);
ddf19c
+            res = utimensat_empty(lo, inode, tv);
ddf19c
         }
ddf19c
         if (res == -1) {
ddf19c
             goto out_err;
ddf19c
@@ -709,24 +820,38 @@ static void lo_symlink(fuse_req_t req, const char *link, fuse_ino_t parent,
ddf19c
     lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link);
ddf19c
 }
ddf19c
 
ddf19c
-static int linkat_empty_nofollow(struct lo_inode *inode, int dfd,
ddf19c
-                                 const char *name)
ddf19c
+static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode,
ddf19c
+                                 int dfd, const char *name)
ddf19c
 {
ddf19c
     int res;
ddf19c
-    char procname[64];
ddf19c
+    struct lo_inode *parent;
ddf19c
+    char path[PATH_MAX];
ddf19c
 
ddf19c
     if (inode->is_symlink) {
ddf19c
         res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH);
ddf19c
         if (res == -1 && (errno == ENOENT || errno == EINVAL)) {
ddf19c
             /* Sorry, no race free way to hard-link a symlink. */
ddf19c
-            errno = EPERM;
ddf19c
+            if (lo->norace) {
ddf19c
+                errno = EPERM;
ddf19c
+            } else {
ddf19c
+                goto fallback;
ddf19c
+            }
ddf19c
         }
ddf19c
         return res;
ddf19c
     }
ddf19c
 
ddf19c
-    sprintf(procname, "/proc/self/fd/%i", inode->fd);
ddf19c
+    sprintf(path, "/proc/self/fd/%i", inode->fd);
ddf19c
+
ddf19c
+    return linkat(AT_FDCWD, path, dfd, name, AT_SYMLINK_FOLLOW);
ddf19c
+
ddf19c
+fallback:
ddf19c
+    res = lo_parent_and_name(lo, inode, path, &parent);
ddf19c
+    if (res != -1) {
ddf19c
+        res = linkat(parent->fd, path, dfd, name, 0);
ddf19c
+        unref_inode(lo, parent, 1);
ddf19c
+    }
ddf19c
 
ddf19c
-    return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW);
ddf19c
+    return res;
ddf19c
 }
ddf19c
 
ddf19c
 static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
ddf19c
@@ -748,7 +873,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
ddf19c
     e.attr_timeout = lo->timeout;
ddf19c
     e.entry_timeout = lo->timeout;
ddf19c
 
ddf19c
-    res = linkat_empty_nofollow(inode, lo_fd(req, parent), name);
ddf19c
+    res = linkat_empty_nofollow(lo, inode, lo_fd(req, parent), name);
ddf19c
     if (res == -1) {
ddf19c
         goto out_err;
ddf19c
     }
ddf19c
-- 
ddf19c
1.8.3.1
ddf19c