Blame SOURCES/kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch

902636
From 03effbc021064bb77d231ae5ca02d1a579c71ee1 Mon Sep 17 00:00:00 2001
902636
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
902636
Date: Mon, 27 Jan 2020 19:01:17 +0100
902636
Subject: [PATCH 046/116] virtiofsd: passthrough_ll: add fallback for racy ops
902636
MIME-Version: 1.0
902636
Content-Type: text/plain; charset=UTF-8
902636
Content-Transfer-Encoding: 8bit
902636
902636
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
902636
Message-id: <20200127190227.40942-43-dgilbert@redhat.com>
902636
Patchwork-id: 93496
902636
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 042/112] virtiofsd: passthrough_ll: add fallback for racy ops
902636
Bugzilla: 1694164
902636
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
902636
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
902636
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
902636
902636
From: Miklos Szeredi <mszeredi@redhat.com>
902636
902636
We have two operations that cannot be done race-free on a symlink in
902636
certain cases: utimes and link.
902636
902636
Add racy fallback for these if the race-free method doesn't work.  We do
902636
our best to avoid races even in this case:
902636
902636
  - get absolute path by reading /proc/self/fd/NN symlink
902636
902636
  - lookup parent directory: after this we are safe against renames in
902636
    ancestors
902636
902636
  - lookup name in parent directory, and verify that we got to the original
902636
    inode; if not, retry the whole thing
902636
902636
Both utimes(2) and link(2) hold i_lock on the inode across the operation,
902636
so a racing rename/delete by this fuse instance is not possible, only from
902636
other entities changing the filesystem.
902636
902636
If the "norace" option is given, then disable the racy fallbacks.
902636
902636
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
902636
Reviewed-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
902636
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
902636
(cherry picked from commit 5fe319a7b19c9c328e6e061bffcf1ff6cc8b89ce)
902636
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
902636
---
902636
 tools/virtiofsd/helper.c         |   5 +-
902636
 tools/virtiofsd/passthrough_ll.c | 157 +++++++++++++++++++++++++++++++++++----
902636
 2 files changed, 145 insertions(+), 17 deletions(-)
902636
902636
diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c
902636
index b8ec5ac..5531425 100644
902636
--- a/tools/virtiofsd/helper.c
902636
+++ b/tools/virtiofsd/helper.c
902636
@@ -142,7 +142,10 @@ void fuse_cmdline_help(void)
902636
            "    --daemonize                run in background\n"
902636
            "    -o max_idle_threads        the maximum number of idle worker "
902636
            "threads\n"
902636
-           "                               allowed (default: 10)\n");
902636
+           "                               allowed (default: 10)\n"
902636
+           "    -o norace                  disable racy fallback\n"
902636
+           "                               default: false\n"
902636
+          );
902636
 }
902636
 
902636
 static int fuse_helper_opt_proc(void *data, const char *arg, int key,
902636
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
902636
index 9815bfa..ac380ef 100644
902636
--- a/tools/virtiofsd/passthrough_ll.c
902636
+++ b/tools/virtiofsd/passthrough_ll.c
902636
@@ -98,6 +98,7 @@ enum {
902636
 struct lo_data {
902636
     pthread_mutex_t mutex;
902636
     int debug;
902636
+    int norace;
902636
     int writeback;
902636
     int flock;
902636
     int xattr;
902636
@@ -124,10 +125,15 @@ static const struct fuse_opt lo_opts[] = {
902636
     { "cache=never", offsetof(struct lo_data, cache), CACHE_NEVER },
902636
     { "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL },
902636
     { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS },
902636
-
902636
+    { "norace", offsetof(struct lo_data, norace), 1 },
902636
     FUSE_OPT_END
902636
 };
902636
 
902636
+static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n);
902636
+
902636
+static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st);
902636
+
902636
+
902636
 static struct lo_data *lo_data(fuse_req_t req)
902636
 {
902636
     return (struct lo_data *)fuse_req_userdata(req);
902636
@@ -347,23 +353,127 @@ static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
902636
     fuse_reply_attr(req, &buf, lo->timeout);
902636
 }
902636
 
902636
-static int utimensat_empty_nofollow(struct lo_inode *inode,
902636
-                                    const struct timespec *tv)
902636
+static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode,
902636
+                              char path[PATH_MAX], struct lo_inode **parent)
902636
 {
902636
-    int res;
902636
     char procname[64];
902636
+    char *last;
902636
+    struct stat stat;
902636
+    struct lo_inode *p;
902636
+    int retries = 2;
902636
+    int res;
902636
+
902636
+retry:
902636
+    sprintf(procname, "/proc/self/fd/%i", inode->fd);
902636
+
902636
+    res = readlink(procname, path, PATH_MAX);
902636
+    if (res < 0) {
902636
+        fuse_log(FUSE_LOG_WARNING, "%s: readlink failed: %m\n", __func__);
902636
+        goto fail_noretry;
902636
+    }
902636
+
902636
+    if (res >= PATH_MAX) {
902636
+        fuse_log(FUSE_LOG_WARNING, "%s: readlink overflowed\n", __func__);
902636
+        goto fail_noretry;
902636
+    }
902636
+    path[res] = '\0';
902636
+
902636
+    last = strrchr(path, '/');
902636
+    if (last == NULL) {
902636
+        /* Shouldn't happen */
902636
+        fuse_log(
902636
+            FUSE_LOG_WARNING,
902636
+            "%s: INTERNAL ERROR: bad path read from proc\n", __func__);
902636
+        goto fail_noretry;
902636
+    }
902636
+    if (last == path) {
902636
+        p = &lo->root;
902636
+        pthread_mutex_lock(&lo->mutex);
902636
+        p->refcount++;
902636
+        pthread_mutex_unlock(&lo->mutex);
902636
+    } else {
902636
+        *last = '\0';
902636
+        res = fstatat(AT_FDCWD, last == path ? "/" : path, &stat, 0);
902636
+        if (res == -1) {
902636
+            if (!retries) {
902636
+                fuse_log(FUSE_LOG_WARNING,
902636
+                         "%s: failed to stat parent: %m\n", __func__);
902636
+            }
902636
+            goto fail;
902636
+        }
902636
+        p = lo_find(lo, &stat);
902636
+        if (p == NULL) {
902636
+            if (!retries) {
902636
+                fuse_log(FUSE_LOG_WARNING,
902636
+                         "%s: failed to find parent\n", __func__);
902636
+            }
902636
+            goto fail;
902636
+        }
902636
+    }
902636
+    last++;
902636
+    res = fstatat(p->fd, last, &stat, AT_SYMLINK_NOFOLLOW);
902636
+    if (res == -1) {
902636
+        if (!retries) {
902636
+            fuse_log(FUSE_LOG_WARNING,
902636
+                     "%s: failed to stat last\n", __func__);
902636
+        }
902636
+        goto fail_unref;
902636
+    }
902636
+    if (stat.st_dev != inode->dev || stat.st_ino != inode->ino) {
902636
+        if (!retries) {
902636
+            fuse_log(FUSE_LOG_WARNING,
902636
+                     "%s: failed to match last\n", __func__);
902636
+        }
902636
+        goto fail_unref;
902636
+    }
902636
+    *parent = p;
902636
+    memmove(path, last, strlen(last) + 1);
902636
+
902636
+    return 0;
902636
+
902636
+fail_unref:
902636
+    unref_inode(lo, p, 1);
902636
+fail:
902636
+    if (retries) {
902636
+        retries--;
902636
+        goto retry;
902636
+    }
902636
+fail_noretry:
902636
+    errno = EIO;
902636
+    return -1;
902636
+}
902636
+
902636
+static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode,
902636
+                           const struct timespec *tv)
902636
+{
902636
+    int res;
902636
+    struct lo_inode *parent;
902636
+    char path[PATH_MAX];
902636
 
902636
     if (inode->is_symlink) {
902636
-        res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
902636
+        res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH);
902636
         if (res == -1 && errno == EINVAL) {
902636
             /* Sorry, no race free way to set times on symlink. */
902636
-            errno = EPERM;
902636
+            if (lo->norace) {
902636
+                errno = EPERM;
902636
+            } else {
902636
+                goto fallback;
902636
+            }
902636
         }
902636
         return res;
902636
     }
902636
-    sprintf(procname, "/proc/self/fd/%i", inode->fd);
902636
+    sprintf(path, "/proc/self/fd/%i", inode->fd);
902636
 
902636
-    return utimensat(AT_FDCWD, procname, tv, 0);
902636
+    return utimensat(AT_FDCWD, path, tv, 0);
902636
+
902636
+fallback:
902636
+    res = lo_parent_and_name(lo, inode, path, &parent);
902636
+    if (res != -1) {
902636
+        res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW);
902636
+        unref_inode(lo, parent, 1);
902636
+    }
902636
+
902636
+    return res;
902636
 }
902636
 
902636
 static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi)
902636
@@ -387,6 +497,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
902636
 {
902636
     int saverr;
902636
     char procname[64];
902636
+    struct lo_data *lo = lo_data(req);
902636
     struct lo_inode *inode;
902636
     int ifd;
902636
     int res;
902636
@@ -459,7 +570,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
902636
         if (fi) {
902636
             res = futimens(fd, tv);
902636
         } else {
902636
-            res = utimensat_empty_nofollow(inode, tv);
902636
+            res = utimensat_empty(lo, inode, tv);
902636
         }
902636
         if (res == -1) {
902636
             goto out_err;
902636
@@ -709,24 +820,38 @@ static void lo_symlink(fuse_req_t req, const char *link, fuse_ino_t parent,
902636
     lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link);
902636
 }
902636
 
902636
-static int linkat_empty_nofollow(struct lo_inode *inode, int dfd,
902636
-                                 const char *name)
902636
+static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode,
902636
+                                 int dfd, const char *name)
902636
 {
902636
     int res;
902636
-    char procname[64];
902636
+    struct lo_inode *parent;
902636
+    char path[PATH_MAX];
902636
 
902636
     if (inode->is_symlink) {
902636
         res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH);
902636
         if (res == -1 && (errno == ENOENT || errno == EINVAL)) {
902636
             /* Sorry, no race free way to hard-link a symlink. */
902636
-            errno = EPERM;
902636
+            if (lo->norace) {
902636
+                errno = EPERM;
902636
+            } else {
902636
+                goto fallback;
902636
+            }
902636
         }
902636
         return res;
902636
     }
902636
 
902636
-    sprintf(procname, "/proc/self/fd/%i", inode->fd);
902636
+    sprintf(path, "/proc/self/fd/%i", inode->fd);
902636
+
902636
+    return linkat(AT_FDCWD, path, dfd, name, AT_SYMLINK_FOLLOW);
902636
+
902636
+fallback:
902636
+    res = lo_parent_and_name(lo, inode, path, &parent);
902636
+    if (res != -1) {
902636
+        res = linkat(parent->fd, path, dfd, name, 0);
902636
+        unref_inode(lo, parent, 1);
902636
+    }
902636
 
902636
-    return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW);
902636
+    return res;
902636
 }
902636
 
902636
 static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
902636
@@ -748,7 +873,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
902636
     e.attr_timeout = lo->timeout;
902636
     e.entry_timeout = lo->timeout;
902636
 
902636
-    res = linkat_empty_nofollow(inode, lo_fd(req, parent), name);
902636
+    res = linkat_empty_nofollow(lo, inode, lo_fd(req, parent), name);
902636
     if (res == -1) {
902636
         goto out_err;
902636
     }
902636
-- 
902636
1.8.3.1
902636