From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Mon, 27 Jan 2020 19:02:11 +0000
Subject: [PATCH] virtiofsd: introduce inode refcount to prevent use-after-free
If thread A is using an inode it must not be deleted by thread B when
processing a FUSE_FORGET request.
The FUSE protocol itself already has a counter called nlookup that is
used in FUSE_FORGET messages. We cannot trust this counter since the
untrusted client can manipulate it via FUSE_FORGET messages.
Introduce a new refcount to keep inodes alive for the required lifespan.
lo_inode_put() must be called to release a reference. FUSE's nlookup
counter holds exactly one reference so that the inode stays alive as
long as the client still wants to remember it.
Note that the lo_inode->is_symlink field is moved to avoid creating a
hole in the struct due to struct field alignment.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
Reviewed-by: Sergio Lopez <slp@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit c241aa9457d88c6a0d027f48fadfed131646bce3)
---
tools/virtiofsd/passthrough_ll.c | 169 ++++++++++++++++++++++++++-----
1 file changed, 146 insertions(+), 23 deletions(-)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index e3a6d6b611..ab1613586e 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -97,7 +97,13 @@ struct lo_key {
struct lo_inode {
int fd;
- bool is_symlink;
+
+ /*
+ * Atomic reference count for this object. The nlookup field holds a
+ * reference and release it when nlookup reaches 0.
+ */
+ gint refcount;
+
struct lo_key key;
/*
@@ -116,6 +122,8 @@ struct lo_inode {
fuse_ino_t fuse_ino;
pthread_mutex_t plock_mutex;
GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */
+
+ bool is_symlink;
};
struct lo_cred {
@@ -471,6 +479,23 @@ static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode)
return elem - lo_data(req)->ino_map.elems;
}
+static void lo_inode_put(struct lo_data *lo, struct lo_inode **inodep)
+{
+ struct lo_inode *inode = *inodep;
+
+ if (!inode) {
+ return;
+ }
+
+ *inodep = NULL;
+
+ if (g_atomic_int_dec_and_test(&inode->refcount)) {
+ close(inode->fd);
+ free(inode);
+ }
+}
+
+/* Caller must release refcount using lo_inode_put() */
static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino)
{
struct lo_data *lo = lo_data(req);
@@ -478,6 +503,9 @@ static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino)
pthread_mutex_lock(&lo->mutex);
elem = lo_map_get(&lo->ino_map, ino);
+ if (elem) {
+ g_atomic_int_inc(&elem->inode->refcount);
+ }
pthread_mutex_unlock(&lo->mutex);
if (!elem) {
@@ -487,10 +515,23 @@ static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino)
return elem->inode;
}
+/*
+ * TODO Remove this helper and force callers to hold an inode refcount until
+ * they are done with the fd. This will be done in a later patch to make
+ * review easier.
+ */
static int lo_fd(fuse_req_t req, fuse_ino_t ino)
{
struct lo_inode *inode = lo_inode(req, ino);
- return inode ? inode->fd : -1;
+ int fd;
+
+ if (!inode) {
+ return -1;
+ }
+
+ fd = inode->fd;
+ lo_inode_put(lo_data(req), &inode);
+ return fd;
}
static void lo_init(void *userdata, struct fuse_conn_info *conn)
@@ -545,6 +586,10 @@ static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
fuse_reply_attr(req, &buf, lo->timeout);
}
+/*
+ * Increments parent->nlookup and caller must release refcount using
+ * lo_inode_put(&parent).
+ */
static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode,
char path[PATH_MAX], struct lo_inode **parent)
{
@@ -582,6 +627,7 @@ retry:
p = &lo->root;
pthread_mutex_lock(&lo->mutex);
p->nlookup++;
+ g_atomic_int_inc(&p->refcount);
pthread_mutex_unlock(&lo->mutex);
} else {
*last = '\0';
@@ -625,6 +671,7 @@ retry:
fail_unref:
unref_inode_lolocked(lo, p, 1);
+ lo_inode_put(lo, &p);
fail:
if (retries) {
retries--;
@@ -663,6 +710,7 @@ fallback:
if (res != -1) {
res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW);
unref_inode_lolocked(lo, parent, 1);
+ lo_inode_put(lo, &parent);
}
return res;
@@ -780,11 +828,13 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
goto out_err;
}
}
+ lo_inode_put(lo, &inode);
return lo_getattr(req, ino, fi);
out_err:
saverr = errno;
+ lo_inode_put(lo, &inode);
fuse_reply_err(req, saverr);
}
@@ -801,6 +851,7 @@ static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st)
if (p) {
assert(p->nlookup > 0);
p->nlookup++;
+ g_atomic_int_inc(&p->refcount);
}
pthread_mutex_unlock(&lo->mutex);
@@ -820,6 +871,10 @@ static void posix_locks_value_destroy(gpointer data)
free(plock);
}
+/*
+ * Increments nlookup and caller must release refcount using
+ * lo_inode_put(&parent).
+ */
static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
struct fuse_entry_param *e)
{
@@ -827,7 +882,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
int res;
int saverr;
struct lo_data *lo = lo_data(req);
- struct lo_inode *inode, *dir = lo_inode(req, parent);
+ struct lo_inode *inode = NULL;
+ struct lo_inode *dir = lo_inode(req, parent);
/*
* name_to_handle_at() and open_by_handle_at() can reach here with fuse
@@ -868,6 +924,13 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
}
inode->is_symlink = S_ISLNK(e->attr.st_mode);
+
+ /*
+ * One for the caller and one for nlookup (released in
+ * unref_inode_lolocked())
+ */
+ g_atomic_int_set(&inode->refcount, 2);
+
inode->nlookup = 1;
inode->fd = newfd;
newfd = -1;
@@ -883,6 +946,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
pthread_mutex_unlock(&lo->mutex);
}
e->ino = inode->fuse_ino;
+ lo_inode_put(lo, &inode);
+ lo_inode_put(lo, &dir);
fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent,
name, (unsigned long long)e->ino);
@@ -894,6 +959,8 @@ out_err:
if (newfd != -1) {
close(newfd);
}
+ lo_inode_put(lo, &inode);
+ lo_inode_put(lo, &dir);
return saverr;
}
@@ -991,6 +1058,7 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent,
{
int res;
int saverr;
+ struct lo_data *lo = lo_data(req);
struct lo_inode *dir;
struct fuse_entry_param e;
struct lo_cred old = {};
@@ -1032,9 +1100,11 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent,
name, (unsigned long long)e.ino);
fuse_reply_entry(req, &e);
+ lo_inode_put(lo, &dir);
return;
out:
+ lo_inode_put(lo, &dir);
fuse_reply_err(req, saverr);
}
@@ -1085,6 +1155,7 @@ fallback:
if (res != -1) {
res = linkat(parent->fd, path, dfd, name, 0);
unref_inode_lolocked(lo, parent, 1);
+ lo_inode_put(lo, &parent);
}
return res;
@@ -1095,6 +1166,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
{
int res;
struct lo_data *lo = lo_data(req);
+ struct lo_inode *parent_inode;
struct lo_inode *inode;
struct fuse_entry_param e;
int saverr;
@@ -1104,17 +1176,18 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
return;
}
+ parent_inode = lo_inode(req, parent);
inode = lo_inode(req, ino);
- if (!inode) {
- fuse_reply_err(req, EBADF);
- return;
+ if (!parent_inode || !inode) {
+ errno = EBADF;
+ goto out_err;
}
memset(&e, 0, sizeof(struct fuse_entry_param));
e.attr_timeout = lo->timeout;
e.entry_timeout = lo->timeout;
- res = linkat_empty_nofollow(lo, inode, lo_fd(req, parent), name);
+ res = linkat_empty_nofollow(lo, inode, parent_inode->fd, name);
if (res == -1) {
goto out_err;
}
@@ -1133,13 +1206,18 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
name, (unsigned long long)e.ino);
fuse_reply_entry(req, &e);
+ lo_inode_put(lo, &parent_inode);
+ lo_inode_put(lo, &inode);
return;
out_err:
saverr = errno;
+ lo_inode_put(lo, &parent_inode);
+ lo_inode_put(lo, &inode);
fuse_reply_err(req, saverr);
}
+/* Increments nlookup and caller must release refcount using lo_inode_put() */
static struct lo_inode *lookup_name(fuse_req_t req, fuse_ino_t parent,
const char *name)
{
@@ -1176,6 +1254,7 @@ static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name)
fuse_reply_err(req, res == -1 ? errno : 0);
unref_inode_lolocked(lo, inode, 1);
+ lo_inode_put(lo, &inode);
}
static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
@@ -1183,8 +1262,10 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
unsigned int flags)
{
int res;
- struct lo_inode *oldinode;
- struct lo_inode *newinode;
+ struct lo_inode *parent_inode;
+ struct lo_inode *newparent_inode;
+ struct lo_inode *oldinode = NULL;
+ struct lo_inode *newinode = NULL;
struct lo_data *lo = lo_data(req);
if (!is_safe_path_component(name) || !is_safe_path_component(newname)) {
@@ -1192,6 +1273,13 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
return;
}
+ parent_inode = lo_inode(req, parent);
+ newparent_inode = lo_inode(req, newparent);
+ if (!parent_inode || !newparent_inode) {
+ fuse_reply_err(req, EBADF);
+ goto out;
+ }
+
oldinode = lookup_name(req, parent, name);
newinode = lookup_name(req, newparent, newname);
@@ -1204,8 +1292,8 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
#ifndef SYS_renameat2
fuse_reply_err(req, EINVAL);
#else
- res = syscall(SYS_renameat2, lo_fd(req, parent), name,
- lo_fd(req, newparent), newname, flags);
+ res = syscall(SYS_renameat2, parent_inode->fd, name,
+ newparent_inode->fd, newname, flags);
if (res == -1 && errno == ENOSYS) {
fuse_reply_err(req, EINVAL);
} else {
@@ -1215,12 +1303,16 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
goto out;
}
- res = renameat(lo_fd(req, parent), name, lo_fd(req, newparent), newname);
+ res = renameat(parent_inode->fd, name, newparent_inode->fd, newname);
fuse_reply_err(req, res == -1 ? errno : 0);
out:
unref_inode_lolocked(lo, oldinode, 1);
unref_inode_lolocked(lo, newinode, 1);
+ lo_inode_put(lo, &oldinode);
+ lo_inode_put(lo, &newinode);
+ lo_inode_put(lo, &parent_inode);
+ lo_inode_put(lo, &newparent_inode);
}
static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name)
@@ -1244,6 +1336,7 @@ static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name)
fuse_reply_err(req, res == -1 ? errno : 0);
unref_inode_lolocked(lo, inode, 1);
+ lo_inode_put(lo, &inode);
}
static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode,
@@ -1265,8 +1358,9 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode,
g_hash_table_destroy(inode->posix_locks);
pthread_mutex_destroy(&inode->plock_mutex);
pthread_mutex_unlock(&lo->mutex);
- close(inode->fd);
- free(inode);
+
+ /* Drop our refcount from lo_do_lookup() */
+ lo_inode_put(lo, &inode);
} else {
pthread_mutex_unlock(&lo->mutex);
}
@@ -1280,6 +1374,7 @@ static int unref_all_inodes_cb(gpointer key, gpointer value, gpointer user_data)
inode->nlookup = 0;
lo_map_remove(&lo->ino_map, inode->fuse_ino);
close(inode->fd);
+ lo_inode_put(lo, &inode); /* Drop our refcount from lo_do_lookup() */
return TRUE;
}
@@ -1306,6 +1401,7 @@ static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup)
(unsigned long long)nlookup);
unref_inode_lolocked(lo, inode, nlookup);
+ lo_inode_put(lo, &inode);
}
static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup)
@@ -1537,6 +1633,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
err = 0;
error:
lo_dirp_put(&d);
+ lo_inode_put(lo, &dinode);
/*
* If there's an error, we can only signal it if we haven't stored
@@ -1595,6 +1692,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
{
int fd;
struct lo_data *lo = lo_data(req);
+ struct lo_inode *parent_inode;
struct fuse_entry_param e;
int err;
struct lo_cred old = {};
@@ -1607,12 +1705,18 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
return;
}
+ parent_inode = lo_inode(req, parent);
+ if (!parent_inode) {
+ fuse_reply_err(req, EBADF);
+ return;
+ }
+
err = lo_change_cred(req, &old);
if (err) {
goto out;
}
- fd = openat(lo_fd(req, parent), name, (fi->flags | O_CREAT) & ~O_NOFOLLOW,
+ fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW,
mode);
err = fd == -1 ? errno : 0;
lo_restore_cred(&old);
@@ -1625,8 +1729,8 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
pthread_mutex_unlock(&lo->mutex);
if (fh == -1) {
close(fd);
- fuse_reply_err(req, ENOMEM);
- return;
+ err = ENOMEM;
+ goto out;
}
fi->fh = fh;
@@ -1639,6 +1743,8 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
}
out:
+ lo_inode_put(lo, &parent_inode);
+
if (err) {
fuse_reply_err(req, err);
} else {
@@ -1712,16 +1818,18 @@ static void lo_getlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
plock =
lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret);
if (!plock) {
- pthread_mutex_unlock(&inode->plock_mutex);
- fuse_reply_err(req, ret);
- return;
+ saverr = ret;
+ goto out;
}
ret = fcntl(plock->fd, F_OFD_GETLK, lock);
if (ret == -1) {
saverr = errno;
}
+
+out:
pthread_mutex_unlock(&inode->plock_mutex);
+ lo_inode_put(lo, &inode);
if (saverr) {
fuse_reply_err(req, saverr);
@@ -1761,9 +1869,8 @@ static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret);
if (!plock) {
- pthread_mutex_unlock(&inode->plock_mutex);
- fuse_reply_err(req, ret);
- return;
+ saverr = ret;
+ goto out;
}
/* TODO: Is it alright to modify flock? */
@@ -1772,7 +1879,11 @@ static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
if (ret == -1) {
saverr = errno;
}
+
+out:
pthread_mutex_unlock(&inode->plock_mutex);
+ lo_inode_put(lo, &inode);
+
fuse_reply_err(req, saverr);
}
@@ -1898,6 +2009,7 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
pthread_mutex_unlock(&inode->plock_mutex);
res = close(dup(lo_fi_fd(req, fi)));
+ lo_inode_put(lo_data(req), &inode);
fuse_reply_err(req, res == -1 ? errno : 0);
}
@@ -2115,11 +2227,14 @@ out_free:
if (fd >= 0) {
close(fd);
}
+
+ lo_inode_put(lo, &inode);
return;
out_err:
saverr = errno;
out:
+ lo_inode_put(lo, &inode);
fuse_reply_err(req, saverr);
goto out_free;
}
@@ -2190,11 +2305,14 @@ out_free:
if (fd >= 0) {
close(fd);
}
+
+ lo_inode_put(lo, &inode);
return;
out_err:
saverr = errno;
out:
+ lo_inode_put(lo, &inode);
fuse_reply_err(req, saverr);
goto out_free;
}
@@ -2243,6 +2361,8 @@ out:
if (fd >= 0) {
close(fd);
}
+
+ lo_inode_put(lo, &inode);
fuse_reply_err(req, saverr);
}
@@ -2289,6 +2409,8 @@ out:
if (fd >= 0) {
close(fd);
}
+
+ lo_inode_put(lo, &inode);
fuse_reply_err(req, saverr);
}
@@ -2671,6 +2793,7 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root)
root->key.ino = stat.st_ino;
root->key.dev = stat.st_dev;
root->nlookup = 2;
+ g_atomic_int_set(&root->refcount, 2);
}
static guint lo_key_hash(gconstpointer key)