From: Stefan Hajnoczi Date: Mon, 27 Jan 2020 19:02:11 +0000 Subject: [PATCH] virtiofsd: introduce inode refcount to prevent use-after-free If thread A is using an inode it must not be deleted by thread B when processing a FUSE_FORGET request. The FUSE protocol itself already has a counter called nlookup that is used in FUSE_FORGET messages. We cannot trust this counter since the untrusted client can manipulate it via FUSE_FORGET messages. Introduce a new refcount to keep inodes alive for the required lifespan. lo_inode_put() must be called to release a reference. FUSE's nlookup counter holds exactly one reference so that the inode stays alive as long as the client still wants to remember it. Note that the lo_inode->is_symlink field is moved to avoid creating a hole in the struct due to struct field alignment. Signed-off-by: Stefan Hajnoczi Reviewed-by: Misono Tomohiro Reviewed-by: Sergio Lopez Signed-off-by: Dr. David Alan Gilbert (cherry picked from commit c241aa9457d88c6a0d027f48fadfed131646bce3) --- tools/virtiofsd/passthrough_ll.c | 169 ++++++++++++++++++++++++++----- 1 file changed, 146 insertions(+), 23 deletions(-) diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c index e3a6d6b611..ab1613586e 100644 --- a/tools/virtiofsd/passthrough_ll.c +++ b/tools/virtiofsd/passthrough_ll.c @@ -97,7 +97,13 @@ struct lo_key { struct lo_inode { int fd; - bool is_symlink; + + /* + * Atomic reference count for this object. The nlookup field holds a + * reference and releases it when nlookup reaches 0. 
+ */ + gint refcount; + struct lo_key key; /* @@ -116,6 +122,8 @@ struct lo_inode { fuse_ino_t fuse_ino; pthread_mutex_t plock_mutex; GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */ + + bool is_symlink; }; struct lo_cred { @@ -471,6 +479,23 @@ static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode) return elem - lo_data(req)->ino_map.elems; } +static void lo_inode_put(struct lo_data *lo, struct lo_inode **inodep) +{ + struct lo_inode *inode = *inodep; + + if (!inode) { + return; + } + + *inodep = NULL; + + if (g_atomic_int_dec_and_test(&inode->refcount)) { + close(inode->fd); + free(inode); + } +} + +/* Caller must release refcount using lo_inode_put() */ static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) { struct lo_data *lo = lo_data(req); @@ -478,6 +503,9 @@ static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) pthread_mutex_lock(&lo->mutex); elem = lo_map_get(&lo->ino_map, ino); + if (elem) { + g_atomic_int_inc(&elem->inode->refcount); + } pthread_mutex_unlock(&lo->mutex); if (!elem) { @@ -487,10 +515,23 @@ static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) return elem->inode; } +/* + * TODO Remove this helper and force callers to hold an inode refcount until + * they are done with the fd. This will be done in a later patch to make + * review easier. + */ static int lo_fd(fuse_req_t req, fuse_ino_t ino) { struct lo_inode *inode = lo_inode(req, ino); - return inode ? inode->fd : -1; + int fd; + + if (!inode) { + return -1; + } + + fd = inode->fd; + lo_inode_put(lo_data(req), &inode); + return fd; } static void lo_init(void *userdata, struct fuse_conn_info *conn) @@ -545,6 +586,10 @@ static void lo_getattr(fuse_req_t req, fuse_ino_t ino, fuse_reply_attr(req, &buf, lo->timeout); } +/* + * Increments parent->nlookup and caller must release refcount using + * lo_inode_put(&parent). 
+ */ static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode, char path[PATH_MAX], struct lo_inode **parent) { @@ -582,6 +627,7 @@ retry: p = &lo->root; pthread_mutex_lock(&lo->mutex); p->nlookup++; + g_atomic_int_inc(&p->refcount); pthread_mutex_unlock(&lo->mutex); } else { *last = '\0'; @@ -625,6 +671,7 @@ retry: fail_unref: unref_inode_lolocked(lo, p, 1); + lo_inode_put(lo, &p); fail: if (retries) { retries--; @@ -663,6 +710,7 @@ fallback: if (res != -1) { res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW); unref_inode_lolocked(lo, parent, 1); + lo_inode_put(lo, &parent); } return res; @@ -780,11 +828,13 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, goto out_err; } } + lo_inode_put(lo, &inode); return lo_getattr(req, ino, fi); out_err: saverr = errno; + lo_inode_put(lo, &inode); fuse_reply_err(req, saverr); } @@ -801,6 +851,7 @@ static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) if (p) { assert(p->nlookup > 0); p->nlookup++; + g_atomic_int_inc(&p->refcount); } pthread_mutex_unlock(&lo->mutex); @@ -820,6 +871,10 @@ static void posix_locks_value_destroy(gpointer data) free(plock); } +/* + * Increments nlookup and caller must release refcount using + * lo_inode_put(). 
+ */ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, struct fuse_entry_param *e) { @@ -827,7 +882,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, int res; int saverr; struct lo_data *lo = lo_data(req); - struct lo_inode *inode, *dir = lo_inode(req, parent); + struct lo_inode *inode = NULL; + struct lo_inode *dir = lo_inode(req, parent); /* * name_to_handle_at() and open_by_handle_at() can reach here with fuse @@ -868,6 +924,13 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, } inode->is_symlink = S_ISLNK(e->attr.st_mode); + + /* + * One for the caller and one for nlookup (released in + * unref_inode_lolocked()) + */ + g_atomic_int_set(&inode->refcount, 2); + inode->nlookup = 1; inode->fd = newfd; newfd = -1; @@ -883,6 +946,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, pthread_mutex_unlock(&lo->mutex); } e->ino = inode->fuse_ino; + lo_inode_put(lo, &inode); + lo_inode_put(lo, &dir); fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, name, (unsigned long long)e->ino); @@ -894,6 +959,8 @@ out_err: if (newfd != -1) { close(newfd); } + lo_inode_put(lo, &inode); + lo_inode_put(lo, &dir); return saverr; } @@ -991,6 +1058,7 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, { int res; int saverr; + struct lo_data *lo = lo_data(req); struct lo_inode *dir; struct fuse_entry_param e; struct lo_cred old = {}; @@ -1032,9 +1100,11 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, name, (unsigned long long)e.ino); fuse_reply_entry(req, &e); + lo_inode_put(lo, &dir); return; out: + lo_inode_put(lo, &dir); fuse_reply_err(req, saverr); } @@ -1085,6 +1155,7 @@ fallback: if (res != -1) { res = linkat(parent->fd, path, dfd, name, 0); unref_inode_lolocked(lo, parent, 1); + lo_inode_put(lo, &parent); } return res; @@ -1095,6 +1166,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t 
parent, { int res; struct lo_data *lo = lo_data(req); + struct lo_inode *parent_inode; struct lo_inode *inode; struct fuse_entry_param e; int saverr; @@ -1104,17 +1176,18 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, return; } + parent_inode = lo_inode(req, parent); inode = lo_inode(req, ino); - if (!inode) { - fuse_reply_err(req, EBADF); - return; + if (!parent_inode || !inode) { + errno = EBADF; + goto out_err; } memset(&e, 0, sizeof(struct fuse_entry_param)); e.attr_timeout = lo->timeout; e.entry_timeout = lo->timeout; - res = linkat_empty_nofollow(lo, inode, lo_fd(req, parent), name); + res = linkat_empty_nofollow(lo, inode, parent_inode->fd, name); if (res == -1) { goto out_err; } @@ -1133,13 +1206,18 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, name, (unsigned long long)e.ino); fuse_reply_entry(req, &e); + lo_inode_put(lo, &parent_inode); + lo_inode_put(lo, &inode); return; out_err: saverr = errno; + lo_inode_put(lo, &parent_inode); + lo_inode_put(lo, &inode); fuse_reply_err(req, saverr); } +/* Increments nlookup and caller must release refcount using lo_inode_put() */ static struct lo_inode *lookup_name(fuse_req_t req, fuse_ino_t parent, const char *name) { @@ -1176,6 +1254,7 @@ static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) fuse_reply_err(req, res == -1 ? 
errno : 0); unref_inode_lolocked(lo, inode, 1); + lo_inode_put(lo, &inode); } static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, @@ -1183,8 +1262,10 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, unsigned int flags) { int res; - struct lo_inode *oldinode; - struct lo_inode *newinode; + struct lo_inode *parent_inode; + struct lo_inode *newparent_inode; + struct lo_inode *oldinode = NULL; + struct lo_inode *newinode = NULL; struct lo_data *lo = lo_data(req); if (!is_safe_path_component(name) || !is_safe_path_component(newname)) { @@ -1192,6 +1273,13 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, return; } + parent_inode = lo_inode(req, parent); + newparent_inode = lo_inode(req, newparent); + if (!parent_inode || !newparent_inode) { + fuse_reply_err(req, EBADF); + goto out; + } + oldinode = lookup_name(req, parent, name); newinode = lookup_name(req, newparent, newname); @@ -1204,8 +1292,8 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, #ifndef SYS_renameat2 fuse_reply_err(req, EINVAL); #else - res = syscall(SYS_renameat2, lo_fd(req, parent), name, - lo_fd(req, newparent), newname, flags); + res = syscall(SYS_renameat2, parent_inode->fd, name, + newparent_inode->fd, newname, flags); if (res == -1 && errno == ENOSYS) { fuse_reply_err(req, EINVAL); } else { @@ -1215,12 +1303,16 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, goto out; } - res = renameat(lo_fd(req, parent), name, lo_fd(req, newparent), newname); + res = renameat(parent_inode->fd, name, newparent_inode->fd, newname); fuse_reply_err(req, res == -1 ? 
errno : 0); out: unref_inode_lolocked(lo, oldinode, 1); unref_inode_lolocked(lo, newinode, 1); + lo_inode_put(lo, &oldinode); + lo_inode_put(lo, &newinode); + lo_inode_put(lo, &parent_inode); + lo_inode_put(lo, &newparent_inode); } static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) @@ -1244,6 +1336,7 @@ static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) fuse_reply_err(req, res == -1 ? errno : 0); unref_inode_lolocked(lo, inode, 1); + lo_inode_put(lo, &inode); } static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, @@ -1265,8 +1358,9 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, g_hash_table_destroy(inode->posix_locks); pthread_mutex_destroy(&inode->plock_mutex); pthread_mutex_unlock(&lo->mutex); - close(inode->fd); - free(inode); + + /* Drop our refcount from lo_do_lookup() */ + lo_inode_put(lo, &inode); } else { pthread_mutex_unlock(&lo->mutex); } @@ -1280,6 +1374,7 @@ static int unref_all_inodes_cb(gpointer key, gpointer value, gpointer user_data) inode->nlookup = 0; lo_map_remove(&lo->ino_map, inode->fuse_ino); close(inode->fd); + lo_inode_put(lo, &inode); /* Drop our refcount from lo_do_lookup() */ return TRUE; } @@ -1306,6 +1401,7 @@ static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) (unsigned long long)nlookup); unref_inode_lolocked(lo, inode, nlookup); + lo_inode_put(lo, &inode); } static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) @@ -1537,6 +1633,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, err = 0; error: lo_dirp_put(&d); + lo_inode_put(lo, &dinode); /* * If there's an error, we can only signal it if we haven't stored @@ -1595,6 +1692,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, { int fd; struct lo_data *lo = lo_data(req); + struct lo_inode *parent_inode; struct fuse_entry_param e; int err; struct lo_cred old = {}; @@ -1607,12 +1705,18 @@ 
static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, return; } + parent_inode = lo_inode(req, parent); + if (!parent_inode) { + fuse_reply_err(req, EBADF); + return; + } + err = lo_change_cred(req, &old); if (err) { goto out; } - fd = openat(lo_fd(req, parent), name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, + fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, mode); err = fd == -1 ? errno : 0; lo_restore_cred(&old); @@ -1625,8 +1729,8 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, pthread_mutex_unlock(&lo->mutex); if (fh == -1) { close(fd); - fuse_reply_err(req, ENOMEM); - return; + err = ENOMEM; + goto out; } fi->fh = fh; @@ -1639,6 +1743,8 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, } out: + lo_inode_put(lo, &parent_inode); + if (err) { fuse_reply_err(req, err); } else { @@ -1712,16 +1818,18 @@ static void lo_getlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, plock = lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret); if (!plock) { - pthread_mutex_unlock(&inode->plock_mutex); - fuse_reply_err(req, ret); - return; + saverr = ret; + goto out; } ret = fcntl(plock->fd, F_OFD_GETLK, lock); if (ret == -1) { saverr = errno; } + +out: pthread_mutex_unlock(&inode->plock_mutex); + lo_inode_put(lo, &inode); if (saverr) { fuse_reply_err(req, saverr); @@ -1761,9 +1869,8 @@ static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret); if (!plock) { - pthread_mutex_unlock(&inode->plock_mutex); - fuse_reply_err(req, ret); - return; + saverr = ret; + goto out; } /* TODO: Is it alright to modify flock? 
*/ @@ -1772,7 +1879,11 @@ static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, if (ret == -1) { saverr = errno; } + +out: pthread_mutex_unlock(&inode->plock_mutex); + lo_inode_put(lo, &inode); + fuse_reply_err(req, saverr); } @@ -1898,6 +2009,7 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) pthread_mutex_unlock(&inode->plock_mutex); res = close(dup(lo_fi_fd(req, fi))); + lo_inode_put(lo_data(req), &inode); fuse_reply_err(req, res == -1 ? errno : 0); } @@ -2115,11 +2227,14 @@ out_free: if (fd >= 0) { close(fd); } + + lo_inode_put(lo, &inode); return; out_err: saverr = errno; out: + lo_inode_put(lo, &inode); fuse_reply_err(req, saverr); goto out_free; } @@ -2190,11 +2305,14 @@ out_free: if (fd >= 0) { close(fd); } + + lo_inode_put(lo, &inode); return; out_err: saverr = errno; out: + lo_inode_put(lo, &inode); fuse_reply_err(req, saverr); goto out_free; } @@ -2243,6 +2361,8 @@ out: if (fd >= 0) { close(fd); } + + lo_inode_put(lo, &inode); fuse_reply_err(req, saverr); } @@ -2289,6 +2409,8 @@ out: if (fd >= 0) { close(fd); } + + lo_inode_put(lo, &inode); fuse_reply_err(req, saverr); } @@ -2671,6 +2793,7 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root) root->key.ino = stat.st_ino; root->key.dev = stat.st_dev; root->nlookup = 2; + g_atomic_int_set(&root->refcount, 2); } static guint lo_key_hash(gconstpointer key)