peterdelevoryas / rpms / qemu

Forked from rpms/qemu 2 years ago
Clone

Blame 0014-virtiofsd-Add-passthrough_ll.patch

1d442b
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
1d442b
Date: Mon, 27 Jan 2020 19:00:43 +0000
1d442b
Subject: [PATCH] virtiofsd: Add passthrough_ll
1d442b
MIME-Version: 1.0
1d442b
Content-Type: text/plain; charset=UTF-8
1d442b
Content-Transfer-Encoding: 8bit
1d442b
1d442b
passthrough_ll is one of the examples in the upstream fuse project
1d442b
and is the main part of our daemon here.  It passes through requests
1d442b
from fuse to the underlying filesystem, using syscalls as directly
1d442b
as possible.
1d442b
1d442b
>From libfuse fuse-3.8.0
1d442b
1d442b
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
1d442b
  Fixed up 'GPL' to 'GPLv2' as per Dan's comments and consistent
1d442b
  with the 'LICENSE' file in libfuse;  patch sent to libfuse to fix
1d442b
  it upstream.
1d442b
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
1d442b
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
1d442b
(cherry picked from commit 7c6b66027241f41720240fc6ee1021cdbd975b2e)
1d442b
---
1d442b
 tools/virtiofsd/passthrough_ll.c | 1338 ++++++++++++++++++++++++++++++
1d442b
 1 file changed, 1338 insertions(+)
1d442b
 create mode 100644 tools/virtiofsd/passthrough_ll.c
1d442b
1d442b
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
1d442b
new file mode 100644
1d442b
index 0000000000..e1a605691a
1d442b
--- /dev/null
1d442b
+++ b/tools/virtiofsd/passthrough_ll.c
1d442b
@@ -0,0 +1,1338 @@
1d442b
+/*
1d442b
+  FUSE: Filesystem in Userspace
1d442b
+  Copyright (C) 2001-2007  Miklos Szeredi <miklos@szeredi.hu>
1d442b
+
1d442b
+  This program can be distributed under the terms of the GNU GPLv2.
1d442b
+  See the file COPYING.
1d442b
+*/
1d442b
+
1d442b
+/** @file
1d442b
+ *
1d442b
+ * This file system mirrors the existing file system hierarchy of the
1d442b
+ * system, starting at the root file system. This is implemented by
1d442b
+ * just "passing through" all requests to the corresponding user-space
1d442b
+ * libc functions. In contrast to passthrough.c and passthrough_fh.c,
1d442b
+ * this implementation uses the low-level API. Its performance should
1d442b
+ * be the least bad among the three, but many operations are not
1d442b
+ * implemented. In particular, it is not possible to remove files (or
1d442b
+ * directories) because the code necessary to defer actual removal
1d442b
+ * until the file is not opened anymore would make the example much
1d442b
+ * more complicated.
1d442b
+ *
1d442b
+ * When writeback caching is enabled (-o writeback mount option), it
1d442b
+ * is only possible to write to files for which the mounting user has
1d442b
+ * read permissions. This is because the writeback cache requires the
1d442b
+ * kernel to be able to issue read requests for all files (which the
1d442b
+ * passthrough filesystem cannot satisfy if it can't read the file in
1d442b
+ * the underlying filesystem).
1d442b
+ *
1d442b
+ * Compile with:
1d442b
+ *
1d442b
+ *     gcc -Wall passthrough_ll.c `pkg-config fuse3 --cflags --libs` -o passthrough_ll
1d442b
+ *
1d442b
+ * ## Source code ##
1d442b
+ * \include passthrough_ll.c
1d442b
+ */
1d442b
+
1d442b
+#define _GNU_SOURCE
1d442b
+#define FUSE_USE_VERSION 31
1d442b
+
1d442b
+#include "config.h"
1d442b
+
1d442b
+#include <fuse_lowlevel.h>
1d442b
+#include <unistd.h>
1d442b
+#include <stdlib.h>
1d442b
+#include <stdio.h>
1d442b
+#include <stddef.h>
1d442b
+#include <stdbool.h>
1d442b
+#include <string.h>
1d442b
+#include <limits.h>
1d442b
+#include <dirent.h>
1d442b
+#include <assert.h>
1d442b
+#include <errno.h>
1d442b
+#include <inttypes.h>
1d442b
+#include <pthread.h>
1d442b
+#include <sys/file.h>
1d442b
+#include <sys/xattr.h>
1d442b
+
1d442b
+#include "passthrough_helpers.h"
1d442b
+
1d442b
+/* We are re-using pointers to our `struct lo_inode` and `struct
1d442b
+   lo_dirp` elements as inodes. This means that we must be able to
1d442b
+   store uintptr_t values in a fuse_ino_t variable. The following
1d442b
+   incantation checks this condition at compile time. */
1d442b
+#if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && !defined __cplusplus
1d442b
+_Static_assert(sizeof(fuse_ino_t) >= sizeof(uintptr_t),
1d442b
+	       "fuse_ino_t too small to hold uintptr_t values!");
1d442b
+#else /* no _Static_assert: the bit-field width below becomes -1 when the size check fails */
1d442b
+struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct \
1d442b
+	{ unsigned _uintptr_to_must_hold_fuse_ino_t:
1d442b
+			((sizeof(fuse_ino_t) >= sizeof(uintptr_t)) ? 1 : -1); };
1d442b
+#endif /* GCC >= 4.6, C only */
1d442b
+
1d442b
+struct lo_inode { /* one tracked inode; nodes form a circular doubly linked list headed by lo_data.root */
1d442b
+	struct lo_inode *next; /* protected by lo->mutex */
1d442b
+	struct lo_inode *prev; /* protected by lo->mutex */
1d442b
+	int fd; /* for looked-up nodes: opened with O_PATH | O_NOFOLLOW in lo_do_lookup() */
1d442b
+	bool is_symlink; /* S_ISLNK() of the mode seen at lookup time */
1d442b
+	ino_t ino; /* st_ino; together with dev the key lo_find() matches on */
1d442b
+	dev_t dev; /* st_dev */
1d442b
+	uint64_t refcount; /* protected by lo->mutex */
1d442b
+};
1d442b
+
1d442b
+enum { /* values stored in lo_data.cache by the cache= options */
1d442b
+	CACHE_NEVER, /* lo_open()/lo_create() set fi->direct_io */
1d442b
+	CACHE_NORMAL, /* neither direct_io nor keep_cache is set */
1d442b
+	CACHE_ALWAYS, /* lo_open()/lo_create()/lo_opendir() set fi->keep_cache */
1d442b
+};
1d442b
+
1d442b
+struct lo_data { /* filesystem-wide state, reached through fuse_req_userdata() */
1d442b
+	pthread_mutex_t mutex; /* guards the inode list links and every refcount */
1d442b
+	int debug; /* non-zero enables the FUSE_LOG_DEBUG traces */
1d442b
+	int writeback; /* "writeback" / "no_writeback" option */
1d442b
+	int flock; /* "flock" / "no_flock" option */
1d442b
+	int xattr; /* "xattr" / "no_xattr" option; when 0 lo_getxattr() replies ENOSYS */
1d442b
+	const char *source; /* "source=%s" option */
1d442b
+	double timeout; /* "timeout=%lf" option; used as attr and entry timeout in replies */
1d442b
+	int cache; /* CACHE_NEVER, CACHE_NORMAL or CACHE_ALWAYS */
1d442b
+	int timeout_set; /* set to 1 by the "timeout=" template */
1d442b
+	struct lo_inode root; /* protected by lo->mutex */
1d442b
+};
1d442b
+
1d442b
+/* Mount-option templates and the lo_data member each one targets. */
1d442b
+static const struct fuse_opt lo_opts[] = {
1d442b
+	{ "writeback",    offsetof(struct lo_data, writeback),   1 },
1d442b
+	{ "no_writeback", offsetof(struct lo_data, writeback),   0 },
1d442b
+	{ "source=%s",    offsetof(struct lo_data, source),      0 },
1d442b
+	{ "flock",        offsetof(struct lo_data, flock),       1 },
1d442b
+	{ "no_flock",     offsetof(struct lo_data, flock),       0 },
1d442b
+	{ "xattr",        offsetof(struct lo_data, xattr),       1 },
1d442b
+	{ "no_xattr",     offsetof(struct lo_data, xattr),       0 },
1d442b
+	{ "timeout=%lf",  offsetof(struct lo_data, timeout),     0 },
1d442b
+	{ "timeout=",     offsetof(struct lo_data, timeout_set), 1 },
1d442b
+	{ "cache=never",  offsetof(struct lo_data, cache),       CACHE_NEVER },
1d442b
+	{ "cache=auto",   offsetof(struct lo_data, cache),       CACHE_NORMAL },
1d442b
+	{ "cache=always", offsetof(struct lo_data, cache),       CACHE_ALWAYS },
1d442b
+
1d442b
+	FUSE_OPT_END
1d442b
+};
1d442b
+
1d442b
+/* Return the filesystem state attached to @req as userdata. */
1d442b
+static struct lo_data *lo_data(fuse_req_t req)
1d442b
+{
1d442b
+	struct lo_data *lo = (struct lo_data *) fuse_req_userdata(req);
1d442b
+
1d442b
+	return lo;
1d442b
+}
1d442b
+
1d442b
+/*
1d442b
+ * Map a FUSE inode number to its lo_inode: FUSE_ROOT_ID maps to the
1d442b
+ * embedded root node, any other number is the node's address.
1d442b
+ */
1d442b
+static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino)
1d442b
+{
1d442b
+	if (ino != FUSE_ROOT_ID)
1d442b
+		return (struct lo_inode *) (uintptr_t) ino;
1d442b
+
1d442b
+	return &lo_data(req)->root;
1d442b
+}
1d442b
+
1d442b
+/* Return the descriptor held by the lo_inode behind @ino. */
1d442b
+static int lo_fd(fuse_req_t req, fuse_ino_t ino)
1d442b
+{
1d442b
+	struct lo_inode *inode = lo_inode(req, ino);
1d442b
+
1d442b
+	return inode->fd;
1d442b
+}
1d442b
+
1d442b
+/* True when debug tracing is switched on for this filesystem. */
1d442b
+static bool lo_debug(fuse_req_t req)
1d442b
+{
1d442b
+	struct lo_data *lo = lo_data(req);
1d442b
+
1d442b
+	return lo->debug ? true : false;
1d442b
+}
1d442b
+
1d442b
+/*
1d442b
+ * Init handler.  Requests export support whenever the connection offers
1d442b
+ * it, and the writeback cache / flock locks when the matching option is
1d442b
+ * on and the connection advertises the capability.
1d442b
+ */
1d442b
+static void lo_init(void *userdata,
1d442b
+		    struct fuse_conn_info *conn)
1d442b
+{
1d442b
+	struct lo_data *lo = (struct lo_data*) userdata;
1d442b
+	bool use_writeback = lo->writeback &&
1d442b
+		(conn->capable & FUSE_CAP_WRITEBACK_CACHE);
1d442b
+	bool use_flock = lo->flock &&
1d442b
+		(conn->capable & FUSE_CAP_FLOCK_LOCKS);
1d442b
+
1d442b
+	if (conn->capable & FUSE_CAP_EXPORT_SUPPORT)
1d442b
+		conn->want |= FUSE_CAP_EXPORT_SUPPORT;
1d442b
+
1d442b
+	if (use_writeback) {
1d442b
+		if (lo->debug)
1d442b
+			fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n");
1d442b
+		conn->want |= FUSE_CAP_WRITEBACK_CACHE;
1d442b
+	}
1d442b
+
1d442b
+	if (use_flock) {
1d442b
+		if (lo->debug)
1d442b
+			fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n");
1d442b
+		conn->want |= FUSE_CAP_FLOCK_LOCKS;
1d442b
+	}
1d442b
+}
1d442b
+
1d442b
+/*
1d442b
+ * Getattr handler: stat the inode's own descriptor (empty path, symlinks
1d442b
+ * not followed) and reply with the result and the configured timeout.
1d442b
+ */
1d442b
+static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
1d442b
+			     struct fuse_file_info *fi)
1d442b
+{
1d442b
+	struct lo_data *lo = lo_data(req);
1d442b
+	struct stat st;
1d442b
+
1d442b
+	(void) fi;
1d442b
+
1d442b
+	if (fstatat(lo_fd(req, ino), "", &st,
1d442b
+		    AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW) == -1) {
1d442b
+		fuse_reply_err(req, errno);
1d442b
+		return;
1d442b
+	}
1d442b
+
1d442b
+	fuse_reply_attr(req, &st, lo->timeout);
1d442b
+}
1d442b
+
1d442b
+/*
1d442b
+ * Set the timestamps @tv on @inode without following symlinks.
1d442b
+ *
1d442b
+ * Non-symlinks are addressed through their /proc/self/fd entry.  Symlinks
1d442b
+ * go through utimensat() with an empty path; an EINVAL from that call is
1d442b
+ * reported as EPERM.  Returns the utimensat() result.
1d442b
+ */
1d442b
+static int utimensat_empty_nofollow(struct lo_inode *inode,
1d442b
+				    const struct timespec *tv)
1d442b
+{
1d442b
+	char procname[64];
1d442b
+	int res;
1d442b
+
1d442b
+	if (!inode->is_symlink) {
1d442b
+		sprintf(procname, "/proc/self/fd/%i", inode->fd);
1d442b
+		return utimensat(AT_FDCWD, procname, tv, 0);
1d442b
+	}
1d442b
+
1d442b
+	res = utimensat(inode->fd, "", tv,
1d442b
+			AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
1d442b
+	/* Sorry, no race free way to set times on symlink. */
1d442b
+	if (res == -1 && errno == EINVAL)
1d442b
+		errno = EPERM;
1d442b
+
1d442b
+	return res;
1d442b
+}
1d442b
+
1d442b
+static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
1d442b
+		       int valid, struct fuse_file_info *fi)
1d442b
+{ /* Setattr handler: apply each attribute selected in @valid, then reply through lo_getattr(); the first failure replies with errno */
1d442b
+	int saverr;
1d442b
+	char procname[64];
1d442b
+	struct lo_inode *inode = lo_inode(req, ino);
1d442b
+	int ifd = inode->fd;
1d442b
+	int res;
1d442b
+
1d442b
+	if (valid & FUSE_SET_ATTR_MODE) { /* mode: via the open handle when @fi is given, else via /proc/self/fd */
1d442b
+		if (fi) {
1d442b
+			res = fchmod(fi->fh, attr->st_mode);
1d442b
+		} else {
1d442b
+			sprintf(procname, "/proc/self/fd/%i", ifd);
1d442b
+			res = chmod(procname, attr->st_mode);
1d442b
+		}
1d442b
+		if (res == -1)
1d442b
+			goto out_err;
1d442b
+	}
1d442b
+	if (valid & (FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID)) { /* owner/group: an id whose flag is not set is passed as -1 */
1d442b
+		uid_t uid = (valid & FUSE_SET_ATTR_UID) ?
1d442b
+			attr->st_uid : (uid_t) -1;
1d442b
+		gid_t gid = (valid & FUSE_SET_ATTR_GID) ?
1d442b
+			attr->st_gid : (gid_t) -1;
1d442b
+
1d442b
+		res = fchownat(ifd, "", uid, gid,
1d442b
+			       AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
1d442b
+		if (res == -1)
1d442b
+			goto out_err;
1d442b
+	}
1d442b
+	if (valid & FUSE_SET_ATTR_SIZE) { /* size: same handle-or-/proc split as for the mode */
1d442b
+		if (fi) {
1d442b
+			res = ftruncate(fi->fh, attr->st_size);
1d442b
+		} else {
1d442b
+			sprintf(procname, "/proc/self/fd/%i", ifd);
1d442b
+			res = truncate(procname, attr->st_size);
1d442b
+		}
1d442b
+		if (res == -1)
1d442b
+			goto out_err;
1d442b
+	}
1d442b
+	if (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) { /* times: tv[0] is atime, tv[1] is mtime */
1d442b
+		struct timespec tv[2];
1d442b
+
1d442b
+		tv[0].tv_sec = 0;
1d442b
+		tv[1].tv_sec = 0;
1d442b
+		tv[0].tv_nsec = UTIME_OMIT; /* both slots start as "omit" and are overridden per flag below */
1d442b
+		tv[1].tv_nsec = UTIME_OMIT;
1d442b
+
1d442b
+		if (valid & FUSE_SET_ATTR_ATIME_NOW)
1d442b
+			tv[0].tv_nsec = UTIME_NOW;
1d442b
+		else if (valid & FUSE_SET_ATTR_ATIME)
1d442b
+			tv[0] = attr->st_atim;
1d442b
+
1d442b
+		if (valid & FUSE_SET_ATTR_MTIME_NOW)
1d442b
+			tv[1].tv_nsec = UTIME_NOW;
1d442b
+		else if (valid & FUSE_SET_ATTR_MTIME)
1d442b
+			tv[1] = attr->st_mtim;
1d442b
+
1d442b
+		if (fi)
1d442b
+			res = futimens(fi->fh, tv);
1d442b
+		else
1d442b
+			res = utimensat_empty_nofollow(inode, tv);
1d442b
+		if (res == -1)
1d442b
+			goto out_err;
1d442b
+	}
1d442b
+
1d442b
+	return lo_getattr(req, ino, fi); /* success: lo_getattr() sends the reply with fresh attributes */
1d442b
+
1d442b
+out_err:
1d442b
+	saverr = errno; /* errno of whichever call failed above */
1d442b
+	fuse_reply_err(req, saverr);
1d442b
+}
1d442b
+
1d442b
+/*
1d442b
+ * Search the inode list for the node whose ino/dev match @st.
1d442b
+ *
1d442b
+ * On a hit the node's refcount is incremented while lo->mutex is still
1d442b
+ * held.  Returns the node, or NULL when nothing matches.
1d442b
+ */
1d442b
+static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st)
1d442b
+{
1d442b
+	struct lo_inode *found = NULL;
1d442b
+	struct lo_inode *cur;
1d442b
+
1d442b
+	pthread_mutex_lock(&lo->mutex);
1d442b
+	cur = lo->root.next;
1d442b
+	while (cur != &lo->root) {
1d442b
+		if (cur->ino == st->st_ino && cur->dev == st->st_dev) {
1d442b
+			assert(cur->refcount > 0);
1d442b
+			cur->refcount++;
1d442b
+			found = cur;
1d442b
+			break;
1d442b
+		}
1d442b
+		cur = cur->next;
1d442b
+	}
1d442b
+	pthread_mutex_unlock(&lo->mutex);
1d442b
+
1d442b
+	return found;
1d442b
+}
1d442b
+
1d442b
+/*
1d442b
+ * Look up @name inside directory @parent and fill in @e.
1d442b
+ *
1d442b
+ * The child is opened with O_PATH | O_NOFOLLOW and stat'ed.  If a node
1d442b
+ * with the same st_ino/st_dev is already tracked, lo_find() has taken a
1d442b
+ * reference on it and the new descriptor is closed.  Otherwise a node
1d442b
+ * with refcount 1 that owns the descriptor is allocated and linked in
1d442b
+ * right after the root.
1d442b
+ *
1d442b
+ * Returns 0 on success, or an errno value (ENOMEM when the node cannot
1d442b
+ * be allocated).  @e->ino is only meaningful on success.
1d442b
+ */
1d442b
+static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
1d442b
+			 struct fuse_entry_param *e)
1d442b
+{
1d442b
+	int newfd;
1d442b
+	int res;
1d442b
+	int saverr;
1d442b
+	struct lo_data *lo = lo_data(req);
1d442b
+	struct lo_inode *inode;
1d442b
+
1d442b
+	memset(e, 0, sizeof(*e));
1d442b
+	e->attr_timeout = lo->timeout;
1d442b
+	e->entry_timeout = lo->timeout;
1d442b
+
1d442b
+	newfd = openat(lo_fd(req, parent), name, O_PATH | O_NOFOLLOW);
1d442b
+	if (newfd == -1)
1d442b
+		goto out_errno;
1d442b
+
1d442b
+	res = fstatat(newfd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
1d442b
+	if (res == -1)
1d442b
+		goto out_errno;
1d442b
+
1d442b
+	inode = lo_find(lo, &e->attr);
1d442b
+	if (inode) {
1d442b
+		/* Already tracked: the existing node keeps its own descriptor. */
1d442b
+		close(newfd);
1d442b
+		newfd = -1;
1d442b
+	} else {
1d442b
+		struct lo_inode *prev, *next;
1d442b
+
1d442b
+		inode = calloc(1, sizeof(*inode));
1d442b
+		if (!inode) {
1d442b
+			/* Report ENOMEM itself; do not let errno replace it. */
1d442b
+			saverr = ENOMEM;
1d442b
+			goto out_close;
1d442b
+		}
1d442b
+
1d442b
+		inode->is_symlink = S_ISLNK(e->attr.st_mode);
1d442b
+		inode->refcount = 1;
1d442b
+		inode->fd = newfd;
1d442b
+		inode->ino = e->attr.st_ino;
1d442b
+		inode->dev = e->attr.st_dev;
1d442b
+
1d442b
+		/* Insert directly behind the list head. */
1d442b
+		pthread_mutex_lock(&lo->mutex);
1d442b
+		prev = &lo->root;
1d442b
+		next = prev->next;
1d442b
+		next->prev = inode;
1d442b
+		inode->next = next;
1d442b
+		inode->prev = prev;
1d442b
+		prev->next = inode;
1d442b
+		pthread_mutex_unlock(&lo->mutex);
1d442b
+	}
1d442b
+	e->ino = (uintptr_t) inode;
1d442b
+
1d442b
+	if (lo_debug(req))
1d442b
+		fuse_log(FUSE_LOG_DEBUG, "  %llu/%s -> %llu\n",
1d442b
+			(unsigned long long) parent, name, (unsigned long long) e->ino);
1d442b
+
1d442b
+	return 0;
1d442b
+
1d442b
+out_errno:
1d442b
+	/* Capture errno before close() gets a chance to change it. */
1d442b
+	saverr = errno;
1d442b
+out_close:
1d442b
+	if (newfd != -1)
1d442b
+		close(newfd);
1d442b
+	return saverr;
1d442b
+}
1d442b
+
1d442b
+static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name)
1d442b
+{
1d442b
+	struct fuse_entry_param e;
1d442b
+	int err;
1d442b
+
1d442b
+	if (lo_debug(req))
1d442b
+		fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n",
1d442b
+			parent, name);
1d442b
+
1d442b
+	err = lo_do_lookup(req, parent, name, &e);
1d442b
+	if (err)
1d442b
+		fuse_reply_err(req, err);
1d442b
+	else
1d442b
+		fuse_reply_entry(req, &e);
1d442b
+}
1d442b
+
1d442b
+/*
1d442b
+ * Common body of mknod, mkdir and symlink: create the object through
1d442b
+ * mknod_wrapper(), then look it up and reply with the resulting entry.
1d442b
+ * Any failure is answered with the matching error code.
1d442b
+ */
1d442b
+static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent,
1d442b
+			     const char *name, mode_t mode, dev_t rdev,
1d442b
+			     const char *link)
1d442b
+{
1d442b
+	struct lo_inode *dir = lo_inode(req, parent);
1d442b
+	struct fuse_entry_param e;
1d442b
+	int err;
1d442b
+
1d442b
+	if (mknod_wrapper(dir->fd, name, link, mode, rdev) == -1) {
1d442b
+		fuse_reply_err(req, errno);
1d442b
+		return;
1d442b
+	}
1d442b
+
1d442b
+	err = lo_do_lookup(req, parent, name, &e);
1d442b
+	if (err) {
1d442b
+		fuse_reply_err(req, err);
1d442b
+		return;
1d442b
+	}
1d442b
+
1d442b
+	if (lo_debug(req)) {
1d442b
+		fuse_log(FUSE_LOG_DEBUG, "  %lli/%s -> %lli\n",
1d442b
+			(unsigned long long) parent, name, (unsigned long long) e.ino);
1d442b
+	}
1d442b
+
1d442b
+	fuse_reply_entry(req, &e);
1d442b
+}
1d442b
+
1d442b
+/* Mknod handler: forwards to lo_mknod_symlink() without a link target. */
1d442b
+static void lo_mknod(fuse_req_t req, fuse_ino_t parent,
1d442b
+		     const char *name, mode_t mode, dev_t rdev)
1d442b
+{
1d442b
+	const char *no_link = NULL;
1d442b
+
1d442b
+	lo_mknod_symlink(req, parent, name, mode, rdev, no_link);
1d442b
+}
1d442b
+
1d442b
+/* Mkdir handler: forwards to lo_mknod_symlink() with S_IFDIR added to @mode. */
1d442b
+static void lo_mkdir(fuse_req_t req, fuse_ino_t parent, const char *name,
1d442b
+		     mode_t mode)
1d442b
+{
1d442b
+	mode_t dir_mode = S_IFDIR | mode;
1d442b
+
1d442b
+	lo_mknod_symlink(req, parent, name, dir_mode, 0, NULL);
1d442b
+}
1d442b
+
1d442b
+/* Symlink handler: forwards to lo_mknod_symlink() with mode S_IFLNK and target @link. */
1d442b
+static void lo_symlink(fuse_req_t req, const char *link,
1d442b
+		       fuse_ino_t parent, const char *name)
1d442b
+{
1d442b
+	const mode_t link_mode = S_IFLNK;
1d442b
+
1d442b
+	lo_mknod_symlink(req, parent, name, link_mode, 0, link);
1d442b
+}
1d442b
+
1d442b
+/*
1d442b
+ * Hard-link @inode as @name inside the directory @dfd.
1d442b
+ *
1d442b
+ * Non-symlinks are linked through their /proc/self/fd entry.  Symlinks
1d442b
+ * use linkat() with AT_EMPTY_PATH; ENOENT or EINVAL from that call is
1d442b
+ * reported as EPERM.  Returns the linkat() result.
1d442b
+ */
1d442b
+static int linkat_empty_nofollow(struct lo_inode *inode, int dfd,
1d442b
+				 const char *name)
1d442b
+{
1d442b
+	char procname[64];
1d442b
+	int res;
1d442b
+
1d442b
+	if (!inode->is_symlink) {
1d442b
+		sprintf(procname, "/proc/self/fd/%i", inode->fd);
1d442b
+		return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW);
1d442b
+	}
1d442b
+
1d442b
+	res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH);
1d442b
+	/* Sorry, no race free way to hard-link a symlink. */
1d442b
+	if (res == -1 && (errno == ENOENT || errno == EINVAL))
1d442b
+		errno = EPERM;
1d442b
+
1d442b
+	return res;
1d442b
+}
1d442b
+
1d442b
+/*
1d442b
+ * Link handler: hard-link @ino as @name under @parent, re-read the
1d442b
+ * inode's attributes, take one more reference on the node and reply
1d442b
+ * with the entry.  A failing linkat or fstatat is answered with errno.
1d442b
+ */
1d442b
+static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
1d442b
+		    const char *name)
1d442b
+{
1d442b
+	struct lo_data *lo = lo_data(req);
1d442b
+	struct lo_inode *inode = lo_inode(req, ino);
1d442b
+	struct fuse_entry_param e;
1d442b
+
1d442b
+	memset(&e, 0, sizeof(e));
1d442b
+	e.attr_timeout = lo->timeout;
1d442b
+	e.entry_timeout = lo->timeout;
1d442b
+
1d442b
+	if (linkat_empty_nofollow(inode, lo_fd(req, parent), name) == -1 ||
1d442b
+	    fstatat(inode->fd, "", &e.attr,
1d442b
+		    AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW) == -1) {
1d442b
+		fuse_reply_err(req, errno);
1d442b
+		return;
1d442b
+	}
1d442b
+
1d442b
+	pthread_mutex_lock(&lo->mutex);
1d442b
+	inode->refcount++;
1d442b
+	pthread_mutex_unlock(&lo->mutex);
1d442b
+	e.ino = (uintptr_t) inode;
1d442b
+
1d442b
+	if (lo_debug(req)) {
1d442b
+		fuse_log(FUSE_LOG_DEBUG, "  %lli/%s -> %lli\n",
1d442b
+			(unsigned long long) parent, name,
1d442b
+			(unsigned long long) e.ino);
1d442b
+	}
1d442b
+
1d442b
+	fuse_reply_entry(req, &e);
1d442b
+}
1d442b
+
1d442b
+/* Rmdir handler: remove directory @name from @parent and reply 0 or errno. */
1d442b
+static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name)
1d442b
+{
1d442b
+	int err = 0;
1d442b
+
1d442b
+	if (unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR) == -1)
1d442b
+		err = errno;
1d442b
+
1d442b
+	fuse_reply_err(req, err);
1d442b
+}
1d442b
+
1d442b
+/*
1d442b
+ * Rename handler: move @name in @parent to @newname in @newparent.
1d442b
+ * Any non-zero @flags value is rejected with EINVAL.
1d442b
+ */
1d442b
+static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
1d442b
+		      fuse_ino_t newparent, const char *newname,
1d442b
+		      unsigned int flags)
1d442b
+{
1d442b
+	int err = 0;
1d442b
+
1d442b
+	if (flags) {
1d442b
+		err = EINVAL;
1d442b
+	} else if (renameat(lo_fd(req, parent), name,
1d442b
+			    lo_fd(req, newparent), newname) == -1) {
1d442b
+		err = errno;
1d442b
+	}
1d442b
+
1d442b
+	fuse_reply_err(req, err);
1d442b
+}
1d442b
+
1d442b
+/* Unlink handler: remove @name from @parent and reply 0 or errno. */
1d442b
+static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name)
1d442b
+{
1d442b
+	int err = 0;
1d442b
+
1d442b
+	if (unlinkat(lo_fd(req, parent), name, 0) == -1)
1d442b
+		err = errno;
1d442b
+
1d442b
+	fuse_reply_err(req, err);
1d442b
+}
1d442b
+
1d442b
+/*
1d442b
+ * Drop @n references from @inode (a NULL @inode is ignored).
1d442b
+ *
1d442b
+ * When the count reaches zero the node is unlinked from the list under
1d442b
+ * lo->mutex; its descriptor is closed and the node freed after the
1d442b
+ * mutex has been released.
1d442b
+ */
1d442b
+static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n)
1d442b
+{
1d442b
+	bool last;
1d442b
+
1d442b
+	if (inode == NULL)
1d442b
+		return;
1d442b
+
1d442b
+	pthread_mutex_lock(&lo->mutex);
1d442b
+	assert(inode->refcount >= n);
1d442b
+	inode->refcount -= n;
1d442b
+	last = (inode->refcount == 0);
1d442b
+	if (last) {
1d442b
+		struct lo_inode *before = inode->prev;
1d442b
+		struct lo_inode *after = inode->next;
1d442b
+
1d442b
+		after->prev = before;
1d442b
+		before->next = after;
1d442b
+	}
1d442b
+	pthread_mutex_unlock(&lo->mutex);
1d442b
+
1d442b
+	if (last) {
1d442b
+		close(inode->fd);
1d442b
+		free(inode);
1d442b
+	}
1d442b
+}
1d442b
+
1d442b
+/* Drop @nlookup references from the node behind @ino, tracing the change when debugging. */
1d442b
+static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup)
1d442b
+{
1d442b
+	struct lo_inode *inode = lo_inode(req, ino);
1d442b
+	struct lo_data *lo = lo_data(req);
1d442b
+
1d442b
+	if (lo_debug(req))
1d442b
+		fuse_log(FUSE_LOG_DEBUG, "  forget %lli %lli -%lli\n",
1d442b
+			(unsigned long long) ino,
1d442b
+			(unsigned long long) inode->refcount,
1d442b
+			(unsigned long long) nlookup);
1d442b
+
1d442b
+	unref_inode(lo, inode, nlookup);
1d442b
+}
1d442b
+
1d442b
+/* Forget handler: release @nlookup references on @ino; the request gets an empty reply. */
1d442b
+static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup)
1d442b
+{
1d442b
+	lo_forget_one(req, ino, nlookup);
1d442b
+
1d442b
+	fuse_reply_none(req);
1d442b
+}
1d442b
+
1d442b
+/*
1d442b
+ * Batch-forget handler: for each of the @count entries in @forgets,
1d442b
+ * release forgets[i].nlookup references on forgets[i].ino.  The request
1d442b
+ * gets an empty reply.
1d442b
+ */
1d442b
+static void lo_forget_multi(fuse_req_t req, size_t count,
1d442b
+				struct fuse_forget_data *forgets)
1d442b
+{
1d442b
+	size_t i; /* same type as @count: no signed/unsigned comparison, no truncation */
1d442b
+
1d442b
+	for (i = 0; i < count; i++)
1d442b
+		lo_forget_one(req, forgets[i].ino, forgets[i].nlookup);
1d442b
+	fuse_reply_none(req);
1d442b
+}
1d442b
+
1d442b
+/*
1d442b
+ * Readlink handler: read the link target through the inode's own
1d442b
+ * descriptor.  A result that fills the whole buffer is reported as
1d442b
+ * ENAMETOOLONG; otherwise the target is NUL-terminated and sent back.
1d442b
+ */
1d442b
+static void lo_readlink(fuse_req_t req, fuse_ino_t ino)
1d442b
+{
1d442b
+	char target[PATH_MAX + 1];
1d442b
+	int len;
1d442b
+
1d442b
+	len = readlinkat(lo_fd(req, ino), "", target, sizeof(target));
1d442b
+	if (len == -1) {
1d442b
+		fuse_reply_err(req, errno);
1d442b
+		return;
1d442b
+	}
1d442b
+
1d442b
+	if (len == sizeof(target)) {
1d442b
+		fuse_reply_err(req, ENAMETOOLONG);
1d442b
+		return;
1d442b
+	}
1d442b
+
1d442b
+	target[len] = '\0';
1d442b
+	fuse_reply_readlink(req, target);
1d442b
+}
1d442b
+
1d442b
+struct lo_dirp { /* per-open-directory state; its address is stored in fi->fh by lo_opendir() */
1d442b
+	DIR *dp; /* stream obtained from fdopendir() in lo_opendir() */
1d442b
+	struct dirent *entry; /* entry read but not yet emitted by lo_do_readdir(); NULL when none is pending */
1d442b
+	off_t offset; /* d_off of the last emitted entry, or the offset last passed to seekdir() */
1d442b
+};
1d442b
+
1d442b
+/* Recover the lo_dirp whose address was stored in fi->fh. */
1d442b
+static struct lo_dirp *lo_dirp(struct fuse_file_info *fi)
1d442b
+{
1d442b
+	uintptr_t handle = (uintptr_t) fi->fh;
1d442b
+
1d442b
+	return (struct lo_dirp *) handle;
1d442b
+}
1d442b
+
1d442b
+/*
1d442b
+ * Opendir handler: allocate a lo_dirp, open the directory read-only
1d442b
+ * relative to the inode's descriptor and wrap it in a DIR stream.  The
1d442b
+ * lo_dirp address becomes fi->fh.  Every failure releases what was
1d442b
+ * acquired so far and replies with ENOMEM or the failing call's errno.
1d442b
+ */
1d442b
+static void lo_opendir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
1d442b
+{
1d442b
+	struct lo_data *lo = lo_data(req);
1d442b
+	struct lo_dirp *d;
1d442b
+	int error;
1d442b
+	int fd;
1d442b
+
1d442b
+	d = calloc(1, sizeof(struct lo_dirp));
1d442b
+	if (d == NULL) {
1d442b
+		fuse_reply_err(req, ENOMEM);
1d442b
+		return;
1d442b
+	}
1d442b
+
1d442b
+	fd = openat(lo_fd(req, ino), ".", O_RDONLY);
1d442b
+	if (fd == -1) {
1d442b
+		error = errno;
1d442b
+		goto fail_free;
1d442b
+	}
1d442b
+
1d442b
+	d->dp = fdopendir(fd);
1d442b
+	if (d->dp == NULL) {
1d442b
+		error = errno;
1d442b
+		close(fd);
1d442b
+		goto fail_free;
1d442b
+	}
1d442b
+
1d442b
+	d->offset = 0;
1d442b
+	d->entry = NULL;
1d442b
+
1d442b
+	fi->fh = (uintptr_t) d;
1d442b
+	if (lo->cache == CACHE_ALWAYS)
1d442b
+		fi->keep_cache = 1;
1d442b
+	fuse_reply_open(req, fi);
1d442b
+	return;
1d442b
+
1d442b
+fail_free:
1d442b
+	free(d);
1d442b
+	fuse_reply_err(req, error);
1d442b
+}
1d442b
+
1d442b
+/* Return non-zero when @name is exactly "." or "..". */
1d442b
+static int is_dot_or_dotdot(const char *name)
1d442b
+{
1d442b
+	if (name[0] != '.')
1d442b
+		return 0;
1d442b
+
1d442b
+	if (name[1] == '\0')
1d442b
+		return 1;
1d442b
+
1d442b
+	return name[1] == '.' && name[2] == '\0';
1d442b
+}
1d442b
+
1d442b
+static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
1d442b
+			  off_t offset, struct fuse_file_info *fi, int plus)
1d442b
+{ /* Shared body of lo_readdir() (@plus == 0) and lo_readdirplus() (@plus == 1): fill at most @size bytes of entries starting at @offset */
1d442b
+	struct lo_dirp *d = lo_dirp(fi);
1d442b
+	char *buf;
1d442b
+	char *p;
1d442b
+	size_t rem = size;
1d442b
+	int err;
1d442b
+
1d442b
+	(void) ino;
1d442b
+
1d442b
+	buf = calloc(1, size);
1d442b
+	if (!buf) {
1d442b
+		err = ENOMEM;
1d442b
+		goto error;
1d442b
+	}
1d442b
+	p = buf;
1d442b
+
1d442b
+	if (offset != d->offset) { /* request does not continue where the stream stands: reposition and drop any pending entry */
1d442b
+		seekdir(d->dp, offset);
1d442b
+		d->entry = NULL;
1d442b
+		d->offset = offset;
1d442b
+	}
1d442b
+	while (1) {
1d442b
+		size_t entsize;
1d442b
+		off_t nextoff;
1d442b
+		const char *name;
1d442b
+
1d442b
+		if (!d->entry) { /* nothing left over from an earlier call: read the next entry */
1d442b
+			errno = 0; /* cleared so a NULL from readdir() can be classified below */
1d442b
+			d->entry = readdir(d->dp);
1d442b
+			if (!d->entry) {
1d442b
+				if (errno) {  // Error
1d442b
+					err = errno;
1d442b
+					goto error;
1d442b
+				} else {  // End of stream
1d442b
+					break; 
1d442b
+				}
1d442b
+			}
1d442b
+		}
1d442b
+		nextoff = d->entry->d_off;
1d442b
+		name = d->entry->d_name;
1d442b
+		fuse_ino_t entry_ino = 0; /* stays 0 unless lo_do_lookup() took a reference for this entry */
1d442b
+		if (plus) {
1d442b
+			struct fuse_entry_param e;
1d442b
+			if (is_dot_or_dotdot(name)) { /* "." and ".." are not looked up; every other field of e is zero */
1d442b
+				e = (struct fuse_entry_param) {
1d442b
+					.attr.st_ino = d->entry->d_ino,
1d442b
+					.attr.st_mode = d->entry->d_type << 12,
1d442b
+				};
1d442b
+			} else {
1d442b
+				err = lo_do_lookup(req, ino, name, &e);
1d442b
+				if (err)
1d442b
+					goto error;
1d442b
+				entry_ino = e.ino;
1d442b
+			}
1d442b
+
1d442b
+			entsize = fuse_add_direntry_plus(req, p, rem, name,
1d442b
+							 &e, nextoff);
1d442b
+		} else {
1d442b
+			struct stat st = {
1d442b
+				.st_ino = d->entry->d_ino,
1d442b
+				.st_mode = d->entry->d_type << 12,
1d442b
+			};
1d442b
+			entsize = fuse_add_direntry(req, p, rem, name,
1d442b
+						    &st, nextoff);
1d442b
+		}
1d442b
+		if (entsize > rem) { /* entry does not fit: d->entry is left set so the next call starts with it */
1d442b
+			if (entry_ino != 0) 
1d442b
+				lo_forget_one(req, entry_ino, 1); /* give back the reference taken for the entry that is not returned */
1d442b
+			break;
1d442b
+		}
1d442b
+		
1d442b
+		p += entsize;
1d442b
+		rem -= entsize;
1d442b
+
1d442b
+		d->entry = NULL;
1d442b
+		d->offset = nextoff;
1d442b
+	}
1d442b
+
1d442b
+    err = 0;
1d442b
+error:
1d442b
+    // If there's an error, we can only signal it if we haven't stored
1d442b
+    // any entries yet - otherwise we'd end up with wrong lookup
1d442b
+    // counts for the entries that are already in the buffer. So we
1d442b
+    // return what we've collected until that point.
1d442b
+    if (err && rem == size)
1d442b
+	    fuse_reply_err(req, err);
1d442b
+    else
1d442b
+	    fuse_reply_buf(req, buf, size - rem);
1d442b
+    free(buf);
1d442b
+}
1d442b
+
1d442b
+/* Readdir handler: plain directory listing, delegated to lo_do_readdir(). */
1d442b
+static void lo_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
1d442b
+		       off_t offset, struct fuse_file_info *fi)
1d442b
+{
1d442b
+	const int plus = 0;
1d442b
+
1d442b
+	lo_do_readdir(req, ino, size, offset, fi, plus);
1d442b
+}
1d442b
+
1d442b
+/* Readdirplus handler: listing with a lookup per entry, delegated to lo_do_readdir(). */
1d442b
+static void lo_readdirplus(fuse_req_t req, fuse_ino_t ino, size_t size,
1d442b
+			   off_t offset, struct fuse_file_info *fi)
1d442b
+{
1d442b
+	const int plus = 1;
1d442b
+
1d442b
+	lo_do_readdir(req, ino, size, offset, fi, plus);
1d442b
+}
1d442b
+
1d442b
+/* Releasedir handler: close the directory stream, free its lo_dirp and reply 0. */
1d442b
+static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
1d442b
+{
1d442b
+	struct lo_dirp *dirp = lo_dirp(fi);
1d442b
+	DIR *stream = dirp->dp;
1d442b
+
1d442b
+	(void) ino;
1d442b
+
1d442b
+	closedir(stream);
1d442b
+	free(dirp);
1d442b
+
1d442b
+	fuse_reply_err(req, 0);
1d442b
+}
1d442b
+
1d442b
+/*
1d442b
+ * Create handler: open @name under @parent with O_CREAT added to the
1d442b
+ * requested flags (and O_NOFOLLOW removed), then look the entry up and
1d442b
+ * reply with both the entry and the open handle.
1d442b
+ *
1d442b
+ * fi->fh receives the new descriptor; direct_io or keep_cache is set
1d442b
+ * according to the cache mode.  If the lookup fails, the descriptor is
1d442b
+ * closed before the error reply, because no handle is handed out that
1d442b
+ * could be released later.
1d442b
+ */
1d442b
+static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
1d442b
+		      mode_t mode, struct fuse_file_info *fi)
1d442b
+{
1d442b
+	int fd;
1d442b
+	struct lo_data *lo = lo_data(req);
1d442b
+	struct fuse_entry_param e;
1d442b
+	int err;
1d442b
+
1d442b
+	if (lo_debug(req))
1d442b
+		fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n",
1d442b
+			parent, name);
1d442b
+
1d442b
+	fd = openat(lo_fd(req, parent), name,
1d442b
+		    (fi->flags | O_CREAT) & ~O_NOFOLLOW, mode);
1d442b
+	if (fd == -1)
1d442b
+		return (void) fuse_reply_err(req, errno);
1d442b
+
1d442b
+	fi->fh = fd;
1d442b
+	if (lo->cache == CACHE_NEVER)
1d442b
+		fi->direct_io = 1;
1d442b
+	else if (lo->cache == CACHE_ALWAYS)
1d442b
+		fi->keep_cache = 1;
1d442b
+
1d442b
+	err = lo_do_lookup(req, parent, name, &e);
1d442b
+	if (err) {
1d442b
+		/* The handle never reaches the caller: close it here or it leaks. */
1d442b
+		close(fd);
1d442b
+		fuse_reply_err(req, err);
1d442b
+		return;
1d442b
+	}
1d442b
+
1d442b
+	fuse_reply_create(req, &e, fi);
1d442b
+}
1d442b
+
1d442b
+/* Fsyncdir handler: fdatasync() or fsync() the directory stream's descriptor, chosen by @datasync. */
1d442b
+static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync,
1d442b
+			struct fuse_file_info *fi)
1d442b
+{
1d442b
+	int fd = dirfd(lo_dirp(fi)->dp);
1d442b
+	int res;
1d442b
+
1d442b
+	(void) ino;
1d442b
+
1d442b
+	res = datasync ? fdatasync(fd) : fsync(fd);
1d442b
+
1d442b
+	fuse_reply_err(req, res == -1 ? errno : 0);
1d442b
+}
1d442b
+
1d442b
+/*
1d442b
+ * Open handler: reopen the inode through its /proc/self/fd entry with
1d442b
+ * the requested flags (minus O_NOFOLLOW) and hand the new descriptor
1d442b
+ * back in fi->fh.
1d442b
+ */
1d442b
+static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
1d442b
+{
1d442b
+	struct lo_data *lo = lo_data(req);
1d442b
+	char path[64];
1d442b
+	int fd;
1d442b
+
1d442b
+	if (lo_debug(req)) {
1d442b
+		fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n",
1d442b
+			ino, fi->flags);
1d442b
+	}
1d442b
+
1d442b
+	if (lo->writeback) {
1d442b
+		/* With writeback cache, kernel may send read requests even
1d442b
+		   when userspace opened write-only */
1d442b
+		if ((fi->flags & O_ACCMODE) == O_WRONLY)
1d442b
+			fi->flags = (fi->flags & ~O_ACCMODE) | O_RDWR;
1d442b
+
1d442b
+		/* With writeback cache, O_APPEND is handled by the kernel.
1d442b
+		   This breaks atomicity (since the file may change in the
1d442b
+		   underlying filesystem, so that the kernel's idea of the
1d442b
+		   end of the file isn't accurate anymore). In this example,
1d442b
+		   we just accept that. A more rigorous filesystem may want
1d442b
+		   to return an error here */
1d442b
+		fi->flags &= ~O_APPEND;
1d442b
+	}
1d442b
+
1d442b
+	sprintf(path, "/proc/self/fd/%i", lo_fd(req, ino));
1d442b
+	fd = open(path, fi->flags & ~O_NOFOLLOW);
1d442b
+	if (fd == -1) {
1d442b
+		fuse_reply_err(req, errno);
1d442b
+		return;
1d442b
+	}
1d442b
+
1d442b
+	fi->fh = fd;
1d442b
+	switch (lo->cache) {
1d442b
+	case CACHE_NEVER:
1d442b
+		fi->direct_io = 1;
1d442b
+		break;
1d442b
+	case CACHE_ALWAYS:
1d442b
+		fi->keep_cache = 1;
1d442b
+		break;
1d442b
+	default:
1d442b
+		break;
1d442b
+	}
1d442b
+	fuse_reply_open(req, fi);
1d442b
+}
1d442b
+
1d442b
+/* Release handler: close the descriptor kept in fi->fh and reply 0. */
1d442b
+static void lo_release(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
1d442b
+{
1d442b
+	int fd = fi->fh;
1d442b
+
1d442b
+	(void) ino;
1d442b
+
1d442b
+	close(fd);
1d442b
+	fuse_reply_err(req, 0);
1d442b
+}
1d442b
+
1d442b
+/*
1d442b
+ * Flush handler: close a duplicate of the handle's descriptor and reply
1d442b
+ * with the close() outcome.
1d442b
+ *
1d442b
+ * A dup() failure is answered with dup()'s own errno.  Feeding the -1
1d442b
+ * into close() would replace that errno with EBADF.
1d442b
+ */
1d442b
+static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
1d442b
+{
1d442b
+	int res;
1d442b
+	int dupfd;
1d442b
+	(void) ino;
1d442b
+
1d442b
+	dupfd = dup(fi->fh);
1d442b
+	if (dupfd == -1) {
1d442b
+		fuse_reply_err(req, errno);
1d442b
+		return;
1d442b
+	}
1d442b
+
1d442b
+	res = close(dupfd);
1d442b
+	fuse_reply_err(req, res == -1 ? errno : 0);
1d442b
+}
1d442b
+
1d442b
+/* Fsync handler: fdatasync() or fsync() the handle's descriptor, chosen by @datasync. */
1d442b
+static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync,
1d442b
+		     struct fuse_file_info *fi)
1d442b
+{
1d442b
+	int res;
1d442b
+
1d442b
+	(void) ino;
1d442b
+
1d442b
+	res = datasync ? fdatasync(fi->fh) : fsync(fi->fh);
1d442b
+
1d442b
+	fuse_reply_err(req, res == -1 ? errno : 0);
1d442b
+}
1d442b
+
1d442b
+/*
1d442b
+ * Read handler: describe the requested range as a descriptor-backed
1d442b
+ * buffer (handle descriptor, explicit position @offset, @size bytes)
1d442b
+ * and pass it to fuse_reply_data().
1d442b
+ */
1d442b
+static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size,
1d442b
+		    off_t offset, struct fuse_file_info *fi)
1d442b
+{
1d442b
+	struct fuse_bufvec buf = FUSE_BUFVEC_INIT(size);
1d442b
+	struct fuse_buf *src = &buf.buf[0];
1d442b
+
1d442b
+	if (lo_debug(req)) {
1d442b
+		fuse_log(FUSE_LOG_DEBUG, "lo_read(ino=%" PRIu64 ", size=%zd, "
1d442b
+			"off=%lu)\n", ino, size, (unsigned long) offset);
1d442b
+	}
1d442b
+
1d442b
+	src->flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK;
1d442b
+	src->fd = fi->fh;
1d442b
+	src->pos = offset;
1d442b
+
1d442b
+	fuse_reply_data(req, &buf, FUSE_BUF_SPLICE_MOVE);
1d442b
+}
1d442b
+
1d442b
+/*
1d442b
+ * Write handler: copy @in_buf into a descriptor-backed buffer that
1d442b
+ * targets the handle's descriptor at position @off.  Replies with the
1d442b
+ * byte count, or with the negated result when fuse_buf_copy() fails.
1d442b
+ */
1d442b
+static void lo_write_buf(fuse_req_t req, fuse_ino_t ino,
1d442b
+			 struct fuse_bufvec *in_buf, off_t off,
1d442b
+			 struct fuse_file_info *fi)
1d442b
+{
1d442b
+	(void) ino;
1d442b
+	struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf));
1d442b
+	struct fuse_buf *dst = &out_buf.buf[0];
1d442b
+	ssize_t copied;
1d442b
+
1d442b
+	dst->flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK;
1d442b
+	dst->fd = fi->fh;
1d442b
+	dst->pos = off;
1d442b
+
1d442b
+	if (lo_debug(req)) {
1d442b
+		fuse_log(FUSE_LOG_DEBUG, "lo_write(ino=%" PRIu64 ", size=%zd, off=%lu)\n",
1d442b
+			ino, out_buf.buf[0].size, (unsigned long) off);
1d442b
+	}
1d442b
+
1d442b
+	copied = fuse_buf_copy(&out_buf, in_buf, 0);
1d442b
+	if (copied < 0) {
1d442b
+		fuse_reply_err(req, -copied);
1d442b
+		return;
1d442b
+	}
1d442b
+
1d442b
+	fuse_reply_write(req, (size_t) copied);
1d442b
+}
1d442b
+
1d442b
+/* Statfs handler: fstatvfs() the inode's descriptor and reply with the result or errno. */
1d442b
+static void lo_statfs(fuse_req_t req, fuse_ino_t ino)
1d442b
+{
1d442b
+	struct statvfs stbuf;
1d442b
+
1d442b
+	if (fstatvfs(lo_fd(req, ino), &stbuf) == -1) {
1d442b
+		fuse_reply_err(req, errno);
1d442b
+		return;
1d442b
+	}
1d442b
+
1d442b
+	fuse_reply_statfs(req, &stbuf);
1d442b
+}
1d442b
+
1d442b
+static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode,
1d442b
+			 off_t offset, off_t length, struct fuse_file_info *fi)
1d442b
+{ /* Fallocate handler; which implementation is compiled in depends on HAVE_FALLOCATE / HAVE_POSIX_FALLOCATE */
1d442b
+	int err = EOPNOTSUPP; /* the reply when neither macro is defined */
1d442b
+	(void) ino;
1d442b
+
1d442b
+#ifdef HAVE_FALLOCATE
1d442b
+	err = fallocate(fi->fh, mode, offset, length);
1d442b
+	if (err < 0)
1d442b
+		err = errno;
1d442b
+
1d442b
+#elif defined(HAVE_POSIX_FALLOCATE)
1d442b
+	if (mode) { /* posix_fallocate() is called without a mode, so any non-zero @mode is refused */
1d442b
+		fuse_reply_err(req, EOPNOTSUPP);
1d442b
+		return;
1d442b
+	}
1d442b
+
1d442b
+	err = posix_fallocate(fi->fh, offset, length); /* return value is used as the error code as-is */
1d442b
+#endif
1d442b
+
1d442b
+	fuse_reply_err(req, err);
1d442b
+}
1d442b
+
1d442b
+/* Flock handler: apply @op to the handle's descriptor with flock() and reply 0 or errno. */
1d442b
+static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
1d442b
+		     int op)
1d442b
+{
1d442b
+	int err = 0;
1d442b
+
1d442b
+	(void) ino;
1d442b
+
1d442b
+	if (flock(fi->fh, op) == -1)
1d442b
+		err = errno;
1d442b
+
1d442b
+	fuse_reply_err(req, err);
1d442b
+}
1d442b
+
1d442b
+/*
1d442b
+ * Getxattr handler.
1d442b
+ *
1d442b
+ * Replies ENOSYS when xattr support is off and EPERM for symlinks.
1d442b
+ * With @size == 0 only the attribute's length is reported; otherwise
1d442b
+ * the value is read into a @size-byte buffer and sent back (an empty
1d442b
+ * value is answered with error code 0).
1d442b
+ */
1d442b
+static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name,
1d442b
+			size_t size)
1d442b
+{
1d442b
+	struct lo_inode *inode = lo_inode(req, ino);
1d442b
+	char procname[64];
1d442b
+	char *value;
1d442b
+	ssize_t ret;
1d442b
+
1d442b
+	if (!lo_data(req)->xattr) {
1d442b
+		fuse_reply_err(req, ENOSYS);
1d442b
+		return;
1d442b
+	}
1d442b
+
1d442b
+	if (lo_debug(req)) {
1d442b
+		fuse_log(FUSE_LOG_DEBUG, "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n",
1d442b
+			ino, name, size);
1d442b
+	}
1d442b
+
1d442b
+	if (inode->is_symlink) {
1d442b
+		/* Sorry, no race free way to getxattr on symlink. */
1d442b
+		fuse_reply_err(req, EPERM);
1d442b
+		return;
1d442b
+	}
1d442b
+
1d442b
+	sprintf(procname, "/proc/self/fd/%i", inode->fd);
1d442b
+
1d442b
+	if (!size) {
1d442b
+		/* Size probe: report the length only. */
1d442b
+		ret = getxattr(procname, name, NULL, 0);
1d442b
+		if (ret == -1)
1d442b
+			fuse_reply_err(req, errno);
1d442b
+		else
1d442b
+			fuse_reply_xattr(req, ret);
1d442b
+		return;
1d442b
+	}
1d442b
+
1d442b
+	value = malloc(size);
1d442b
+	if (!value) {
1d442b
+		fuse_reply_err(req, errno);
1d442b
+		return;
1d442b
+	}
1d442b
+
1d442b
+	ret = getxattr(procname, name, value, size);
1d442b
+	if (ret == -1)
1d442b
+		fuse_reply_err(req, errno);
1d442b
+	else if (ret == 0)
1d442b
+		fuse_reply_err(req, 0);
1d442b
+	else
1d442b
+		fuse_reply_buf(req, value, ret);
1d442b
+
1d442b
+	free(value);
1d442b
+}
1d442b
+
1d442b
+/*
1d442b
+ * List the extended attribute names of inode 'ino'.
1d442b
+ *
1d442b
+ * With size != 0 the list is read into a temporary buffer of 'size' bytes
1d442b
+ * and sent with fuse_reply_buf(); with size == 0 only the length reported
1d442b
+ * by listxattr() is sent with fuse_reply_xattr().  Replies ENOSYS when
1d442b
+ * xattr support is disabled, EPERM for symlinks, and errno on syscall
1d442b
+ * failure.
1d442b
+ */
1d442b
+static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size)
1d442b
+{
1d442b
+	char *value = NULL;
1d442b
+	char procname[64];
1d442b
+	struct lo_inode *inode = lo_inode(req, ino);
1d442b
+	ssize_t ret;
1d442b
+	int saverr = ENOSYS;
1d442b
+
1d442b
+	if (!lo_data(req)->xattr)
1d442b
+		goto out_err;
1d442b
+
1d442b
+	if (lo_debug(req)) {
1d442b
+		fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zu)\n",
1d442b
+			ino, size);
1d442b
+	}
1d442b
+
1d442b
+	if (inode->is_symlink) {
1d442b
+		/* Sorry, no race free way to listxattr on symlink. */
1d442b
+		saverr = EPERM;
1d442b
+		goto out_err;
1d442b
+	}
1d442b
+
1d442b
+	/* Address the inode through its fd via the /proc/self/fd link */
1d442b
+	snprintf(procname, sizeof(procname), "/proc/self/fd/%i", inode->fd);
1d442b
+
1d442b
+	if (size) {
1d442b
+		value = malloc(size);
1d442b
+		if (!value) {
1d442b
+			saverr = ENOMEM;
1d442b
+			goto out_err;
1d442b
+		}
1d442b
+
1d442b
+		ret = listxattr(procname, value, size);
1d442b
+		if (ret == -1) {
1d442b
+			saverr = errno;
1d442b
+			goto out_err;
1d442b
+		}
1d442b
+		if (ret == 0) {
1d442b
+			/* Empty list: answered as a plain success reply */
1d442b
+			saverr = 0;
1d442b
+			goto out_err;
1d442b
+		}
1d442b
+
1d442b
+		fuse_reply_buf(req, value, ret);
1d442b
+	} else {
1d442b
+		ret = listxattr(procname, NULL, 0);
1d442b
+		if (ret == -1) {
1d442b
+			saverr = errno;
1d442b
+			goto out_err;
1d442b
+		}
1d442b
+
1d442b
+		fuse_reply_xattr(req, ret);
1d442b
+	}
1d442b
+
1d442b
+	free(value);
1d442b
+	return;
1d442b
+
1d442b
+out_err:
1d442b
+	fuse_reply_err(req, saverr);
1d442b
+	free(value);
1d442b
+}
1d442b
+
1d442b
+/*
1d442b
+ * Set extended attribute 'name' of inode 'ino' to the 'size' bytes at
1d442b
+ * 'value', passing 'flags' through to setxattr().
1d442b
+ *
1d442b
+ * Replies ENOSYS when xattr support is disabled, EPERM for symlinks, and
1d442b
+ * otherwise 0 or the errno of setxattr().
1d442b
+ */
1d442b
+static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name,
1d442b
+			const char *value, size_t size, int flags)
1d442b
+{
1d442b
+	char procname[64];
1d442b
+	struct lo_inode *inode = lo_inode(req, ino);
1d442b
+	int ret;
1d442b
+	int saverr = ENOSYS;
1d442b
+
1d442b
+	if (!lo_data(req)->xattr)
1d442b
+		goto out;
1d442b
+
1d442b
+	if (lo_debug(req)) {
1d442b
+		/*
1d442b
+		 * 'value' is a blob of 'size' bytes, not a C string, so bound
1d442b
+		 * what is printed instead of relying on a terminating NUL.
1d442b
+		 */
1d442b
+		int shown = size > 256 ? 256 : (int) size;
1d442b
+
1d442b
+		fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64 ", name=%s value=%.*s size=%zu)\n",
1d442b
+			ino, name, shown, value ? value : "", size);
1d442b
+	}
1d442b
+
1d442b
+	if (inode->is_symlink) {
1d442b
+		/* Sorry, no race free way to setxattr on symlink. */
1d442b
+		saverr = EPERM;
1d442b
+		goto out;
1d442b
+	}
1d442b
+
1d442b
+	/* Address the inode through its fd via the /proc/self/fd link */
1d442b
+	snprintf(procname, sizeof(procname), "/proc/self/fd/%i", inode->fd);
1d442b
+
1d442b
+	ret = setxattr(procname, name, value, size, flags);
1d442b
+	saverr = ret == -1 ? errno : 0;
1d442b
+
1d442b
+out:
1d442b
+	fuse_reply_err(req, saverr);
1d442b
+}
1d442b
+
1d442b
+/*
1d442b
+ * Remove extended attribute 'name' from inode 'ino'.
1d442b
+ *
1d442b
+ * Replies ENOSYS when xattr support is disabled, EPERM for symlinks, and
1d442b
+ * otherwise 0 or the errno of removexattr().
1d442b
+ */
1d442b
+static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name)
1d442b
+{
1d442b
+	char procname[64];
1d442b
+	struct lo_inode *inode = lo_inode(req, ino);
1d442b
+	int ret;
1d442b
+	int saverr = ENOSYS;
1d442b
+
1d442b
+	if (!lo_data(req)->xattr)
1d442b
+		goto out;
1d442b
+
1d442b
+	if (lo_debug(req)) {
1d442b
+		fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n",
1d442b
+			ino, name);
1d442b
+	}
1d442b
+
1d442b
+	if (inode->is_symlink) {
1d442b
+		/* Sorry, no race free way to removexattr on symlink. */
1d442b
+		saverr = EPERM;
1d442b
+		goto out;
1d442b
+	}
1d442b
+
1d442b
+	/* Address the inode through its fd via the /proc/self/fd link */
1d442b
+	snprintf(procname, sizeof(procname), "/proc/self/fd/%i", inode->fd);
1d442b
+
1d442b
+	ret = removexattr(procname, name);
1d442b
+	saverr = ret == -1 ? errno : 0;
1d442b
+
1d442b
+out:
1d442b
+	fuse_reply_err(req, saverr);
1d442b
+}
1d442b
+
1d442b
+#ifdef HAVE_COPY_FILE_RANGE
1d442b
+/*
1d442b
+ * Copy 'len' bytes from fi_in->fh at off_in to fi_out->fh at off_out with
1d442b
+ * copy_file_range() and reply with the number of bytes copied.
1d442b
+ *
1d442b
+ * On failure the positive errno value is sent, matching every other
1d442b
+ * fuse_reply_err() call in this file.
1d442b
+ */
1d442b
+static void lo_copy_file_range(fuse_req_t req, fuse_ino_t ino_in, off_t off_in,
1d442b
+			       struct fuse_file_info *fi_in,
1d442b
+			       fuse_ino_t ino_out, off_t off_out,
1d442b
+			       struct fuse_file_info *fi_out, size_t len,
1d442b
+			       int flags)
1d442b
+{
1d442b
+	ssize_t res;
1d442b
+
1d442b
+	/* Casts keep each argument in step with its %lu conversion */
1d442b
+	if (lo_debug(req))
1d442b
+		fuse_log(FUSE_LOG_DEBUG, "lo_copy_file_range(ino=%" PRIu64 "/fd=%lu, "
1d442b
+				"off=%lu, ino=%" PRIu64 "/fd=%lu, "
1d442b
+				"off=%lu, size=%zu, flags=0x%x)\n",
1d442b
+			ino_in, (unsigned long) fi_in->fh,
1d442b
+			(unsigned long) off_in,
1d442b
+			ino_out, (unsigned long) fi_out->fh,
1d442b
+			(unsigned long) off_out,
1d442b
+			len, flags);
1d442b
+
1d442b
+	res = copy_file_range(fi_in->fh, &off_in, fi_out->fh, &off_out, len,
1d442b
+			      flags);
1d442b
+	if (res < 0)
1d442b
+		fuse_reply_err(req, errno);
1d442b
+	else
1d442b
+		fuse_reply_write(req, res);
1d442b
+}
1d442b
+#endif
1d442b
+
1d442b
+/* Reposition fi->fh with lseek() and reply with the new offset, or errno. */
1d442b
+static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence,
1d442b
+		     struct fuse_file_info *fi)
1d442b
+{
1d442b
+	off_t pos;
1d442b
+
1d442b
+	(void)ino;
1d442b
+
1d442b
+	pos = lseek(fi->fh, off, whence);
1d442b
+	if (pos == -1) {
1d442b
+		fuse_reply_err(req, errno);
1d442b
+		return;
1d442b
+	}
1d442b
+
1d442b
+	fuse_reply_lseek(req, pos);
1d442b
+}
1d442b
+
1d442b
+/* Low-level FUSE operation table handed to fuse_session_new() in main(). */
1d442b
+static struct fuse_lowlevel_ops lo_oper = {
1d442b
+	.init		= lo_init,
1d442b
+
1d442b
+	/* name lookup and inode lifetime */
1d442b
+	.lookup		= lo_lookup,
1d442b
+	.forget		= lo_forget,
1d442b
+	.forget_multi	= lo_forget_multi,
1d442b
+
1d442b
+	/* attributes */
1d442b
+	.getattr	= lo_getattr,
1d442b
+	.setattr	= lo_setattr,
1d442b
+	.statfs		= lo_statfs,
1d442b
+
1d442b
+	/* namespace changes */
1d442b
+	.mkdir		= lo_mkdir,
1d442b
+	.mknod		= lo_mknod,
1d442b
+	.symlink	= lo_symlink,
1d442b
+	.readlink	= lo_readlink,
1d442b
+	.link		= lo_link,
1d442b
+	.unlink		= lo_unlink,
1d442b
+	.rmdir		= lo_rmdir,
1d442b
+	.rename		= lo_rename,
1d442b
+
1d442b
+	/* directories */
1d442b
+	.opendir	= lo_opendir,
1d442b
+	.readdir	= lo_readdir,
1d442b
+	.readdirplus	= lo_readdirplus,
1d442b
+	.releasedir	= lo_releasedir,
1d442b
+	.fsyncdir	= lo_fsyncdir,
1d442b
+
1d442b
+	/* regular file I/O */
1d442b
+	.create		= lo_create,
1d442b
+	.open		= lo_open,
1d442b
+	.release	= lo_release,
1d442b
+	.flush		= lo_flush,
1d442b
+	.fsync		= lo_fsync,
1d442b
+	.read		= lo_read,
1d442b
+	.write_buf	= lo_write_buf,
1d442b
+	.fallocate	= lo_fallocate,
1d442b
+	.flock		= lo_flock,
1d442b
+	.lseek		= lo_lseek,
1d442b
+#ifdef HAVE_COPY_FILE_RANGE
1d442b
+	.copy_file_range = lo_copy_file_range,
1d442b
+#endif
1d442b
+
1d442b
+	/* extended attributes */
1d442b
+	.getxattr	= lo_getxattr,
1d442b
+	.listxattr	= lo_listxattr,
1d442b
+	.setxattr	= lo_setxattr,
1d442b
+	.removexattr	= lo_removexattr,
1d442b
+};
1d442b
+
1d442b
+/*
1d442b
+ * Entry point: parse the command line, validate and open the source
1d442b
+ * directory as the root inode, then create, mount and run the FUSE session
1d442b
+ * until it ends.  The exit status is 0 on success and 1 on any failure.
1d442b
+ */
1d442b
+int main(int argc, char *argv[])
1d442b
+{
1d442b
+	struct fuse_args args = FUSE_ARGS_INIT(argc, argv);
1d442b
+	struct fuse_session *se;
1d442b
+	struct fuse_cmdline_opts opts;
1d442b
+	struct lo_data lo = { .debug = 0,
1d442b
+	                      .writeback = 0 };
1d442b
+	int ret = -1;
1d442b
+
1d442b
+	/* Don't mask creation mode, kernel already did that */
1d442b
+	umask(0);
1d442b
+
1d442b
+	pthread_mutex_init(&lo.mutex, NULL);
1d442b
+	lo.root.next = lo.root.prev = &lo.root;
1d442b
+	lo.root.fd = -1;
1d442b
+	lo.cache = CACHE_NORMAL;
1d442b
+
1d442b
+	if (fuse_parse_cmdline(&args, &opts) != 0)
1d442b
+		return 1;
1d442b
+	if (opts.show_help) {
1d442b
+		printf("usage: %s [options] <mountpoint>\n\n", argv[0]);
1d442b
+		fuse_cmdline_help();
1d442b
+		fuse_lowlevel_help();
1d442b
+		ret = 0;
1d442b
+		goto err_out1;
1d442b
+	} else if (opts.show_version) {
1d442b
+		printf("FUSE library version %s\n", fuse_pkgversion());
1d442b
+		fuse_lowlevel_version();
1d442b
+		ret = 0;
1d442b
+		goto err_out1;
1d442b
+	}
1d442b
+
1d442b
+	if (opts.mountpoint == NULL) {
1d442b
+		printf("usage: %s [options] <mountpoint>\n", argv[0]);
1d442b
+		printf("       %s --help\n", argv[0]);
1d442b
+		ret = 1;
1d442b
+		goto err_out1;
1d442b
+	}
1d442b
+
1d442b
+	/* Leave through the cleanup path so mountpoint and args are freed */
1d442b
+	if (fuse_opt_parse(&args, &lo, lo_opts, NULL) == -1) {
1d442b
+		ret = 1;
1d442b
+		goto err_out1;
1d442b
+	}
1d442b
+
1d442b
+	lo.debug = opts.debug;
1d442b
+	lo.root.refcount = 2;
1d442b
+	if (lo.source) {
1d442b
+		struct stat st;
1d442b
+		int res;
1d442b
+
1d442b
+		res = lstat(lo.source, &st);
1d442b
+		if (res == -1) {
1d442b
+			fuse_log(FUSE_LOG_ERR, "failed to stat source (\"%s\"): %m\n",
1d442b
+				 lo.source);
1d442b
+			exit(1);
1d442b
+		}
1d442b
+		if (!S_ISDIR(st.st_mode)) {
1d442b
+			fuse_log(FUSE_LOG_ERR, "source is not a directory\n");
1d442b
+			exit(1);
1d442b
+		}
1d442b
+
1d442b
+	} else {
1d442b
+		lo.source = "/";
1d442b
+	}
1d442b
+	lo.root.is_symlink = false;
1d442b
+	if (!lo.timeout_set) {
1d442b
+		/* No explicit timeout given: derive it from the cache mode */
1d442b
+		switch (lo.cache) {
1d442b
+		case CACHE_NEVER:
1d442b
+			lo.timeout = 0.0;
1d442b
+			break;
1d442b
+
1d442b
+		case CACHE_NORMAL:
1d442b
+			lo.timeout = 1.0;
1d442b
+			break;
1d442b
+
1d442b
+		case CACHE_ALWAYS:
1d442b
+			lo.timeout = 86400.0;
1d442b
+			break;
1d442b
+		}
1d442b
+	} else if (lo.timeout < 0) {
1d442b
+		fuse_log(FUSE_LOG_ERR, "timeout is negative (%lf)\n",
1d442b
+			 lo.timeout);
1d442b
+		exit(1);
1d442b
+	}
1d442b
+
1d442b
+	lo.root.fd = open(lo.source, O_PATH);
1d442b
+	if (lo.root.fd == -1) {
1d442b
+		fuse_log(FUSE_LOG_ERR, "open(\"%s\", O_PATH): %m\n",
1d442b
+			 lo.source);
1d442b
+		exit(1);
1d442b
+	}
1d442b
+
1d442b
+	se = fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo);
1d442b
+	if (se == NULL)
1d442b
+		goto err_out1;
1d442b
+
1d442b
+	if (fuse_set_signal_handlers(se) != 0)
1d442b
+		goto err_out2;
1d442b
+
1d442b
+	if (fuse_session_mount(se, opts.mountpoint) != 0)
1d442b
+		goto err_out3;
1d442b
+
1d442b
+	fuse_daemonize(opts.foreground);
1d442b
+
1d442b
+	/* Block until ctrl+c or fusermount -u */
1d442b
+	if (opts.singlethread)
1d442b
+		ret = fuse_session_loop(se);
1d442b
+	else
1d442b
+		ret = fuse_session_loop_mt(se, opts.clone_fd);
1d442b
+
1d442b
+	fuse_session_unmount(se);
1d442b
+err_out3:
1d442b
+	fuse_remove_signal_handlers(se);
1d442b
+err_out2:
1d442b
+	fuse_session_destroy(se);
1d442b
+err_out1:
1d442b
+	free(opts.mountpoint);
1d442b
+	fuse_opt_free_args(&args);
1d442b
+
1d442b
+	if (lo.root.fd >= 0)
1d442b
+		close(lo.root.fd);
1d442b
+
1d442b
+	return ret ? 1 : 0;
1d442b
+}