|
|
887953 |
From 55e67fb41ae3b4388839723ac929cd239280a0fc Mon Sep 17 00:00:00 2001
|
|
|
887953 |
From: Amar Tumballi <amarts@redhat.com>
|
|
|
887953 |
Date: Thu, 7 Feb 2019 18:06:43 +0530
|
|
|
887953 |
Subject: [PATCH 522/529] fuse: add --lru-limit option
|
|
|
887953 |
|
|
|
887953 |
The inode LRU mechanism is moot in fuse xlator (ie. there is no
|
|
|
887953 |
limit for the LRU list), as fuse inodes are referenced from
|
|
|
887953 |
kernel context, and thus they can only be dropped on request of
|
|
|
887953 |
the kernel. This might result in a high number of passive
|
|
|
887953 |
inodes which are useless for the glusterfs client, causing a
|
|
|
887953 |
significant memory overhead.
|
|
|
887953 |
|
|
|
887953 |
This change tries to remedy this by extending the LRU semantics
|
|
|
887953 |
and allowing a finite limit to be set on the fuse inode LRU.
|
|
|
887953 |
|
|
|
887953 |
A brief history of the problem:
|
|
|
887953 |
|
|
|
887953 |
When gluster's inode table was designed, fuse didn't have any
|
|
|
887953 |
'invalidate' method, which means, userspace application could
|
|
|
887953 |
never ask kernel to send a 'forget()' fop, instead had to wait
|
|
|
887953 |
for kernel to send it based on kernel's parameters. Inode table
|
|
|
887953 |
remembers the number of times kernel has cached the inode based
|
|
|
887953 |
on the 'nlookup' parameter. And 'nlookup' field is not used by
|
|
|
887953 |
any other entry points (like server-protocol, gfapi etc).
|
|
|
887953 |
|
|
|
887953 |
Hence the inode_table of fuse module always has to have lru-limit
|
|
|
887953 |
as '0', which means no limit. GlusterFS always had to keep all
|
|
|
887953 |
inodes in memory as kernel would have had a reference to it.
|
|
|
887953 |
Again, the reason for this is, kernel's glusterfs inode reference
|
|
|
887953 |
was pointer of 'inode_t' structure in glusterfs. As it is a
|
|
|
887953 |
pointer, we could never free it (to prevent segfault, or memory
|
|
|
887953 |
corruption).
|
|
|
887953 |
|
|
|
887953 |
Solution:
|
|
|
887953 |
|
|
|
887953 |
In the inode table, handle the prune case of inodes with 'nlookup'
|
|
|
887953 |
differently, and call an 'invalidator' method, which in this case is
|
|
|
887953 |
fuse_invalidate(), and it sends the request to kernel for getting
|
|
|
887953 |
the forget request.
|
|
|
887953 |
|
|
|
887953 |
When the kernel sends the forget, it means, it has dropped all
|
|
|
887953 |
the references to the inode, and it will send the forget with the
|
|
|
887953 |
'nlookup' parameter too. We just need to make sure to reduce the
|
|
|
887953 |
'nlookup' value we have when we get forget. That automatically
|
|
|
887953 |
causes the relevant prune to happen.
|
|
|
887953 |
|
|
|
887953 |
Credits: Csaba Henk, Xavier Hernandez, Raghavendra Gowdappa, Nithya B
|
|
|
887953 |
|
|
|
887953 |
Upstream:
|
|
|
887953 |
> URL: https://review.gluster.org/19778
|
|
|
887953 |
|
|
|
887953 |
BUG: 1511779
|
|
|
887953 |
Change-Id: Iabe22a62e0f819b7eb67d4ecb850dd559b0c937f
|
|
|
887953 |
Signed-off-by: Amar Tumballi <amarts@redhat.com>
|
|
|
887953 |
Reviewed-on: https://code.engineering.redhat.com/gerrit/162494
|
|
|
887953 |
Reviewed-by: Nithya Balachandran <nbalacha@redhat.com>
|
|
|
887953 |
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
|
887953 |
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
|
|
|
887953 |
---
|
|
|
887953 |
doc/mount.glusterfs.8 | 4 +
|
|
|
887953 |
glusterfsd/src/glusterfsd.c | 24 +++
|
|
|
887953 |
glusterfsd/src/glusterfsd.h | 1 +
|
|
|
887953 |
libglusterfs/src/glusterfs.h | 1 +
|
|
|
887953 |
libglusterfs/src/inode.c | 256 ++++++++++++++++++++++++----
|
|
|
887953 |
libglusterfs/src/inode.h | 17 +-
|
|
|
887953 |
tests/features/fuse-lru-limit.t | 42 +++++
|
|
|
887953 |
xlators/mount/fuse/src/fuse-bridge.c | 121 ++++++++-----
|
|
|
887953 |
xlators/mount/fuse/src/fuse-bridge.h | 3 +
|
|
|
887953 |
xlators/mount/fuse/utils/mount.glusterfs.in | 7 +
|
|
|
887953 |
10 files changed, 393 insertions(+), 83 deletions(-)
|
|
|
887953 |
create mode 100644 tests/features/fuse-lru-limit.t
|
|
|
887953 |
|
|
|
887953 |
diff --git a/doc/mount.glusterfs.8 b/doc/mount.glusterfs.8
|
|
|
887953 |
index 95aad02..ed6b410 100644
|
|
|
887953 |
--- a/doc/mount.glusterfs.8
|
|
|
887953 |
+++ b/doc/mount.glusterfs.8
|
|
|
887953 |
@@ -119,6 +119,10 @@ Provide list of backup volfile servers in the following format [default: None]
|
|
|
887953 |
\fBDeprecated\fR option - placed here for backward compatibility [default: 1]
|
|
|
887953 |
.TP
|
|
|
887953 |
.TP
|
|
|
887953 |
+\fBlru-limit=\fRN
|
|
|
887953 |
+Set fuse module's limit for number of inodes kept in LRU list to N [default: 131072]
|
|
|
887953 |
+.TP
|
|
|
887953 |
+.TP
|
|
|
887953 |
\fBbackground-qlen=\fRN
|
|
|
887953 |
Set fuse module's background queue length to N [default: 64]
|
|
|
887953 |
.TP
|
|
|
887953 |
diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
|
|
|
887953 |
index 990036c..2e2cd77 100644
|
|
|
887953 |
--- a/glusterfsd/src/glusterfsd.c
|
|
|
887953 |
+++ b/glusterfsd/src/glusterfsd.c
|
|
|
887953 |
@@ -203,6 +203,9 @@ static struct argp_option gf_options[] = {
|
|
|
887953 |
"[default: 300]"},
|
|
|
887953 |
{"resolve-gids", ARGP_RESOLVE_GIDS_KEY, 0, 0,
|
|
|
887953 |
"Resolve all auxiliary groups in fuse translator (max 32 otherwise)"},
|
|
|
887953 |
+ {"lru-limit", ARGP_FUSE_LRU_LIMIT_KEY, "N", 0,
|
|
|
887953 |
+ "Set fuse module's limit for number of inodes kept in LRU list to N "
|
|
|
887953 |
+ "[default: 131072]"},
|
|
|
887953 |
{"background-qlen", ARGP_FUSE_BACKGROUND_QLEN_KEY, "N", 0,
|
|
|
887953 |
"Set fuse module's background queue length to N "
|
|
|
887953 |
"[default: 64]"},
|
|
|
887953 |
@@ -462,6 +465,15 @@ set_fuse_mount_options (glusterfs_ctx_t *ctx, dict_t *options)
|
|
|
887953 |
}
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
+ if (cmd_args->lru_limit >= 0) {
|
|
|
887953 |
+ ret = dict_set_int32(options, "lru-limit", cmd_args->lru_limit);
|
|
|
887953 |
+ if (ret < 0) {
|
|
|
887953 |
+ gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
|
|
|
887953 |
+ "lru-limit");
|
|
|
887953 |
+ goto err;
|
|
|
887953 |
+ }
|
|
|
887953 |
+ }
|
|
|
887953 |
+
|
|
|
887953 |
if (cmd_args->background_qlen) {
|
|
|
887953 |
ret = dict_set_int32 (options, "background-qlen",
|
|
|
887953 |
cmd_args->background_qlen);
|
|
|
887953 |
@@ -1169,6 +1181,13 @@ parse_opts (int key, char *arg, struct argp_state *state)
|
|
|
887953 |
cmd_args->resolve_gids = 1;
|
|
|
887953 |
break;
|
|
|
887953 |
|
|
|
887953 |
+ case ARGP_FUSE_LRU_LIMIT_KEY:
|
|
|
887953 |
+ if (!gf_string2int32(arg, &cmd_args->lru_limit))
|
|
|
887953 |
+ break;
|
|
|
887953 |
+
|
|
|
887953 |
+ argp_failure(state, -1, 0, "unknown LRU limit option %s", arg);
|
|
|
887953 |
+ break;
|
|
|
887953 |
+
|
|
|
887953 |
case ARGP_FUSE_BACKGROUND_QLEN_KEY:
|
|
|
887953 |
if (!gf_string2int (arg, &cmd_args->background_qlen))
|
|
|
887953 |
break;
|
|
|
887953 |
@@ -1937,6 +1956,11 @@ parse_cmdline (int argc, char *argv[], glusterfs_ctx_t *ctx)
|
|
|
887953 |
ctx->ssl_cert_depth = glusterfs_read_secure_access_file ();
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
+ /* Need to set lru_limit to below 0 to indicate there was nothing
|
|
|
887953 |
+ specified. This is needed as 0 is a valid option, and may not be
|
|
|
887953 |
+ the default value. */
|
|
|
887953 |
+ cmd_args->lru_limit = -1;
|
|
|
887953 |
+
|
|
|
887953 |
argp_parse (&argp, argc, argv, ARGP_IN_ORDER, NULL, cmd_args);
|
|
|
887953 |
if (cmd_args->print_netgroups) {
|
|
|
887953 |
/* When this option is set we don't want to do anything else
|
|
|
887953 |
diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h
|
|
|
887953 |
index 75cb1d8..1550a30 100644
|
|
|
887953 |
--- a/glusterfsd/src/glusterfsd.h
|
|
|
887953 |
+++ b/glusterfsd/src/glusterfsd.h
|
|
|
887953 |
@@ -100,6 +100,7 @@ enum argp_option_keys {
|
|
|
887953 |
ARGP_SUBDIR_MOUNT_KEY = 178,
|
|
|
887953 |
ARGP_FUSE_EVENT_HISTORY_KEY = 179,
|
|
|
887953 |
ARGP_READER_THREAD_COUNT_KEY = 180,
|
|
|
887953 |
+ ARGP_FUSE_LRU_LIMIT_KEY = 190,
|
|
|
887953 |
};
|
|
|
887953 |
|
|
|
887953 |
struct _gfd_vol_top_priv {
|
|
|
887953 |
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
|
|
|
887953 |
index 157437c..2690306 100644
|
|
|
887953 |
--- a/libglusterfs/src/glusterfs.h
|
|
|
887953 |
+++ b/libglusterfs/src/glusterfs.h
|
|
|
887953 |
@@ -413,6 +413,7 @@ struct _cmd_args {
|
|
|
887953 |
pid_t client_pid;
|
|
|
887953 |
int client_pid_set;
|
|
|
887953 |
unsigned uid_map_root;
|
|
|
887953 |
+ int32_t lru_limit;
|
|
|
887953 |
int background_qlen;
|
|
|
887953 |
int congestion_threshold;
|
|
|
887953 |
char *fuse_mountopts;
|
|
|
887953 |
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
|
|
|
887953 |
index 29d3c8f..f57020a 100644
|
|
|
887953 |
--- a/libglusterfs/src/inode.c
|
|
|
887953 |
+++ b/libglusterfs/src/inode.c
|
|
|
887953 |
@@ -24,6 +24,100 @@
|
|
|
887953 |
move latest accessed dentry to list_head of inode
|
|
|
887953 |
*/
|
|
|
887953 |
|
|
|
887953 |
+/* clang-format off */
|
|
|
887953 |
+/*
|
|
|
887953 |
+
|
|
|
887953 |
+Details as per Xavi:
|
|
|
887953 |
+
|
|
|
887953 |
+ I think we should have 3 lists: active, lru and invalidate.
|
|
|
887953 |
+
|
|
|
887953 |
+We'll need 3 things: refs, nlookups and invalidate_sent flag. Any change of
|
|
|
887953 |
+refs, invalidate_sent flag and moving from one list to another must be done
|
|
|
887953 |
+atomically.
|
|
|
887953 |
+
|
|
|
887953 |
+With this information, these are the states that cause a transition:
|
|
|
887953 |
+
|
|
|
887953 |
+ refs nlookups inv_sent op
|
|
|
887953 |
+ 1 0 0 unref -> refs = 0, active--->destroy
|
|
|
887953 |
+ 1 1 0 unref -> refs = 0, active--->lru
|
|
|
887953 |
+ 1 1 0 forget -> nlookups = 0, active--->active
|
|
|
887953 |
+ *0 1 0 forget -> nlookups = 0, lru--->destroy
|
|
|
887953 |
+ *0 1 1 forget -> nlookups = 0, invalidate--->destroy
|
|
|
887953 |
+ 0 1 0 ref -> refs = 1, lru--->active
|
|
|
887953 |
+ 0 1 1 ref -> refs = 1, inv_sent = 0, invalidate--->active
|
|
|
887953 |
+ 0 1 0 overflow -> refs = 1, inv_sent = 1, lru--->invalidate
|
|
|
887953 |
+ 1 1 1 unref -> refs = 0, invalidate--->invalidate
|
|
|
887953 |
+ 1 1 1 forget -> nlookups = 0, inv_sent = 0, invalidate--->active
|
|
|
887953 |
+
|
|
|
887953 |
+(*) technically these combinations cannot happen because a forget sent by the
|
|
|
887953 |
+kernel first calls ref() and then unref(). However it's equivalent.
|
|
|
887953 |
+
|
|
|
887953 |
+overflow means that lru list has grown beyond the limit and the inode needs to
|
|
|
887953 |
+be invalidated. All other combinations do not cause a change in state or are not
|
|
|
887953 |
+possible.
|
|
|
887953 |
+
|
|
|
887953 |
+Based on this, the code could be similar to this:
|
|
|
887953 |
+
|
|
|
887953 |
+ ref(inode, inv)
|
|
|
887953 |
+ {
|
|
|
887953 |
+ if (refs == 0) {
|
|
|
887953 |
+ if (inv_sent) {
|
|
|
887953 |
+ invalidate_count--;
|
|
|
887953 |
+ inv_sent = 0;
|
|
|
887953 |
+ } else {
|
|
|
887953 |
+ lru_count--;
|
|
|
887953 |
+ }
|
|
|
887953 |
+ if (inv) {
|
|
|
887953 |
+ inv_sent = 1;
|
|
|
887953 |
+ invalidate_count++;
|
|
|
887953 |
+ list_move(inode, invalidate);
|
|
|
887953 |
+ } else {
|
|
|
887953 |
+ active_count++;
|
|
|
887953 |
+ list_move(inode, active);
|
|
|
887953 |
+ }
|
|
|
887953 |
+ }
|
|
|
887953 |
+ refs++;
|
|
|
887953 |
+ }
|
|
|
887953 |
+
|
|
|
887953 |
+ unref(inode, clear)
|
|
|
887953 |
+ {
|
|
|
887953 |
+ if (clear && inv_sent) {
|
|
|
887953 |
+ // there is a case of fuse itself sending forget, without
|
|
|
887953 |
+ // invalidate, after entry delete, like unlink(), rmdir().
|
|
|
887953 |
+ inv_sent = 0;
|
|
|
887953 |
+ invalidate_count--;
|
|
|
887953 |
+ active_count++;
|
|
|
887953 |
+ list_move(inode, active);
|
|
|
887953 |
+ }
|
|
|
887953 |
+ refs--;
|
|
|
887953 |
+ if ((refs == 0) && !inv_sent) {
|
|
|
887953 |
+ active_count--;
|
|
|
887953 |
+ if (nlookups == 0) {
|
|
|
887953 |
+ destroy(inode);
|
|
|
887953 |
+ } else {
|
|
|
887953 |
+ lru_count++;
|
|
|
887953 |
+ list_move(inode, lru);
|
|
|
887953 |
+ }
|
|
|
887953 |
+ }
|
|
|
887953 |
+ }
|
|
|
887953 |
+
|
|
|
887953 |
+ forget(inode)
|
|
|
887953 |
+ {
|
|
|
887953 |
+ ref(inode, false);
|
|
|
887953 |
+ nlookups--;
|
|
|
887953 |
+ unref(inode, true);
|
|
|
887953 |
+ }
|
|
|
887953 |
+
|
|
|
887953 |
+ overflow(inode)
|
|
|
887953 |
+ {
|
|
|
887953 |
+ ref(inode, true);
|
|
|
887953 |
+ invalidator(inode);
|
|
|
887953 |
+ unref(inode, false);
|
|
|
887953 |
+ }
|
|
|
887953 |
+
|
|
|
887953 |
+*/
|
|
|
887953 |
+/* clang-format on */
|
|
|
887953 |
+
|
|
|
887953 |
#define INODE_DUMP_LIST(head, key_buf, key_prefix, list_type) \
|
|
|
887953 |
{ \
|
|
|
887953 |
int i = 1; \
|
|
|
887953 |
@@ -37,7 +131,7 @@
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
static inode_t *
|
|
|
887953 |
-__inode_unref (inode_t *inode);
|
|
|
887953 |
+__inode_unref (inode_t *inode, gf_boolean_t clear);
|
|
|
887953 |
|
|
|
887953 |
static int
|
|
|
887953 |
inode_table_prune (inode_table_t *table);
|
|
|
887953 |
@@ -138,7 +232,7 @@ __dentry_unset (dentry_t *dentry)
|
|
|
887953 |
dentry->name = NULL;
|
|
|
887953 |
|
|
|
887953 |
if (dentry->parent) {
|
|
|
887953 |
- __inode_unref (dentry->parent);
|
|
|
887953 |
+ __inode_unref (dentry->parent, _gf_false);
|
|
|
887953 |
dentry->parent = NULL;
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
@@ -465,7 +559,7 @@ out:
|
|
|
887953 |
|
|
|
887953 |
|
|
|
887953 |
static inode_t *
|
|
|
887953 |
-__inode_unref (inode_t *inode)
|
|
|
887953 |
+__inode_unref (inode_t *inode, gf_boolean_t clear)
|
|
|
887953 |
{
|
|
|
887953 |
int index = 0;
|
|
|
887953 |
xlator_t *this = NULL;
|
|
|
887953 |
@@ -473,8 +567,6 @@ __inode_unref (inode_t *inode)
|
|
|
887953 |
if (!inode)
|
|
|
887953 |
return NULL;
|
|
|
887953 |
|
|
|
887953 |
- this = THIS;
|
|
|
887953 |
-
|
|
|
887953 |
/*
|
|
|
887953 |
* Root inode should always be in active list of inode table. So unrefs
|
|
|
887953 |
* on root inode are no-ops.
|
|
|
887953 |
@@ -482,6 +574,14 @@ __inode_unref (inode_t *inode)
|
|
|
887953 |
if (__is_root_gfid(inode->gfid))
|
|
|
887953 |
return inode;
|
|
|
887953 |
|
|
|
887953 |
+ this = THIS;
|
|
|
887953 |
+
|
|
|
887953 |
+ if (clear && inode->invalidate_sent) {
|
|
|
887953 |
+ inode->invalidate_sent = _gf_false;
|
|
|
887953 |
+ inode->table->invalidate_size--;
|
|
|
887953 |
+ __inode_activate(inode);
|
|
|
887953 |
+ }
|
|
|
887953 |
+
|
|
|
887953 |
GF_ASSERT (inode->ref);
|
|
|
887953 |
|
|
|
887953 |
--inode->ref;
|
|
|
887953 |
@@ -492,7 +592,7 @@ __inode_unref (inode_t *inode)
|
|
|
887953 |
inode->_ctx[index].ref--;
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
- if (!inode->ref) {
|
|
|
887953 |
+ if (!inode->ref && !inode->invalidate_sent) {
|
|
|
887953 |
inode->table->active_size--;
|
|
|
887953 |
|
|
|
887953 |
if (inode->nlookup)
|
|
|
887953 |
@@ -506,7 +606,7 @@ __inode_unref (inode_t *inode)
|
|
|
887953 |
|
|
|
887953 |
|
|
|
887953 |
static inode_t *
|
|
|
887953 |
-__inode_ref (inode_t *inode)
|
|
|
887953 |
+__inode_ref (inode_t *inode, gf_boolean_t is_invalidate)
|
|
|
887953 |
{
|
|
|
887953 |
int index = 0;
|
|
|
887953 |
xlator_t *this = NULL;
|
|
|
887953 |
@@ -516,11 +616,6 @@ __inode_ref (inode_t *inode)
|
|
|
887953 |
|
|
|
887953 |
this = THIS;
|
|
|
887953 |
|
|
|
887953 |
- if (!inode->ref) {
|
|
|
887953 |
- inode->table->lru_size--;
|
|
|
887953 |
- __inode_activate (inode);
|
|
|
887953 |
- }
|
|
|
887953 |
-
|
|
|
887953 |
/*
|
|
|
887953 |
* Root inode should always be in active list of inode table. So unrefs
|
|
|
887953 |
* on root inode are no-ops. If we do not allow unrefs but allow refs,
|
|
|
887953 |
@@ -532,6 +627,22 @@ __inode_ref (inode_t *inode)
|
|
|
887953 |
if (__is_root_gfid(inode->gfid) && inode->ref)
|
|
|
887953 |
return inode;
|
|
|
887953 |
|
|
|
887953 |
+ if (!inode->ref) {
|
|
|
887953 |
+ if (inode->invalidate_sent) {
|
|
|
887953 |
+ inode->invalidate_sent = _gf_false;
|
|
|
887953 |
+ inode->table->invalidate_size--;
|
|
|
887953 |
+ } else {
|
|
|
887953 |
+ inode->table->lru_size--;
|
|
|
887953 |
+ }
|
|
|
887953 |
+ if (is_invalidate) {
|
|
|
887953 |
+ inode->invalidate_sent = _gf_true;
|
|
|
887953 |
+ inode->table->invalidate_size++;
|
|
|
887953 |
+ list_move_tail(&inode->list, &inode->table->invalidate);
|
|
|
887953 |
+ } else {
|
|
|
887953 |
+ __inode_activate(inode);
|
|
|
887953 |
+ }
|
|
|
887953 |
+ }
|
|
|
887953 |
+
|
|
|
887953 |
inode->ref++;
|
|
|
887953 |
|
|
|
887953 |
index = __inode_get_xl_index (inode, this);
|
|
|
887953 |
@@ -556,7 +667,7 @@ inode_unref (inode_t *inode)
|
|
|
887953 |
|
|
|
887953 |
pthread_mutex_lock (&table->lock);
|
|
|
887953 |
{
|
|
|
887953 |
- inode = __inode_unref (inode);
|
|
|
887953 |
+ inode = __inode_unref (inode, _gf_false);
|
|
|
887953 |
}
|
|
|
887953 |
pthread_mutex_unlock (&table->lock);
|
|
|
887953 |
|
|
|
887953 |
@@ -578,7 +689,7 @@ inode_ref (inode_t *inode)
|
|
|
887953 |
|
|
|
887953 |
pthread_mutex_lock (&table->lock);
|
|
|
887953 |
{
|
|
|
887953 |
- inode = __inode_ref (inode);
|
|
|
887953 |
+ inode = __inode_ref (inode, _gf_false);
|
|
|
887953 |
}
|
|
|
887953 |
pthread_mutex_unlock (&table->lock);
|
|
|
887953 |
|
|
|
887953 |
@@ -614,7 +725,7 @@ __dentry_create (inode_t *inode, inode_t *parent, const char *name)
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
if (parent)
|
|
|
887953 |
- newd->parent = __inode_ref (parent);
|
|
|
887953 |
+ newd->parent = __inode_ref (parent, _gf_false);
|
|
|
887953 |
|
|
|
887953 |
list_add (&newd->inode_list, &inode->dentry_list);
|
|
|
887953 |
newd->inode = inode;
|
|
|
887953 |
@@ -685,7 +796,7 @@ inode_new (inode_table_t *table)
|
|
|
887953 |
{
|
|
|
887953 |
inode = __inode_create (table);
|
|
|
887953 |
if (inode != NULL) {
|
|
|
887953 |
- __inode_ref (inode);
|
|
|
887953 |
+ __inode_ref (inode, _gf_false);
|
|
|
887953 |
}
|
|
|
887953 |
}
|
|
|
887953 |
pthread_mutex_unlock (&table->lock);
|
|
|
887953 |
@@ -802,7 +913,7 @@ inode_grep (inode_table_t *table, inode_t *parent, const char *name)
|
|
|
887953 |
inode = dentry->inode;
|
|
|
887953 |
|
|
|
887953 |
if (inode)
|
|
|
887953 |
- __inode_ref (inode);
|
|
|
887953 |
+ __inode_ref (inode, _gf_false);
|
|
|
887953 |
}
|
|
|
887953 |
pthread_mutex_unlock (&table->lock);
|
|
|
887953 |
|
|
|
887953 |
@@ -947,7 +1058,7 @@ inode_find (inode_table_t *table, uuid_t gfid)
|
|
|
887953 |
{
|
|
|
887953 |
inode = __inode_find (table, gfid);
|
|
|
887953 |
if (inode)
|
|
|
887953 |
- __inode_ref (inode);
|
|
|
887953 |
+ __inode_ref (inode, _gf_false);
|
|
|
887953 |
}
|
|
|
887953 |
pthread_mutex_unlock (&table->lock);
|
|
|
887953 |
|
|
|
887953 |
@@ -1096,7 +1207,7 @@ inode_link (inode_t *inode, inode_t *parent, const char *name,
|
|
|
887953 |
linked_inode = __inode_link (inode, parent, name, iatt);
|
|
|
887953 |
|
|
|
887953 |
if (linked_inode)
|
|
|
887953 |
- __inode_ref (linked_inode);
|
|
|
887953 |
+ __inode_ref (linked_inode, _gf_false);
|
|
|
887953 |
}
|
|
|
887953 |
pthread_mutex_unlock (&table->lock);
|
|
|
887953 |
|
|
|
887953 |
@@ -1178,6 +1289,31 @@ inode_forget (inode_t *inode, uint64_t nlookup)
|
|
|
887953 |
return 0;
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
+int
|
|
|
887953 |
+inode_forget_with_unref(inode_t *inode, uint64_t nlookup)
|
|
|
887953 |
+{
|
|
|
887953 |
+ inode_table_t *table = NULL;
|
|
|
887953 |
+
|
|
|
887953 |
+ if (!inode) {
|
|
|
887953 |
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
|
|
|
887953 |
+ "inode not found");
|
|
|
887953 |
+ return -1;
|
|
|
887953 |
+ }
|
|
|
887953 |
+
|
|
|
887953 |
+ table = inode->table;
|
|
|
887953 |
+
|
|
|
887953 |
+ pthread_mutex_lock(&table->lock);
|
|
|
887953 |
+ {
|
|
|
887953 |
+ __inode_forget(inode, nlookup);
|
|
|
887953 |
+ __inode_unref(inode, _gf_true);
|
|
|
887953 |
+ }
|
|
|
887953 |
+ pthread_mutex_unlock(&table->lock);
|
|
|
887953 |
+
|
|
|
887953 |
+ inode_table_prune(table);
|
|
|
887953 |
+
|
|
|
887953 |
+ return 0;
|
|
|
887953 |
+}
|
|
|
887953 |
+
|
|
|
887953 |
/*
|
|
|
887953 |
* Invalidate an inode. This is invoked when a translator decides that an inode's
|
|
|
887953 |
* cache is no longer valid. Any translator interested in taking action in this
|
|
|
887953 |
@@ -1356,7 +1492,7 @@ inode_parent (inode_t *inode, uuid_t pargfid, const char *name)
|
|
|
887953 |
parent = dentry->parent;
|
|
|
887953 |
|
|
|
887953 |
if (parent)
|
|
|
887953 |
- __inode_ref (parent);
|
|
|
887953 |
+ __inode_ref (parent, _gf_false);
|
|
|
887953 |
}
|
|
|
887953 |
pthread_mutex_unlock (&table->lock);
|
|
|
887953 |
|
|
|
887953 |
@@ -1540,6 +1676,7 @@ inode_table_prune (inode_table_t *table)
|
|
|
887953 |
inode_t *del = NULL;
|
|
|
887953 |
inode_t *tmp = NULL;
|
|
|
887953 |
inode_t *entry = NULL;
|
|
|
887953 |
+ int64_t lru_size = 0;
|
|
|
887953 |
|
|
|
887953 |
if (!table)
|
|
|
887953 |
return -1;
|
|
|
887953 |
@@ -1548,8 +1685,11 @@ inode_table_prune (inode_table_t *table)
|
|
|
887953 |
|
|
|
887953 |
pthread_mutex_lock (&table->lock);
|
|
|
887953 |
{
|
|
|
887953 |
- while (table->lru_limit
|
|
|
887953 |
- && table->lru_size > (table->lru_limit)) {
|
|
|
887953 |
+ if (!table->lru_limit)
|
|
|
887953 |
+ goto purge_list;
|
|
|
887953 |
+
|
|
|
887953 |
+ lru_size = table->lru_size;
|
|
|
887953 |
+ while (lru_size > (table->lru_limit)) {
|
|
|
887953 |
if (list_empty (&table->lru)) {
|
|
|
887953 |
gf_msg_callingfn (THIS->name, GF_LOG_WARNING, 0,
|
|
|
887953 |
LG_MSG_INVALID_INODE_LIST,
|
|
|
887953 |
@@ -1559,7 +1699,18 @@ inode_table_prune (inode_table_t *table)
|
|
|
887953 |
break;
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
+ lru_size--;
|
|
|
887953 |
entry = list_entry (table->lru.next, inode_t, list);
|
|
|
887953 |
+ /* The logic of invalidation is required only if invalidator_fn
|
|
|
887953 |
+ is present */
|
|
|
887953 |
+ if (table->invalidator_fn) {
|
|
|
887953 |
+ /* check for valid inode with 'nlookup' */
|
|
|
887953 |
+ if (entry->nlookup) {
|
|
|
887953 |
+ __inode_ref(entry, _gf_true);
|
|
|
887953 |
+ tmp = entry;
|
|
|
887953 |
+ break;
|
|
|
887953 |
+ }
|
|
|
887953 |
+ }
|
|
|
887953 |
|
|
|
887953 |
table->lru_size--;
|
|
|
887953 |
__inode_retire (entry);
|
|
|
887953 |
@@ -1567,17 +1718,25 @@ inode_table_prune (inode_table_t *table)
|
|
|
887953 |
ret++;
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
+ purge_list:
|
|
|
887953 |
list_splice_init (&table->purge, &purge);
|
|
|
887953 |
table->purge_size = 0;
|
|
|
887953 |
}
|
|
|
887953 |
pthread_mutex_unlock (&table->lock);
|
|
|
887953 |
|
|
|
887953 |
- {
|
|
|
887953 |
- list_for_each_entry_safe (del, tmp, &purge, list) {
|
|
|
887953 |
- list_del_init (&del->list);
|
|
|
887953 |
- __inode_forget (del, 0);
|
|
|
887953 |
- __inode_destroy (del);
|
|
|
887953 |
- }
|
|
|
887953 |
+ /* Pick 1 inode for invalidation */
|
|
|
887953 |
+ if (tmp) {
|
|
|
887953 |
+ xlator_t *old_THIS = THIS;
|
|
|
887953 |
+ THIS = table->invalidator_xl;
|
|
|
887953 |
+ table->invalidator_fn(table->invalidator_xl, tmp);
|
|
|
887953 |
+ THIS = old_THIS;
|
|
|
887953 |
+ inode_unref(tmp);
|
|
|
887953 |
+ }
|
|
|
887953 |
+
|
|
|
887953 |
+ list_for_each_entry_safe (del, tmp, &purge, list) {
|
|
|
887953 |
+ list_del_init (&del->list);
|
|
|
887953 |
+ __inode_forget (del, 0);
|
|
|
887953 |
+ __inode_destroy (del);
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
return ret;
|
|
|
887953 |
@@ -1605,9 +1764,12 @@ __inode_table_init_root (inode_table_t *table)
|
|
|
887953 |
|
|
|
887953 |
|
|
|
887953 |
inode_table_t *
|
|
|
887953 |
-inode_table_new (size_t lru_limit, xlator_t *xl)
|
|
|
887953 |
+inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl,
|
|
|
887953 |
+ int32_t (*invalidator_fn)(xlator_t *, inode_t *),
|
|
|
887953 |
+ xlator_t *invalidator_xl)
|
|
|
887953 |
{
|
|
|
887953 |
inode_table_t *new = NULL;
|
|
|
887953 |
+ uint32_t mem_pool_size = lru_limit;
|
|
|
887953 |
int ret = -1;
|
|
|
887953 |
int i = 0;
|
|
|
887953 |
|
|
|
887953 |
@@ -1619,20 +1781,19 @@ inode_table_new (size_t lru_limit, xlator_t *xl)
|
|
|
887953 |
new->ctxcount = xl->graph->xl_count + 1;
|
|
|
887953 |
|
|
|
887953 |
new->lru_limit = lru_limit;
|
|
|
887953 |
+ new->invalidator_fn = invalidator_fn;
|
|
|
887953 |
+ new->invalidator_xl = invalidator_xl;
|
|
|
887953 |
|
|
|
887953 |
new->hashsize = 14057; /* TODO: Random Number?? */
|
|
|
887953 |
|
|
|
887953 |
- /* In case FUSE is initing the inode table. */
|
|
|
887953 |
- if (lru_limit == 0)
|
|
|
887953 |
- lru_limit = DEFAULT_INODE_MEMPOOL_ENTRIES;
|
|
|
887953 |
-
|
|
|
887953 |
- new->inode_pool = mem_pool_new (inode_t, lru_limit);
|
|
|
887953 |
+ if (!mem_pool_size || (mem_pool_size > DEFAULT_INODE_MEMPOOL_ENTRIES))
|
|
|
887953 |
+ mem_pool_size = DEFAULT_INODE_MEMPOOL_ENTRIES;
|
|
|
887953 |
|
|
|
887953 |
+ new->inode_pool = mem_pool_new(inode_t, mem_pool_size);
|
|
|
887953 |
if (!new->inode_pool)
|
|
|
887953 |
goto out;
|
|
|
887953 |
|
|
|
887953 |
- new->dentry_pool = mem_pool_new (dentry_t, lru_limit);
|
|
|
887953 |
-
|
|
|
887953 |
+ new->dentry_pool = mem_pool_new (dentry_t, mem_pool_size);
|
|
|
887953 |
if (!new->dentry_pool)
|
|
|
887953 |
goto out;
|
|
|
887953 |
|
|
|
887953 |
@@ -1667,6 +1828,7 @@ inode_table_new (size_t lru_limit, xlator_t *xl)
|
|
|
887953 |
INIT_LIST_HEAD (&new->active);
|
|
|
887953 |
INIT_LIST_HEAD (&new->lru);
|
|
|
887953 |
INIT_LIST_HEAD (&new->purge);
|
|
|
887953 |
+ INIT_LIST_HEAD(&new->invalidate);
|
|
|
887953 |
|
|
|
887953 |
ret = gf_asprintf (&new->name, "%s/inode", xl->name);
|
|
|
887953 |
if (-1 == ret) {
|
|
|
887953 |
@@ -1696,6 +1858,14 @@ out:
|
|
|
887953 |
return new;
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
+inode_table_t *
|
|
|
887953 |
+inode_table_new(uint32_t lru_limit, xlator_t *xl)
|
|
|
887953 |
+{
|
|
|
887953 |
+ /* Only fuse for now requires the inode table with invalidator */
|
|
|
887953 |
+ return inode_table_with_invalidator(lru_limit, xl, NULL, NULL);
|
|
|
887953 |
+}
|
|
|
887953 |
+
|
|
|
887953 |
+
|
|
|
887953 |
int
|
|
|
887953 |
inode_table_ctx_free (inode_table_t *table)
|
|
|
887953 |
{
|
|
|
887953 |
@@ -1830,6 +2000,15 @@ inode_table_destroy (inode_table_t *inode_table) {
|
|
|
887953 |
inode_table->lru_size--;
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
+ /* Same logic for invalidate list */
|
|
|
887953 |
+ while (!list_empty(&inode_table->invalidate)) {
|
|
|
887953 |
+ trav = list_first_entry(&inode_table->invalidate,
|
|
|
887953 |
+ inode_t, list);
|
|
|
887953 |
+ __inode_forget(trav, 0);
|
|
|
887953 |
+ __inode_retire(trav);
|
|
|
887953 |
+ inode_table->invalidate_size--;
|
|
|
887953 |
+ }
|
|
|
887953 |
+
|
|
|
887953 |
while (!list_empty (&inode_table->active)) {
|
|
|
887953 |
trav = list_first_entry (&inode_table->active,
|
|
|
887953 |
inode_t, list);
|
|
|
887953 |
@@ -2347,6 +2526,8 @@ inode_dump (inode_t *inode, char *prefix)
|
|
|
887953 |
gf_proc_dump_write("active-fd-count", "%u",
|
|
|
887953 |
inode->active_fd_count);
|
|
|
887953 |
gf_proc_dump_write("ref", "%u", inode->ref);
|
|
|
887953 |
+ gf_proc_dump_write("invalidate-sent", "%d",
|
|
|
887953 |
+ inode->invalidate_sent);
|
|
|
887953 |
gf_proc_dump_write("ia_type", "%d", inode->ia_type);
|
|
|
887953 |
if (inode->_ctx) {
|
|
|
887953 |
inode_ctx = GF_CALLOC (inode->table->ctxcount,
|
|
|
887953 |
@@ -2427,10 +2608,13 @@ inode_table_dump (inode_table_t *itable, char *prefix)
|
|
|
887953 |
gf_proc_dump_write(key, "%d", itable->lru_size);
|
|
|
887953 |
gf_proc_dump_build_key(key, prefix, "purge_size");
|
|
|
887953 |
gf_proc_dump_write(key, "%d", itable->purge_size);
|
|
|
887953 |
+ gf_proc_dump_build_key(key, prefix, "invalidate_size");
|
|
|
887953 |
+ gf_proc_dump_write(key, "%d", itable->invalidate_size);
|
|
|
887953 |
|
|
|
887953 |
INODE_DUMP_LIST(&itable->active, key, prefix, "active");
|
|
|
887953 |
INODE_DUMP_LIST(&itable->lru, key, prefix, "lru");
|
|
|
887953 |
INODE_DUMP_LIST(&itable->purge, key, prefix, "purge");
|
|
|
887953 |
+ INODE_DUMP_LIST(&itable->invalidate, key, prefix, "invalidate");
|
|
|
887953 |
|
|
|
887953 |
pthread_mutex_unlock(&itable->lock);
|
|
|
887953 |
}
|
|
|
887953 |
diff --git a/libglusterfs/src/inode.h b/libglusterfs/src/inode.h
|
|
|
887953 |
index 7a87748..6a96447 100644
|
|
|
887953 |
--- a/libglusterfs/src/inode.h
|
|
|
887953 |
+++ b/libglusterfs/src/inode.h
|
|
|
887953 |
@@ -55,6 +55,13 @@ struct _inode_table {
|
|
|
887953 |
struct mem_pool *dentry_pool; /* memory pool for dentrys */
|
|
|
887953 |
struct mem_pool *fd_mem_pool; /* memory pool for fd_t */
|
|
|
887953 |
int ctxcount; /* number of slots in inode->ctx */
|
|
|
887953 |
+
|
|
|
887953 |
+ /* This is required for 'invalidation' when 'nlookup' would be used,
|
|
|
887953 |
+ especially in case of fuse-bridge */
|
|
|
887953 |
+ int32_t (*invalidator_fn)(xlator_t *, inode_t *);
|
|
|
887953 |
+ xlator_t *invalidator_xl;
|
|
|
887953 |
+ struct list_head invalidate; /* inodes which are in invalidation queue */
|
|
|
887953 |
+ uint32_t invalidate_size; /* count of inodes in invalidation list */
|
|
|
887953 |
};
|
|
|
887953 |
|
|
|
887953 |
|
|
|
887953 |
@@ -102,6 +109,7 @@ struct _inode {
|
|
|
887953 |
struct list_head list; /* active/lru/purge */
|
|
|
887953 |
|
|
|
887953 |
struct _inode_ctx *_ctx; /* replacement for dict_t *(inode->ctx) */
|
|
|
887953 |
+ gf_boolean_t invalidate_sent; /* Set it if invalidator_fn is called for inode */
|
|
|
887953 |
};
|
|
|
887953 |
|
|
|
887953 |
|
|
|
887953 |
@@ -110,7 +118,14 @@ struct _inode {
|
|
|
887953 |
#define GFID_STR_PFX_LEN (sizeof (GFID_STR_PFX) - 1)
|
|
|
887953 |
|
|
|
887953 |
inode_table_t *
|
|
|
887953 |
-inode_table_new (size_t lru_limit, xlator_t *xl);
|
|
|
887953 |
+inode_table_new(uint32_t lru_limit, xlator_t *xl);
|
|
|
887953 |
+
|
|
|
887953 |
+inode_table_t *
|
|
|
887953 |
+inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl,
|
|
|
887953 |
+ int32_t (*invalidator_fn)(xlator_t *, inode_t *),
|
|
|
887953 |
+ xlator_t *invalidator_xl);
|
|
|
887953 |
+int
|
|
|
887953 |
+inode_forget_with_unref(inode_t *inode, uint64_t nlookup);
|
|
|
887953 |
|
|
|
887953 |
void
|
|
|
887953 |
inode_table_destroy_all (glusterfs_ctx_t *ctx);
|
|
|
887953 |
diff --git a/tests/features/fuse-lru-limit.t b/tests/features/fuse-lru-limit.t
|
|
|
887953 |
new file mode 100644
|
|
|
887953 |
index 0000000..9f12116
|
|
|
887953 |
--- /dev/null
|
|
|
887953 |
+++ b/tests/features/fuse-lru-limit.t
|
|
|
887953 |
@@ -0,0 +1,42 @@
|
|
|
887953 |
+#!/bin/bash
|
|
|
887953 |
+
|
|
|
887953 |
+. $(dirname $0)/../include.rc
|
|
|
887953 |
+. $(dirname $0)/../volume.rc
|
|
|
887953 |
+
|
|
|
887953 |
+cleanup
|
|
|
887953 |
+
|
|
|
887953 |
+TEST glusterd
|
|
|
887953 |
+TEST pidof glusterd
|
|
|
887953 |
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1}
|
|
|
887953 |
+TEST $CLI volume start $V0
|
|
|
887953 |
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
|
|
|
887953 |
+
|
|
|
887953 |
+EXPECT "1" get_mount_active_size_value $V0 $M0
|
|
|
887953 |
+EXPECT "0" get_mount_lru_size_value $V0 $M0
|
|
|
887953 |
+
|
|
|
887953 |
+mkdir ${M0}/dir-{1..9}
|
|
|
887953 |
+for i in {1..9}; do
|
|
|
887953 |
+ for j in {1..1000}; do
|
|
|
887953 |
+ echo "Test file" > ${M0}/dir-$i/file-$j;
|
|
|
887953 |
+ done;
|
|
|
887953 |
+done
|
|
|
887953 |
+lc=$(get_mount_lru_size_value $V0 ${M0})
|
|
|
887953 |
+# ideally it should be 9000+
|
|
|
887953 |
+TEST [ $lc -ge 9000 ]
|
|
|
887953 |
+
|
|
|
887953 |
+TEST umount $M0
|
|
|
887953 |
+
|
|
|
887953 |
+TEST glusterfs -s $H0 --volfile-id $V0 --lru-limit 1000 $M0
|
|
|
887953 |
+
|
|
|
887953 |
+TEST find $M0
|
|
|
887953 |
+lc=$(get_mount_lru_size_value $V0 ${M0})
|
|
|
887953 |
+# ideally it should be <1000
|
|
|
887953 |
+# Not sure if there are any possibilities of buffer need.
|
|
|
887953 |
+TEST [ $lc -le 1000 ]
|
|
|
887953 |
+
|
|
|
887953 |
+TEST rm -rf $M0/*
|
|
|
887953 |
+
|
|
|
887953 |
+EXPECT "1" get_mount_active_size_value $V0 $M0
|
|
|
887953 |
+EXPECT "0" get_mount_lru_size_value $V0 $M0
|
|
|
887953 |
+
|
|
|
887953 |
+cleanup
|
|
|
887953 |
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
|
|
|
887953 |
index 8d1e3a0..f3188d6 100644
|
|
|
887953 |
--- a/xlators/mount/fuse/src/fuse-bridge.c
|
|
|
887953 |
+++ b/xlators/mount/fuse/src/fuse-bridge.c
|
|
|
887953 |
@@ -279,29 +279,31 @@ send_fuse_data (xlator_t *this, fuse_in_header_t *finh, void *data, size_t size)
|
|
|
887953 |
send_fuse_data (this, finh, obj, sizeof (*(obj)))
|
|
|
887953 |
|
|
|
887953 |
|
|
|
887953 |
-#if FUSE_KERNEL_MINOR_VERSION >= 11
|
|
|
887953 |
static void
|
|
|
887953 |
fuse_invalidate_entry (xlator_t *this, uint64_t fuse_ino)
|
|
|
887953 |
{
|
|
|
887953 |
+#if FUSE_KERNEL_MINOR_VERSION >= 11
|
|
|
887953 |
struct fuse_out_header *fouh = NULL;
|
|
|
887953 |
struct fuse_notify_inval_entry_out *fnieo = NULL;
|
|
|
887953 |
fuse_private_t *priv = NULL;
|
|
|
887953 |
dentry_t *dentry = NULL;
|
|
|
887953 |
+ dentry_t *tmp = NULL;
|
|
|
887953 |
inode_t *inode = NULL;
|
|
|
887953 |
size_t nlen = 0;
|
|
|
887953 |
fuse_invalidate_node_t *node = NULL;
|
|
|
887953 |
+ char gfid_str[UUID_CANONICAL_FORM_LEN + 1];
|
|
|
887953 |
|
|
|
887953 |
priv = this->private;
|
|
|
887953 |
|
|
|
887953 |
if (!priv->reverse_fuse_thread_started)
|
|
|
887953 |
return;
|
|
|
887953 |
|
|
|
887953 |
- inode = fuse_ino_to_inode(fuse_ino, this);
|
|
|
887953 |
+ inode = (inode_t *)(unsigned long)fuse_ino;
|
|
|
887953 |
if (inode == NULL) {
|
|
|
887953 |
return;
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
- list_for_each_entry (dentry, &inode->dentry_list, inode_list) {
|
|
|
887953 |
+ list_for_each_entry_safe (dentry, tmp, &inode->dentry_list, inode_list) {
|
|
|
887953 |
node = GF_CALLOC (1, sizeof (*node),
|
|
|
887953 |
gf_fuse_mt_invalidate_node_t);
|
|
|
887953 |
if (node == NULL)
|
|
|
887953 |
@@ -315,14 +317,31 @@ fuse_invalidate_entry (xlator_t *this, uint64_t fuse_ino)
|
|
|
887953 |
fouh->unique = 0;
|
|
|
887953 |
fouh->error = FUSE_NOTIFY_INVAL_ENTRY;
|
|
|
887953 |
|
|
|
887953 |
- nlen = strlen (dentry->name);
|
|
|
887953 |
- fouh->len = sizeof (*fouh) + sizeof (*fnieo) + nlen + 1;
|
|
|
887953 |
- fnieo->parent = inode_to_fuse_nodeid (dentry->parent);
|
|
|
887953 |
+ if (dentry->name) {
|
|
|
887953 |
+ nlen = strlen (dentry->name);
|
|
|
887953 |
+ fouh->len = sizeof (*fouh) + sizeof (*fnieo) + nlen + 1;
|
|
|
887953 |
+ fnieo->parent = inode_to_fuse_nodeid (dentry->parent);
|
|
|
887953 |
+
|
|
|
887953 |
+ fnieo->namelen = nlen;
|
|
|
887953 |
+ strcpy (node->inval_buf + sizeof (*fouh) + sizeof (*fnieo),
|
|
|
887953 |
+ dentry->name);
|
|
|
887953 |
+ }
|
|
|
887953 |
|
|
|
887953 |
- fnieo->namelen = nlen;
|
|
|
887953 |
- strcpy (node->inval_buf + sizeof (*fouh) + sizeof (*fnieo),
|
|
|
887953 |
- dentry->name);
|
|
|
887953 |
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE, "INVALIDATE entry: "
|
|
|
887953 |
+ "%"PRIu64"/%s (gfid:%s)", fnieo->parent, dentry->name,
|
|
|
887953 |
+ uuid_utoa(inode->gfid));
|
|
|
887953 |
|
|
|
887953 |
+ if (dentry->parent) {
|
|
|
887953 |
+ fuse_log_eh (this, "Invalidated entry %s (parent: %s)"
|
|
|
887953 |
+ "(gfid: %s)", dentry->name,
|
|
|
887953 |
+ uuid_utoa (dentry->parent->gfid),
|
|
|
887953 |
+ uuid_utoa_r(inode->gfid, gfid_str));
|
|
|
887953 |
+ } else {
|
|
|
887953 |
+ fuse_log_eh (this, "Invalidated entry %s(nodeid: %"
|
|
|
887953 |
+ PRIu64 ") gfid: %s",
|
|
|
887953 |
+ dentry->name, fnieo->parent,
|
|
|
887953 |
+ uuid_utoa (inode->gfid));
|
|
|
887953 |
+ }
|
|
|
887953 |
pthread_mutex_lock (&priv->invalidate_mutex);
|
|
|
887953 |
{
|
|
|
887953 |
list_add_tail (&node->next, &priv->invalidate_list);
|
|
|
887953 |
@@ -330,23 +349,10 @@ fuse_invalidate_entry (xlator_t *this, uint64_t fuse_ino)
|
|
|
887953 |
}
|
|
|
887953 |
pthread_mutex_unlock (&priv->invalidate_mutex);
|
|
|
887953 |
|
|
|
887953 |
- gf_log ("glusterfs-fuse", GF_LOG_TRACE, "INVALIDATE entry: "
|
|
|
887953 |
- "%"PRIu64"/%s", fnieo->parent, dentry->name);
|
|
|
887953 |
-
|
|
|
887953 |
- if (dentry->parent) {
|
|
|
887953 |
- fuse_log_eh (this, "Invalidated entry %s (parent: %s)",
|
|
|
887953 |
- dentry->name,
|
|
|
887953 |
- uuid_utoa (dentry->parent->gfid));
|
|
|
887953 |
- } else {
|
|
|
887953 |
- fuse_log_eh (this, "Invalidated entry %s(nodeid: %" PRIu64 ")",
|
|
|
887953 |
- dentry->name, fnieo->parent);
|
|
|
887953 |
- }
|
|
|
887953 |
}
|
|
|
887953 |
-
|
|
|
887953 |
- if (inode)
|
|
|
887953 |
- inode_unref (inode);
|
|
|
887953 |
+#endif /* FUSE_KERNEL_MINOR_VERSION >= 11 */
|
|
|
887953 |
+ return;
|
|
|
887953 |
}
|
|
|
887953 |
-#endif
|
|
|
887953 |
|
|
|
887953 |
/*
|
|
|
887953 |
* Send an inval inode notification to fuse. This causes an invalidation of the
|
|
|
887953 |
@@ -367,6 +373,10 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
|
|
|
887953 |
if (!priv->reverse_fuse_thread_started)
|
|
|
887953 |
return;
|
|
|
887953 |
|
|
|
887953 |
+ inode = (inode_t *)(unsigned long)fuse_ino;
|
|
|
887953 |
+ if (inode == NULL)
|
|
|
887953 |
+ return;
|
|
|
887953 |
+
|
|
|
887953 |
node = GF_CALLOC (1, sizeof (*node), gf_fuse_mt_invalidate_node_t);
|
|
|
887953 |
if (node == NULL)
|
|
|
887953 |
return;
|
|
|
887953 |
@@ -386,7 +396,11 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
|
|
|
887953 |
fniio->off = 0;
|
|
|
887953 |
fniio->len = -1;
|
|
|
887953 |
|
|
|
887953 |
- inode = fuse_ino_to_inode (fuse_ino, this);
|
|
|
887953 |
+ fuse_log_eh(this, "Invalidated inode %" PRIu64 " (gfid: %s)", fuse_ino,
|
|
|
887953 |
+ uuid_utoa(inode->gfid));
|
|
|
887953 |
+ gf_log("glusterfs-fuse", GF_LOG_TRACE,
|
|
|
887953 |
+ "INVALIDATE inode: %" PRIu64 "(gfid:%s)", fuse_ino,
|
|
|
887953 |
+ uuid_utoa(inode->gfid));
|
|
|
887953 |
|
|
|
887953 |
pthread_mutex_lock (&priv->invalidate_mutex);
|
|
|
887953 |
{
|
|
|
887953 |
@@ -395,24 +409,23 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
|
|
|
887953 |
}
|
|
|
887953 |
pthread_mutex_unlock (&priv->invalidate_mutex);
|
|
|
887953 |
|
|
|
887953 |
- gf_log ("glusterfs-fuse", GF_LOG_TRACE, "INVALIDATE inode: %" PRIu64,
|
|
|
887953 |
- fuse_ino);
|
|
|
887953 |
-
|
|
|
887953 |
- if (inode) {
|
|
|
887953 |
- fuse_log_eh (this, "Invalidated inode %" PRIu64 " (gfid: %s)",
|
|
|
887953 |
- fuse_ino, uuid_utoa (inode->gfid));
|
|
|
887953 |
- } else {
|
|
|
887953 |
- fuse_log_eh (this, "Invalidated inode %" PRIu64, fuse_ino);
|
|
|
887953 |
- }
|
|
|
887953 |
-
|
|
|
887953 |
- if (inode)
|
|
|
887953 |
- inode_unref (inode);
|
|
|
887953 |
#else
|
|
|
887953 |
gf_log ("glusterfs-fuse", GF_LOG_WARNING,
|
|
|
887953 |
- "fuse_invalidate_inode not implemented on OS X due to missing FUSE notification");
|
|
|
887953 |
+ "fuse_invalidate_inode not implemented on this system");
|
|
|
887953 |
#endif
|
|
|
887953 |
+ return;
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
+#if FUSE_KERNEL_MINOR_VERSION >= 11
|
|
|
887953 |
+/* Wrapper with the invalidator signature (inode_t *, instead of uint64_t) */
|
|
|
887953 |
+static int32_t
|
|
|
887953 |
+fuse_inode_invalidate_fn(xlator_t *this, inode_t *inode)
|
|
|
887953 |
+{
|
|
|
887953 |
+ fuse_invalidate_entry(this, (uint64_t)inode);
|
|
|
887953 |
+ return 0;
|
|
|
887953 |
+}
|
|
|
887953 |
+#endif
|
|
|
887953 |
+
|
|
|
887953 |
|
|
|
887953 |
int
|
|
|
887953 |
send_fuse_err (xlator_t *this, fuse_in_header_t *finh, int error)
|
|
|
887953 |
@@ -686,11 +699,14 @@ do_forget(xlator_t *this, uint64_t unique, uint64_t nodeid, uint64_t nlookup)
|
|
|
887953 |
{
|
|
|
887953 |
inode_t *fuse_inode = fuse_ino_to_inode(nodeid, this);
|
|
|
887953 |
|
|
|
887953 |
+ gf_log("fuse", GF_LOG_TRACE,
|
|
|
887953 |
+ "%" PRIu64 ": FORGET %" PRIu64 "/%" PRIu64 " gfid: (%s)", unique,
|
|
|
887953 |
+ nodeid, nlookup, uuid_utoa(fuse_inode->gfid));
|
|
|
887953 |
+
|
|
|
887953 |
fuse_log_eh(this, "%"PRIu64": FORGET %"PRIu64"/%"PRIu64" gfid: (%s)",
|
|
|
887953 |
unique, nodeid, nlookup, uuid_utoa(fuse_inode->gfid));
|
|
|
887953 |
|
|
|
887953 |
- inode_forget(fuse_inode, nlookup);
|
|
|
887953 |
- inode_unref(fuse_inode);
|
|
|
887953 |
+ inode_forget_with_unref(fuse_inode, nlookup);
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
static void
|
|
|
887953 |
@@ -705,10 +721,6 @@ fuse_forget (xlator_t *this, fuse_in_header_t *finh, void *msg,
|
|
|
887953 |
return;
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
|
|
|
887953 |
- "%"PRIu64": FORGET %"PRIu64"/%"PRIu64,
|
|
|
887953 |
- finh->unique, finh->nodeid, ffi->nlookup);
|
|
|
887953 |
-
|
|
|
887953 |
do_forget(this, finh->unique, finh->nodeid, ffi->nlookup);
|
|
|
887953 |
|
|
|
887953 |
GF_FREE (finh);
|
|
|
887953 |
@@ -4940,7 +4952,9 @@ fuse_thread_proc (void *data)
|
|
|
887953 |
fuse_in_header_t *finh = NULL;
|
|
|
887953 |
struct iovec iov_in[2];
|
|
|
887953 |
void *msg = NULL;
|
|
|
887953 |
- const size_t msg0_size = sizeof (*finh) + 128;
|
|
|
887953 |
+ /* we need 512 extra buffer size for BATCH_FORGET fop. By tests, it is
|
|
|
887953 |
+           found to reduce 'REALLOC()' calls in the loop */
|
|
|
887953 |
+ const size_t msg0_size = sizeof (*finh) + 512;
|
|
|
887953 |
fuse_handler_t **fuse_ops = NULL;
|
|
|
887953 |
struct pollfd pfd[2] = {{0,}};
|
|
|
887953 |
|
|
|
887953 |
@@ -5283,7 +5297,12 @@ fuse_graph_setup (xlator_t *this, glusterfs_graph_t *graph)
|
|
|
887953 |
goto unlock;
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
- itable = inode_table_new (0, graph->top);
|
|
|
887953 |
+#if FUSE_KERNEL_MINOR_VERSION >= 11
|
|
|
887953 |
+ itable = inode_table_with_invalidator(priv->lru_limit, graph->top,
|
|
|
887953 |
+ fuse_inode_invalidate_fn, this);
|
|
|
887953 |
+#else
|
|
|
887953 |
+ itable = inode_table_new(0, graph->top);
|
|
|
887953 |
+#endif
|
|
|
887953 |
if (!itable) {
|
|
|
887953 |
ret = -1;
|
|
|
887953 |
goto unlock;
|
|
|
887953 |
@@ -5740,6 +5759,8 @@ init (xlator_t *this_xl)
|
|
|
887953 |
}
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
+ GF_OPTION_INIT("lru-limit", priv->lru_limit, uint32, cleanup_exit);
|
|
|
887953 |
+
|
|
|
887953 |
GF_OPTION_INIT("event-history", priv->event_history, bool,
|
|
|
887953 |
cleanup_exit);
|
|
|
887953 |
|
|
|
887953 |
@@ -6061,5 +6082,13 @@ struct volume_options options[] = {
|
|
|
887953 |
.max = 64,
|
|
|
887953 |
.description = "Sets fuse reader thread count.",
|
|
|
887953 |
},
|
|
|
887953 |
+ {
|
|
|
887953 |
+ .key = {"lru-limit"},
|
|
|
887953 |
+ .type = GF_OPTION_TYPE_INT,
|
|
|
887953 |
+ .default_value = "131072",
|
|
|
887953 |
+ .min = 0,
|
|
|
887953 |
+ .description = "makes glusterfs invalidate kernel inodes after "
|
|
|
887953 |
+ "reaching this limit (0 means 'unlimited')",
|
|
|
887953 |
+ },
|
|
|
887953 |
{ .key = {NULL} },
|
|
|
887953 |
};
|
|
|
887953 |
diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h
|
|
|
887953 |
index 4ca76e9..4e32a7f 100644
|
|
|
887953 |
--- a/xlators/mount/fuse/src/fuse-bridge.h
|
|
|
887953 |
+++ b/xlators/mount/fuse/src/fuse-bridge.h
|
|
|
887953 |
@@ -144,6 +144,9 @@ struct fuse_private {
|
|
|
887953 |
gf_boolean_t mount_finished;
|
|
|
887953 |
gf_boolean_t handle_graph_switch;
|
|
|
887953 |
pthread_cond_t migrate_cond;
|
|
|
887953 |
+
|
|
|
887953 |
+ /* LRU Limit, if not set, default is 128k for now */
|
|
|
887953 |
+ uint32_t lru_limit;
|
|
|
887953 |
};
|
|
|
887953 |
typedef struct fuse_private fuse_private_t;
|
|
|
887953 |
|
|
|
887953 |
diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in
|
|
|
887953 |
index 817619e..9a0404f 100755
|
|
|
887953 |
--- a/xlators/mount/fuse/utils/mount.glusterfs.in
|
|
|
887953 |
+++ b/xlators/mount/fuse/utils/mount.glusterfs.in
|
|
|
887953 |
@@ -245,6 +245,10 @@ start_glusterfs ()
|
|
|
887953 |
cmd_line=$(echo "$cmd_line --gid-timeout=$gid_timeout");
|
|
|
887953 |
fi
|
|
|
887953 |
|
|
|
887953 |
+ if [ -n "$lru_limit" ]; then
|
|
|
887953 |
+ cmd_line=$(echo "$cmd_line --lru-limit=$lru_limit");
|
|
|
887953 |
+ fi
|
|
|
887953 |
+
|
|
|
887953 |
if [ -n "$bg_qlen" ]; then
|
|
|
887953 |
cmd_line=$(echo "$cmd_line --background-qlen=$bg_qlen");
|
|
|
887953 |
fi
|
|
|
887953 |
@@ -467,6 +471,9 @@ with_options()
|
|
|
887953 |
"gid-timeout")
|
|
|
887953 |
gid_timeout=$value
|
|
|
887953 |
;;
|
|
|
887953 |
+ "lru-limit")
|
|
|
887953 |
+ lru_limit=$value
|
|
|
887953 |
+ ;;
|
|
|
887953 |
"background-qlen")
|
|
|
887953 |
bg_qlen=$value
|
|
|
887953 |
;;
|
|
|
887953 |
--
|
|
|
887953 |
1.8.3.1
|
|
|
887953 |
|