From ddb0038de77a4269fa7eed1bb217bfb6bed1b7ba Mon Sep 17 00:00:00 2001 From: N Balachandran Date: Fri, 9 Aug 2019 14:34:22 +0530 Subject: [PATCH 337/344] fuse: Set limit on invalidate queue size If the glusterfs fuse client process is unable to process the invalidate requests quickly enough, the number of such requests quickly grows large enough to use a significant amount of memory. We are now introducing another option to set an upper limit on these to prevent runaway memory usage. > Upstream https://review.gluster.org/23187 > Change-Id: Iddfff1ee2de1466223e6717f7abd4b28ed947788 > Fixes: bz#1732717 > Signed-off-by: N Balachandran BUG: 1763208 Change-Id: I666cdf6c70999a0f0bc79969e8df0a9dde93b6e4 Signed-off-by: Csaba Henk Reviewed-on: https://code.engineering.redhat.com/gerrit/187529 Tested-by: RHGS Build Bot Reviewed-by: Sunil Kumar Heggodu Gopala Acharya --- doc/mount.glusterfs.8 | 5 +++ glusterfsd/src/glusterfsd.c | 21 ++++++++++ glusterfsd/src/glusterfsd.h | 3 +- libglusterfs/src/glusterfs/glusterfs.h | 1 + libglusterfs/src/glusterfs/inode.h | 1 + libglusterfs/src/inode.c | 31 +++++++++++---- xlators/mount/fuse/src/fuse-bridge.c | 60 ++++++++++++++++++++++------- xlators/mount/fuse/src/fuse-bridge.h | 3 +- xlators/mount/fuse/utils/mount.glusterfs.in | 7 ++++ 9 files changed, 108 insertions(+), 24 deletions(-) diff --git a/doc/mount.glusterfs.8 b/doc/mount.glusterfs.8 index 286631b..b35b362 100644 --- a/doc/mount.glusterfs.8 +++ b/doc/mount.glusterfs.8 @@ -126,6 +126,11 @@ Provide list of backup volfile servers in the following format [default: None] Set fuse module's limit for number of inodes kept in LRU list to N [default: 131072] .TP .TP +\fBinvalidate-limit=\fRN +Suspend fuse invalidations implied by 'lru-limit' if number of outstanding +invalidations reaches N +.TP +.TP \fBbackground-qlen=\fRN Set fuse module's background queue length to N [default: 64] .TP diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c index 5b5e996..0856471 100644 --- a/glusterfsd/src/glusterfsd.c +++ b/glusterfsd/src/glusterfsd.c @@ -212,6 +212,9 @@ static struct argp_option gf_options[] = { {"lru-limit", ARGP_FUSE_LRU_LIMIT_KEY, "N", 0, "Set fuse module's limit for number of inodes kept in LRU list to N " "[default: 131072]"}, + {"invalidate-limit", ARGP_FUSE_INVALIDATE_LIMIT_KEY, "N", 0, + "Suspend inode invalidations implied by 'lru-limit' if the number of " + "outstanding invalidations reaches N"}, {"background-qlen", ARGP_FUSE_BACKGROUND_QLEN_KEY, "N", 0, "Set fuse module's background queue length to N " "[default: 64]"}, @@ -504,6 +507,16 @@ set_fuse_mount_options(glusterfs_ctx_t *ctx, dict_t *options) } } + if (cmd_args->invalidate_limit >= 0) { + ret = dict_set_int32(options, "invalidate-limit", + cmd_args->invalidate_limit); + if (ret < 0) { + gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4, + "invalidate-limit"); + goto err; + } + } + if (cmd_args->background_qlen) { ret = dict_set_int32(options, "background-qlen", cmd_args->background_qlen); @@ -1283,6 +1296,14 @@ parse_opts(int key, char *arg, struct argp_state *state) argp_failure(state, -1, 0, "unknown LRU limit option %s", arg); break; + case ARGP_FUSE_INVALIDATE_LIMIT_KEY: + if (!gf_string2int32(arg, &cmd_args->invalidate_limit)) + break; + + argp_failure(state, -1, 0, "unknown invalidate limit option %s", + arg); + break; + case ARGP_FUSE_BACKGROUND_QLEN_KEY: if (!gf_string2int(arg, &cmd_args->background_qlen)) break; diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h index fa55789..ee655f0 100644 --- a/glusterfsd/src/glusterfsd.h +++ b/glusterfsd/src/glusterfsd.h @@ -111,7 +111,8 @@ enum argp_option_keys { ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY = 189, ARGP_FUSE_LRU_LIMIT_KEY = 190, ARGP_FUSE_AUTO_INVAL_KEY = 191, - ARGP_BRICK_MUX_KEY = 192 + ARGP_BRICK_MUX_KEY = 192, + ARGP_FUSE_INVALIDATE_LIMIT_KEY = 195, }; struct _gfd_vol_top_priv { diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h index 79c93ae..3b594c0 100644 --- a/libglusterfs/src/glusterfs/glusterfs.h +++ b/libglusterfs/src/glusterfs/glusterfs.h @@ -541,6 +541,7 @@ struct _cmd_args { int client_pid_set; unsigned uid_map_root; int32_t lru_limit; + int32_t invalidate_limit; int background_qlen; int congestion_threshold; char *fuse_mountopts; diff --git a/libglusterfs/src/glusterfs/inode.h b/libglusterfs/src/glusterfs/inode.h index 52efdd8..4421c47 100644 --- a/libglusterfs/src/glusterfs/inode.h +++ b/libglusterfs/src/glusterfs/inode.h @@ -107,6 +107,7 @@ struct _inode { struct list_head list; /* active/lru/purge */ struct _inode_ctx *_ctx; /* replacement for dict_t *(inode->ctx) */ + bool in_invalidate_list; /* Set if inode is in table invalidate list */ bool invalidate_sent; /* Set it if invalidator_fn is called for inode */ }; diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c index 96ddea5..5331e93 100644 --- a/libglusterfs/src/inode.c +++ b/libglusterfs/src/inode.c @@ -558,8 +558,8 @@ __inode_unref(inode_t *inode, bool clear) this = THIS; - if (clear && inode->invalidate_sent) { - inode->invalidate_sent = false; + if (clear && inode->in_invalidate_list) { + inode->in_invalidate_list = false; inode->table->invalidate_size--; __inode_activate(inode); } @@ -573,7 +573,7 @@ __inode_unref(inode_t *inode, bool clear) inode->_ctx[index].ref--; } - if (!inode->ref && !inode->invalidate_sent) { + if (!inode->ref && !inode->in_invalidate_list) { inode->table->active_size--; nlookup = GF_ATOMIC_GET(inode->nlookup); @@ -609,14 +609,14 @@ __inode_ref(inode_t *inode, bool is_invalidate) return inode; if (!inode->ref) { - if (inode->invalidate_sent) { - inode->invalidate_sent = false; + if (inode->in_invalidate_list) { + inode->in_invalidate_list = false; inode->table->invalidate_size--; } else { inode->table->lru_size--; } if (is_invalidate) { - inode->invalidate_sent = true; + inode->in_invalidate_list = true; inode->table->invalidate_size++; list_move_tail(&inode->list, &inode->table->invalidate); } else { @@ -1609,6 +1609,7 @@ static int inode_table_prune(inode_table_t *table) { int ret = 0; + int ret1 = 0; struct list_head purge = { 0, }; @@ -1647,6 +1648,10 @@ inode_table_prune(inode_table_t *table) /* check for valid inode with 'nlookup' */ nlookup = GF_ATOMIC_GET(entry->nlookup); if (nlookup) { + if (entry->invalidate_sent) { + list_move_tail(&entry->list, &table->lru); + continue; + } __inode_ref(entry, true); tmp = entry; break; @@ -1668,9 +1673,19 @@ inode_table_prune(inode_table_t *table) if (tmp) { xlator_t *old_THIS = THIS; THIS = table->invalidator_xl; - table->invalidator_fn(table->invalidator_xl, tmp); + ret1 = table->invalidator_fn(table->invalidator_xl, tmp); THIS = old_THIS; - inode_unref(tmp); + pthread_mutex_lock(&table->lock); + { + if (!ret1) { + tmp->invalidate_sent = true; + __inode_unref(tmp, false); + } else { + /* Move this back to the lru list*/ + __inode_unref(tmp, true); + } + } + pthread_mutex_unlock(&table->lock); } /* Just so that if purge list is handled too, then clear it off */ diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c index 1c946a2..8b2e7f0 100644 --- a/xlators/mount/fuse/src/fuse-bridge.c +++ b/xlators/mount/fuse/src/fuse-bridge.c @@ -26,7 +26,7 @@ static int gf_fuse_xattr_enotsup_log; void fini(xlator_t *this_xl); -static void +static int32_t fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino); /* @@ -312,7 +312,7 @@ send_fuse_data(xlator_t *this, fuse_in_header_t *finh, void *data, size_t size) #define send_fuse_obj(this, finh, obj) \ send_fuse_data(this, finh, obj, sizeof(*(obj))) -static void +static int32_t fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino) { #if FUSE_KERNEL_MINOR_VERSION >= 11 @@ -328,17 +328,22 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino) priv = this->private; if (!priv->reverse_fuse_thread_started) - return; + return -1; + + if (priv->invalidate_limit && + (priv->invalidate_count >= priv->invalidate_limit)) { + return -1; + } inode = (inode_t *)(unsigned long)fuse_ino; if (inode == NULL) - return; + return -1; list_for_each_entry_safe(dentry, tmp, &inode->dentry_list, inode_list) { node = GF_CALLOC(1, sizeof(*node), gf_fuse_mt_invalidate_node_t); if (node == NULL) - break; + return -1; INIT_LIST_HEAD(&node->next); @@ -375,20 +380,21 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino) pthread_mutex_lock(&priv->invalidate_mutex); { list_add_tail(&node->next, &priv->invalidate_list); + priv->invalidate_count++; pthread_cond_signal(&priv->invalidate_cond); } pthread_mutex_unlock(&priv->invalidate_mutex); } #endif - return; + return 0; } /* * Send an inval inode notification to fuse. This causes an invalidation of the * entire page cache mapping on the inode. */ -static void +static int32_t fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) { #if FUSE_KERNEL_MINOR_VERSION >= 11 @@ -401,15 +407,20 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) priv = this->private; if (!priv->reverse_fuse_thread_started) - return; + return -1; + + if (priv->invalidate_limit && + (priv->invalidate_count >= priv->invalidate_limit)) { + return -1; + } inode = (inode_t *)(unsigned long)fuse_ino; if (inode == NULL) - return; + return -1; node = GF_CALLOC(1, sizeof(*node), gf_fuse_mt_invalidate_node_t); if (node == NULL) - return; + return -1; INIT_LIST_HEAD(&node->next); @@ -435,6 +446,7 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) pthread_mutex_lock(&priv->invalidate_mutex); { list_add_tail(&node->next, &priv->invalidate_list); + priv->invalidate_count++; pthread_cond_signal(&priv->invalidate_cond); } pthread_mutex_unlock(&priv->invalidate_mutex); @@ -443,7 +455,7 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) gf_log("glusterfs-fuse", GF_LOG_WARNING, "fuse_invalidate_inode not implemented on this system"); #endif - return; + return 0; } #if FUSE_KERNEL_MINOR_VERSION >= 11 @@ -451,8 +463,9 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) static int32_t fuse_inode_invalidate_fn(xlator_t *this, inode_t *inode) { - fuse_invalidate_entry(this, (uint64_t)(uintptr_t)inode); - return 0; + int32_t ret = 0; + ret = fuse_invalidate_entry(this, (uint64_t)(uintptr_t)inode); + return ret; } #endif @@ -4003,7 +4016,9 @@ fuse_setxattr(xlator_t *this, fuse_in_header_t *finh, void *msg, gf_log("fuse", GF_LOG_TRACE, "got request to invalidate %" PRIu64, finh->nodeid); #if FUSE_KERNEL_MINOR_VERSION >= 11 - fuse_invalidate_entry(this, finh->nodeid); + ret = fuse_invalidate_entry(this, finh->nodeid); + if (ret) + op_errno = EBUSY; #endif goto done; } @@ -4812,6 +4827,7 @@ notify_kernel_loop(void *data) fuse_invalidate_node_t, next); list_del_init(&node->next); + priv->invalidate_count--; } pthread_mutex_unlock(&priv->invalidate_mutex); @@ -4855,6 +4871,7 @@ notify_kernel_loop(void *data) list_del_init(&node->next); GF_FREE(node); } + priv->invalidate_count = 0; } pthread_mutex_unlock(&priv->invalidate_mutex); @@ -6080,6 +6097,9 @@ fuse_priv_dump(xlator_t *this) (int)private->timed_response_fuse_thread_started); gf_proc_dump_write("reverse_thread_started", "%d", (int)private->reverse_fuse_thread_started); + gf_proc_dump_write("invalidate_limit", "%u", private->invalidate_limit); + gf_proc_dump_write("invalidate_queue_length", "%" PRIu64, + private->invalidate_count); gf_proc_dump_write("use_readdirp", "%d", private->use_readdirp); return 0; @@ -6619,6 +6639,9 @@ init(xlator_t *this_xl) GF_OPTION_INIT("lru-limit", priv->lru_limit, uint32, cleanup_exit); + GF_OPTION_INIT("invalidate-limit", priv->invalidate_limit, uint32, + cleanup_exit); + GF_OPTION_INIT("event-history", priv->event_history, bool, cleanup_exit); GF_OPTION_INIT("thin-client", priv->thin_client, bool, cleanup_exit); @@ -6955,6 +6978,15 @@ struct volume_options options[] = { "reaching this limit (0 means 'unlimited')", }, { + .key = {"invalidate-limit"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "0", + .min = 0, + .description = "suspend invalidations as of 'lru-limit' if the number " + "of outstanding invalidations reaches this limit " + "(0 means 'unlimited')", + }, + { .key = {"auto-invalidation"}, .type = GF_OPTION_TYPE_BOOL, .default_value = "true", diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h index 697bd88..2311582 100644 --- a/xlators/mount/fuse/src/fuse-bridge.h +++ b/xlators/mount/fuse/src/fuse-bridge.h @@ -139,7 +139,7 @@ struct fuse_private { pthread_cond_t invalidate_cond; pthread_mutex_t invalidate_mutex; gf_boolean_t reverse_fuse_thread_started; - + uint64_t invalidate_count; /* For communicating with separate mount thread. */ int status_pipe[2]; @@ -191,6 +191,7 @@ struct fuse_private { /* LRU Limit, if not set, default is 128k for now */ uint32_t lru_limit; + uint32_t invalidate_limit; }; typedef struct fuse_private fuse_private_t; diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in index cbde42d..61d7422 100755 --- a/xlators/mount/fuse/utils/mount.glusterfs.in +++ b/xlators/mount/fuse/utils/mount.glusterfs.in @@ -257,6 +257,10 @@ start_glusterfs () cmd_line=$(echo "$cmd_line --lru-limit=$lru_limit"); fi + if [ -n "$invalidate_limit" ]; then + cmd_line=$(echo "$cmd_line --invalidate-limit=$invalidate_limit"); + fi + if [ -n "$bg_qlen" ]; then cmd_line=$(echo "$cmd_line --background-qlen=$bg_qlen"); fi @@ -505,6 +509,9 @@ with_options() "lru-limit") lru_limit=$value ;; + "invalidate-limit") + invalidate_limit=$value + ;; "background-qlen") bg_qlen=$value ;; -- 1.8.3.1