74b1de
From ddb0038de77a4269fa7eed1bb217bfb6bed1b7ba Mon Sep 17 00:00:00 2001
74b1de
From: N Balachandran <nbalacha@redhat.com>
74b1de
Date: Fri, 9 Aug 2019 14:34:22 +0530
74b1de
Subject: [PATCH 337/344] fuse: Set limit on invalidate queue size
74b1de
74b1de
If the glusterfs fuse client process is unable to
74b1de
process the invalidate requests quickly enough, the
74b1de
number of such requests quickly grows large enough
74b1de
to use a significant amount of memory.
74b1de
We are now introducing another option, invalidate-limit, to set an
74b1de
upper limit on these requests and prevent runaway memory usage.
74b1de
74b1de
> Upstream https://review.gluster.org/23187
74b1de
> Change-Id: Iddfff1ee2de1466223e6717f7abd4b28ed947788
74b1de
> Fixes: bz#1732717
74b1de
> Signed-off-by: N Balachandran <nbalacha@redhat.com>
74b1de
74b1de
BUG: 1763208
74b1de
Change-Id: I666cdf6c70999a0f0bc79969e8df0a9dde93b6e4
74b1de
Signed-off-by: Csaba Henk <csaba@redhat.com>
74b1de
Reviewed-on: https://code.engineering.redhat.com/gerrit/187529
74b1de
Tested-by: RHGS Build Bot <nigelb@redhat.com>
74b1de
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
74b1de
---
74b1de
 doc/mount.glusterfs.8                       |  5 +++
74b1de
 glusterfsd/src/glusterfsd.c                 | 21 ++++++++++
74b1de
 glusterfsd/src/glusterfsd.h                 |  3 +-
74b1de
 libglusterfs/src/glusterfs/glusterfs.h      |  1 +
74b1de
 libglusterfs/src/glusterfs/inode.h          |  1 +
74b1de
 libglusterfs/src/inode.c                    | 31 +++++++++++----
74b1de
 xlators/mount/fuse/src/fuse-bridge.c        | 60 ++++++++++++++++++++++-------
74b1de
 xlators/mount/fuse/src/fuse-bridge.h        |  3 +-
74b1de
 xlators/mount/fuse/utils/mount.glusterfs.in |  7 ++++
74b1de
 9 files changed, 108 insertions(+), 24 deletions(-)
74b1de
74b1de
diff --git a/doc/mount.glusterfs.8 b/doc/mount.glusterfs.8
74b1de
index 286631b..b35b362 100644
74b1de
--- a/doc/mount.glusterfs.8
74b1de
+++ b/doc/mount.glusterfs.8
74b1de
@@ -126,6 +126,11 @@ Provide list of backup volfile servers in the following format [default: None]
74b1de
 Set fuse module's limit for number of inodes kept in LRU list to N [default: 131072]
74b1de
 .TP
74b1de
 .TP
74b1de
+\fBinvalidate-limit=\fRN
74b1de
+Suspend fuse invalidations implied by 'lru-limit' if the number of outstanding
74b1de
+invalidations reaches N [default: 0, meaning unlimited]
74b1de
+.TP
74b1de
+.TP
74b1de
 \fBbackground-qlen=\fRN
74b1de
 Set fuse module's background queue length to N [default: 64]
74b1de
 .TP
74b1de
diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
74b1de
index 5b5e996..0856471 100644
74b1de
--- a/glusterfsd/src/glusterfsd.c
74b1de
+++ b/glusterfsd/src/glusterfsd.c
74b1de
@@ -212,6 +212,9 @@ static struct argp_option gf_options[] = {
74b1de
     {"lru-limit", ARGP_FUSE_LRU_LIMIT_KEY, "N", 0,
74b1de
      "Set fuse module's limit for number of inodes kept in LRU list to N "
74b1de
      "[default: 131072]"},
74b1de
+    {"invalidate-limit", ARGP_FUSE_INVALIDATE_LIMIT_KEY, "N", 0,
74b1de
+     "Suspend inode invalidations implied by 'lru-limit' if the number of "
74b1de
+     "outstanding invalidations reaches N"},
74b1de
     {"background-qlen", ARGP_FUSE_BACKGROUND_QLEN_KEY, "N", 0,
74b1de
      "Set fuse module's background queue length to N "
74b1de
      "[default: 64]"},
74b1de
@@ -504,6 +507,16 @@ set_fuse_mount_options(glusterfs_ctx_t *ctx, dict_t *options)
74b1de
         }
74b1de
     }
74b1de
 
74b1de
+    if (cmd_args->invalidate_limit >= 0) {
74b1de
+        ret = dict_set_int32(options, "invalidate-limit",
74b1de
+                             cmd_args->invalidate_limit);
74b1de
+        if (ret < 0) {
74b1de
+            gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
74b1de
+                   "invalidate-limit");
74b1de
+            goto err;
74b1de
+        }
74b1de
+    }
74b1de
+
74b1de
     if (cmd_args->background_qlen) {
74b1de
         ret = dict_set_int32(options, "background-qlen",
74b1de
                              cmd_args->background_qlen);
74b1de
@@ -1283,6 +1296,14 @@ parse_opts(int key, char *arg, struct argp_state *state)
74b1de
             argp_failure(state, -1, 0, "unknown LRU limit option %s", arg);
74b1de
             break;
74b1de
 
74b1de
+        case ARGP_FUSE_INVALIDATE_LIMIT_KEY:
74b1de
+            if (!gf_string2int32(arg, &cmd_args->invalidate_limit))
74b1de
+                break;
74b1de
+
74b1de
+            argp_failure(state, -1, 0, "unknown invalidate limit option %s",
74b1de
+                         arg);
74b1de
+            break;
74b1de
+
74b1de
         case ARGP_FUSE_BACKGROUND_QLEN_KEY:
74b1de
             if (!gf_string2int(arg, &cmd_args->background_qlen))
74b1de
                 break;
74b1de
diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h
74b1de
index fa55789..ee655f0 100644
74b1de
--- a/glusterfsd/src/glusterfsd.h
74b1de
+++ b/glusterfsd/src/glusterfsd.h
74b1de
@@ -111,7 +111,8 @@ enum argp_option_keys {
74b1de
     ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY = 189,
74b1de
     ARGP_FUSE_LRU_LIMIT_KEY = 190,
74b1de
     ARGP_FUSE_AUTO_INVAL_KEY = 191,
74b1de
-    ARGP_BRICK_MUX_KEY = 192
74b1de
+    ARGP_BRICK_MUX_KEY = 192,
74b1de
+    ARGP_FUSE_INVALIDATE_LIMIT_KEY = 195,
74b1de
 };
74b1de
 
74b1de
 struct _gfd_vol_top_priv {
74b1de
diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
74b1de
index 79c93ae..3b594c0 100644
74b1de
--- a/libglusterfs/src/glusterfs/glusterfs.h
74b1de
+++ b/libglusterfs/src/glusterfs/glusterfs.h
74b1de
@@ -541,6 +541,7 @@ struct _cmd_args {
74b1de
     int client_pid_set;
74b1de
     unsigned uid_map_root;
74b1de
     int32_t lru_limit;
74b1de
+    int32_t invalidate_limit;
74b1de
     int background_qlen;
74b1de
     int congestion_threshold;
74b1de
     char *fuse_mountopts;
74b1de
diff --git a/libglusterfs/src/glusterfs/inode.h b/libglusterfs/src/glusterfs/inode.h
74b1de
index 52efdd8..4421c47 100644
74b1de
--- a/libglusterfs/src/glusterfs/inode.h
74b1de
+++ b/libglusterfs/src/glusterfs/inode.h
74b1de
@@ -107,6 +107,7 @@ struct _inode {
74b1de
     struct list_head list;        /* active/lru/purge */
74b1de
 
74b1de
     struct _inode_ctx *_ctx; /* replacement for dict_t *(inode->ctx) */
74b1de
+    bool in_invalidate_list; /* Set if inode is in table invalidate list */
74b1de
     bool invalidate_sent;    /* Set it if invalidator_fn is called for inode */
74b1de
 };
74b1de
 
74b1de
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
74b1de
index 96ddea5..5331e93 100644
74b1de
--- a/libglusterfs/src/inode.c
74b1de
+++ b/libglusterfs/src/inode.c
74b1de
@@ -558,8 +558,8 @@ __inode_unref(inode_t *inode, bool clear)
74b1de
 
74b1de
     this = THIS;
74b1de
 
74b1de
-    if (clear && inode->invalidate_sent) {
74b1de
-        inode->invalidate_sent = false;
74b1de
+    if (clear && inode->in_invalidate_list) {
74b1de
+        inode->in_invalidate_list = false;
74b1de
         inode->table->invalidate_size--;
74b1de
         __inode_activate(inode);
74b1de
     }
74b1de
@@ -573,7 +573,7 @@ __inode_unref(inode_t *inode, bool clear)
74b1de
         inode->_ctx[index].ref--;
74b1de
     }
74b1de
 
74b1de
-    if (!inode->ref && !inode->invalidate_sent) {
74b1de
+    if (!inode->ref && !inode->in_invalidate_list) {
74b1de
         inode->table->active_size--;
74b1de
 
74b1de
         nlookup = GF_ATOMIC_GET(inode->nlookup);
74b1de
@@ -609,14 +609,14 @@ __inode_ref(inode_t *inode, bool is_invalidate)
74b1de
         return inode;
74b1de
 
74b1de
     if (!inode->ref) {
74b1de
-        if (inode->invalidate_sent) {
74b1de
-            inode->invalidate_sent = false;
74b1de
+        if (inode->in_invalidate_list) {
74b1de
+            inode->in_invalidate_list = false;
74b1de
             inode->table->invalidate_size--;
74b1de
         } else {
74b1de
             inode->table->lru_size--;
74b1de
         }
74b1de
         if (is_invalidate) {
74b1de
-            inode->invalidate_sent = true;
74b1de
+            inode->in_invalidate_list = true;
74b1de
             inode->table->invalidate_size++;
74b1de
             list_move_tail(&inode->list, &inode->table->invalidate);
74b1de
         } else {
74b1de
@@ -1609,6 +1609,7 @@ static int
74b1de
 inode_table_prune(inode_table_t *table)
74b1de
 {
74b1de
     int ret = 0;
74b1de
+    int ret1 = 0;
74b1de
     struct list_head purge = {
74b1de
         0,
74b1de
     };
74b1de
@@ -1647,6 +1648,10 @@ inode_table_prune(inode_table_t *table)
74b1de
                 /* check for valid inode with 'nlookup' */
74b1de
                 nlookup = GF_ATOMIC_GET(entry->nlookup);
74b1de
                 if (nlookup) {
74b1de
+                    if (entry->invalidate_sent) {
74b1de
+                        list_move_tail(&entry->list, &table->lru);
74b1de
+                        continue;
74b1de
+                    }
74b1de
                     __inode_ref(entry, true);
74b1de
                     tmp = entry;
74b1de
                     break;
74b1de
@@ -1668,9 +1673,19 @@ inode_table_prune(inode_table_t *table)
74b1de
     if (tmp) {
74b1de
         xlator_t *old_THIS = THIS;
74b1de
         THIS = table->invalidator_xl;
74b1de
-        table->invalidator_fn(table->invalidator_xl, tmp);
74b1de
+        ret1 = table->invalidator_fn(table->invalidator_xl, tmp);
74b1de
         THIS = old_THIS;
74b1de
-        inode_unref(tmp);
74b1de
+        pthread_mutex_lock(&table->lock);
74b1de
+        {
74b1de
+            if (!ret1) {
74b1de
+                tmp->invalidate_sent = true;
74b1de
+                __inode_unref(tmp, false);
74b1de
+            } else {
74b1de
+                /* Move this back to the lru list*/
74b1de
+                __inode_unref(tmp, true);
74b1de
+            }
74b1de
+        }
74b1de
+        pthread_mutex_unlock(&table->lock);
74b1de
     }
74b1de
 
74b1de
     /* Just so that if purge list is handled too, then clear it off */
74b1de
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
74b1de
index 1c946a2..8b2e7f0 100644
74b1de
--- a/xlators/mount/fuse/src/fuse-bridge.c
74b1de
+++ b/xlators/mount/fuse/src/fuse-bridge.c
74b1de
@@ -26,7 +26,7 @@ static int gf_fuse_xattr_enotsup_log;
74b1de
 void
74b1de
 fini(xlator_t *this_xl);
74b1de
 
74b1de
-static void
74b1de
+static int32_t
74b1de
 fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino);
74b1de
 
74b1de
 /*
74b1de
@@ -312,7 +312,7 @@ send_fuse_data(xlator_t *this, fuse_in_header_t *finh, void *data, size_t size)
74b1de
 #define send_fuse_obj(this, finh, obj)                                         \
74b1de
     send_fuse_data(this, finh, obj, sizeof(*(obj)))
74b1de
 
74b1de
-static void
74b1de
+static int32_t
74b1de
 fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino)
74b1de
 {
74b1de
 #if FUSE_KERNEL_MINOR_VERSION >= 11
74b1de
@@ -328,17 +328,22 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino)
74b1de
 
74b1de
     priv = this->private;
74b1de
     if (!priv->reverse_fuse_thread_started)
74b1de
-        return;
74b1de
+        return -1;
74b1de
+
74b1de
+    if (priv->invalidate_limit &&
74b1de
+        (priv->invalidate_count >= priv->invalidate_limit)) {
74b1de
+        return -1;
74b1de
+    }
74b1de
 
74b1de
     inode = (inode_t *)(unsigned long)fuse_ino;
74b1de
     if (inode == NULL)
74b1de
-        return;
74b1de
+        return -1;
74b1de
 
74b1de
     list_for_each_entry_safe(dentry, tmp, &inode->dentry_list, inode_list)
74b1de
     {
74b1de
         node = GF_CALLOC(1, sizeof(*node), gf_fuse_mt_invalidate_node_t);
74b1de
         if (node == NULL)
74b1de
-            break;
74b1de
+            return -1;
74b1de
 
74b1de
         INIT_LIST_HEAD(&node->next);
74b1de
 
74b1de
@@ -375,20 +380,21 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino)
74b1de
         pthread_mutex_lock(&priv->invalidate_mutex);
74b1de
         {
74b1de
             list_add_tail(&node->next, &priv->invalidate_list);
74b1de
+            priv->invalidate_count++;
74b1de
             pthread_cond_signal(&priv->invalidate_cond);
74b1de
         }
74b1de
         pthread_mutex_unlock(&priv->invalidate_mutex);
74b1de
     }
74b1de
 
74b1de
 #endif
74b1de
-    return;
74b1de
+    return 0;
74b1de
 }
74b1de
 
74b1de
 /*
74b1de
  * Send an inval inode notification to fuse. This causes an invalidation of the
74b1de
  * entire page cache mapping on the inode.
74b1de
  */
74b1de
-static void
74b1de
+static int32_t
74b1de
 fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
74b1de
 {
74b1de
 #if FUSE_KERNEL_MINOR_VERSION >= 11
74b1de
@@ -401,15 +407,20 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
74b1de
     priv = this->private;
74b1de
 
74b1de
     if (!priv->reverse_fuse_thread_started)
74b1de
-        return;
74b1de
+        return -1;
74b1de
+
74b1de
+    if (priv->invalidate_limit &&
74b1de
+        (priv->invalidate_count >= priv->invalidate_limit)) {
74b1de
+        return -1;
74b1de
+    }
74b1de
 
74b1de
     inode = (inode_t *)(unsigned long)fuse_ino;
74b1de
     if (inode == NULL)
74b1de
-        return;
74b1de
+        return -1;
74b1de
 
74b1de
     node = GF_CALLOC(1, sizeof(*node), gf_fuse_mt_invalidate_node_t);
74b1de
     if (node == NULL)
74b1de
-        return;
74b1de
+        return -1;
74b1de
 
74b1de
     INIT_LIST_HEAD(&node->next);
74b1de
 
74b1de
@@ -435,6 +446,7 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
74b1de
     pthread_mutex_lock(&priv->invalidate_mutex);
74b1de
     {
74b1de
         list_add_tail(&node->next, &priv->invalidate_list);
74b1de
+        priv->invalidate_count++;
74b1de
         pthread_cond_signal(&priv->invalidate_cond);
74b1de
     }
74b1de
     pthread_mutex_unlock(&priv->invalidate_mutex);
74b1de
@@ -443,7 +455,7 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
74b1de
     gf_log("glusterfs-fuse", GF_LOG_WARNING,
74b1de
            "fuse_invalidate_inode not implemented on this system");
74b1de
 #endif
74b1de
-    return;
74b1de
+    return 0;
74b1de
 }
74b1de
 
74b1de
 #if FUSE_KERNEL_MINOR_VERSION >= 11
74b1de
@@ -451,8 +463,9 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
74b1de
 static int32_t
74b1de
 fuse_inode_invalidate_fn(xlator_t *this, inode_t *inode)
74b1de
 {
74b1de
-    fuse_invalidate_entry(this, (uint64_t)(uintptr_t)inode);
74b1de
-    return 0;
74b1de
+    int32_t ret = 0;
74b1de
+    ret = fuse_invalidate_entry(this, (uint64_t)(uintptr_t)inode);
74b1de
+    return ret;
74b1de
 }
74b1de
 #endif
74b1de
 
74b1de
@@ -4003,7 +4016,9 @@ fuse_setxattr(xlator_t *this, fuse_in_header_t *finh, void *msg,
74b1de
         gf_log("fuse", GF_LOG_TRACE, "got request to invalidate %" PRIu64,
74b1de
                finh->nodeid);
74b1de
 #if FUSE_KERNEL_MINOR_VERSION >= 11
74b1de
-        fuse_invalidate_entry(this, finh->nodeid);
74b1de
+        ret = fuse_invalidate_entry(this, finh->nodeid);
74b1de
+        if (ret)
74b1de
+            op_errno = EBUSY;
74b1de
 #endif
74b1de
         goto done;
74b1de
     }
74b1de
@@ -4812,6 +4827,7 @@ notify_kernel_loop(void *data)
74b1de
                               fuse_invalidate_node_t, next);
74b1de
 
74b1de
             list_del_init(&node->next);
74b1de
+            priv->invalidate_count--;
74b1de
         }
74b1de
         pthread_mutex_unlock(&priv->invalidate_mutex);
74b1de
 
74b1de
@@ -4855,6 +4871,7 @@ notify_kernel_loop(void *data)
74b1de
             list_del_init(&node->next);
74b1de
             GF_FREE(node);
74b1de
         }
74b1de
+        priv->invalidate_count = 0;
74b1de
     }
74b1de
     pthread_mutex_unlock(&priv->invalidate_mutex);
74b1de
 
74b1de
@@ -6080,6 +6097,9 @@ fuse_priv_dump(xlator_t *this)
74b1de
                        (int)private->timed_response_fuse_thread_started);
74b1de
     gf_proc_dump_write("reverse_thread_started", "%d",
74b1de
                        (int)private->reverse_fuse_thread_started);
74b1de
+    gf_proc_dump_write("invalidate_limit", "%u", private->invalidate_limit);
74b1de
+    gf_proc_dump_write("invalidate_queue_length", "%" PRIu64,
74b1de
+                       private->invalidate_count);
74b1de
     gf_proc_dump_write("use_readdirp", "%d", private->use_readdirp);
74b1de
 
74b1de
     return 0;
74b1de
@@ -6619,6 +6639,9 @@ init(xlator_t *this_xl)
74b1de
 
74b1de
     GF_OPTION_INIT("lru-limit", priv->lru_limit, uint32, cleanup_exit);
74b1de
 
74b1de
+    GF_OPTION_INIT("invalidate-limit", priv->invalidate_limit, uint32,
74b1de
+                   cleanup_exit);
74b1de
+
74b1de
     GF_OPTION_INIT("event-history", priv->event_history, bool, cleanup_exit);
74b1de
 
74b1de
     GF_OPTION_INIT("thin-client", priv->thin_client, bool, cleanup_exit);
74b1de
@@ -6955,6 +6978,15 @@ struct volume_options options[] = {
74b1de
                        "reaching this limit (0 means 'unlimited')",
74b1de
     },
74b1de
     {
74b1de
+        .key = {"invalidate-limit"},
74b1de
+        .type = GF_OPTION_TYPE_INT,
74b1de
+        .default_value = "0",
74b1de
+        .min = 0,
74b1de
+        .description = "suspend invalidations as of 'lru-limit' if the number "
74b1de
+                       "of outstanding invalidations reaches this limit "
74b1de
+                       "(0 means 'unlimited')",
74b1de
+    },
74b1de
+    {
74b1de
         .key = {"auto-invalidation"},
74b1de
         .type = GF_OPTION_TYPE_BOOL,
74b1de
         .default_value = "true",
74b1de
diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h
74b1de
index 697bd88..2311582 100644
74b1de
--- a/xlators/mount/fuse/src/fuse-bridge.h
74b1de
+++ b/xlators/mount/fuse/src/fuse-bridge.h
74b1de
@@ -139,7 +139,7 @@ struct fuse_private {
74b1de
     pthread_cond_t invalidate_cond;
74b1de
     pthread_mutex_t invalidate_mutex;
74b1de
     gf_boolean_t reverse_fuse_thread_started;
74b1de
-
74b1de
+    uint64_t invalidate_count;
74b1de
     /* For communicating with separate mount thread. */
74b1de
     int status_pipe[2];
74b1de
 
74b1de
@@ -191,6 +191,7 @@ struct fuse_private {
74b1de
 
74b1de
     /* LRU Limit, if not set, default is 128k for now */
74b1de
     uint32_t lru_limit;
74b1de
+    uint32_t invalidate_limit;
74b1de
 };
74b1de
 typedef struct fuse_private fuse_private_t;
74b1de
 
74b1de
diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in
74b1de
index cbde42d..61d7422 100755
74b1de
--- a/xlators/mount/fuse/utils/mount.glusterfs.in
74b1de
+++ b/xlators/mount/fuse/utils/mount.glusterfs.in
74b1de
@@ -257,6 +257,10 @@ start_glusterfs ()
74b1de
         cmd_line=$(echo "$cmd_line --lru-limit=$lru_limit");
74b1de
     fi
74b1de
 
74b1de
+    if [ -n "$invalidate_limit" ]; then
74b1de
+        cmd_line=$(echo "$cmd_line --invalidate-limit=$invalidate_limit");
74b1de
+    fi
74b1de
+
74b1de
     if [ -n "$bg_qlen" ]; then
74b1de
         cmd_line=$(echo "$cmd_line --background-qlen=$bg_qlen");
74b1de
     fi
74b1de
@@ -505,6 +509,9 @@ with_options()
74b1de
         "lru-limit")
74b1de
             lru_limit=$value
74b1de
             ;;
74b1de
+        "invalidate-limit")
74b1de
+            invalidate_limit=$value
74b1de
+            ;;
74b1de
         "background-qlen")
74b1de
             bg_qlen=$value
74b1de
             ;;
74b1de
-- 
74b1de
1.8.3.1
74b1de