17b94a
From ddb0038de77a4269fa7eed1bb217bfb6bed1b7ba Mon Sep 17 00:00:00 2001
17b94a
From: N Balachandran <nbalacha@redhat.com>
17b94a
Date: Fri, 9 Aug 2019 14:34:22 +0530
17b94a
Subject: [PATCH 337/344] fuse: Set limit on invalidate queue size
17b94a
17b94a
If the glusterfs fuse client process is unable to
17b94a
process the invalidate requests quickly enough, the
17b94a
number of such requests quickly grows large enough
17b94a
to use a significant amount of memory.
17b94a
We are now introducing another option to set an upper
17b94a
limit on these to prevent runaway memory usage.
17b94a
17b94a
> Upstream https://review.gluster.org/23187
17b94a
> Change-Id: Iddfff1ee2de1466223e6717f7abd4b28ed947788
17b94a
> Fixes: bz#1732717
17b94a
> Signed-off-by: N Balachandran <nbalacha@redhat.com>
17b94a
17b94a
BUG: 1763208
17b94a
Change-Id: I666cdf6c70999a0f0bc79969e8df0a9dde93b6e4
17b94a
Signed-off-by: Csaba Henk <csaba@redhat.com>
17b94a
Reviewed-on: https://code.engineering.redhat.com/gerrit/187529
17b94a
Tested-by: RHGS Build Bot <nigelb@redhat.com>
17b94a
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
17b94a
---
17b94a
 doc/mount.glusterfs.8                       |  5 +++
17b94a
 glusterfsd/src/glusterfsd.c                 | 21 ++++++++++
17b94a
 glusterfsd/src/glusterfsd.h                 |  3 +-
17b94a
 libglusterfs/src/glusterfs/glusterfs.h      |  1 +
17b94a
 libglusterfs/src/glusterfs/inode.h          |  1 +
17b94a
 libglusterfs/src/inode.c                    | 31 +++++++++++----
17b94a
 xlators/mount/fuse/src/fuse-bridge.c        | 60 ++++++++++++++++++++++-------
17b94a
 xlators/mount/fuse/src/fuse-bridge.h        |  3 +-
17b94a
 xlators/mount/fuse/utils/mount.glusterfs.in |  7 ++++
17b94a
 9 files changed, 108 insertions(+), 24 deletions(-)
17b94a
17b94a
diff --git a/doc/mount.glusterfs.8 b/doc/mount.glusterfs.8
17b94a
index 286631b..b35b362 100644
17b94a
--- a/doc/mount.glusterfs.8
17b94a
+++ b/doc/mount.glusterfs.8
17b94a
@@ -126,6 +126,11 @@ Provide list of backup volfile servers in the following format [default: None]
17b94a
 Set fuse module's limit for number of inodes kept in LRU list to N [default: 131072]
17b94a
 .TP
17b94a
 .TP
17b94a
+\fBinvalidate-limit=\fRN
17b94a
+Suspend fuse invalidations implied by 'lru-limit' if  number of outstanding
17b94a
+invalidations reaches N
17b94a
+.TP
17b94a
+.TP
17b94a
 \fBbackground-qlen=\fRN
17b94a
 Set fuse module's background queue length to N [default: 64]
17b94a
 .TP
17b94a
diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
17b94a
index 5b5e996..0856471 100644
17b94a
--- a/glusterfsd/src/glusterfsd.c
17b94a
+++ b/glusterfsd/src/glusterfsd.c
17b94a
@@ -212,6 +212,9 @@ static struct argp_option gf_options[] = {
17b94a
     {"lru-limit", ARGP_FUSE_LRU_LIMIT_KEY, "N", 0,
17b94a
      "Set fuse module's limit for number of inodes kept in LRU list to N "
17b94a
      "[default: 131072]"},
17b94a
+    {"invalidate-limit", ARGP_FUSE_INVALIDATE_LIMIT_KEY, "N", 0,
17b94a
+     "Suspend inode invalidations implied by 'lru-limit' if the number of "
17b94a
+     "outstanding invalidations reaches N"},
17b94a
     {"background-qlen", ARGP_FUSE_BACKGROUND_QLEN_KEY, "N", 0,
17b94a
      "Set fuse module's background queue length to N "
17b94a
      "[default: 64]"},
17b94a
@@ -504,6 +507,16 @@ set_fuse_mount_options(glusterfs_ctx_t *ctx, dict_t *options)
17b94a
         }
17b94a
     }
17b94a
 
17b94a
+    if (cmd_args->invalidate_limit >= 0) {
17b94a
+        ret = dict_set_int32(options, "invalidate-limit",
17b94a
+                             cmd_args->invalidate_limit);
17b94a
+        if (ret < 0) {
17b94a
+            gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
17b94a
+                   "invalidate-limit");
17b94a
+            goto err;
17b94a
+        }
17b94a
+    }
17b94a
+
17b94a
     if (cmd_args->background_qlen) {
17b94a
         ret = dict_set_int32(options, "background-qlen",
17b94a
                              cmd_args->background_qlen);
17b94a
@@ -1283,6 +1296,14 @@ parse_opts(int key, char *arg, struct argp_state *state)
17b94a
             argp_failure(state, -1, 0, "unknown LRU limit option %s", arg);
17b94a
             break;
17b94a
 
17b94a
+        case ARGP_FUSE_INVALIDATE_LIMIT_KEY:
17b94a
+            if (!gf_string2int32(arg, &cmd_args->invalidate_limit))
17b94a
+                break;
17b94a
+
17b94a
+            argp_failure(state, -1, 0, "unknown invalidate limit option %s",
17b94a
+                         arg);
17b94a
+            break;
17b94a
+
17b94a
         case ARGP_FUSE_BACKGROUND_QLEN_KEY:
17b94a
             if (!gf_string2int(arg, &cmd_args->background_qlen))
17b94a
                 break;
17b94a
diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h
17b94a
index fa55789..ee655f0 100644
17b94a
--- a/glusterfsd/src/glusterfsd.h
17b94a
+++ b/glusterfsd/src/glusterfsd.h
17b94a
@@ -111,7 +111,8 @@ enum argp_option_keys {
17b94a
     ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY = 189,
17b94a
     ARGP_FUSE_LRU_LIMIT_KEY = 190,
17b94a
     ARGP_FUSE_AUTO_INVAL_KEY = 191,
17b94a
-    ARGP_BRICK_MUX_KEY = 192
17b94a
+    ARGP_BRICK_MUX_KEY = 192,
17b94a
+    ARGP_FUSE_INVALIDATE_LIMIT_KEY = 195,
17b94a
 };
17b94a
 
17b94a
 struct _gfd_vol_top_priv {
17b94a
diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
17b94a
index 79c93ae..3b594c0 100644
17b94a
--- a/libglusterfs/src/glusterfs/glusterfs.h
17b94a
+++ b/libglusterfs/src/glusterfs/glusterfs.h
17b94a
@@ -541,6 +541,7 @@ struct _cmd_args {
17b94a
     int client_pid_set;
17b94a
     unsigned uid_map_root;
17b94a
     int32_t lru_limit;
17b94a
+    int32_t invalidate_limit;
17b94a
     int background_qlen;
17b94a
     int congestion_threshold;
17b94a
     char *fuse_mountopts;
17b94a
diff --git a/libglusterfs/src/glusterfs/inode.h b/libglusterfs/src/glusterfs/inode.h
17b94a
index 52efdd8..4421c47 100644
17b94a
--- a/libglusterfs/src/glusterfs/inode.h
17b94a
+++ b/libglusterfs/src/glusterfs/inode.h
17b94a
@@ -107,6 +107,7 @@ struct _inode {
17b94a
     struct list_head list;        /* active/lru/purge */
17b94a
 
17b94a
     struct _inode_ctx *_ctx; /* replacement for dict_t *(inode->ctx) */
17b94a
+    bool in_invalidate_list; /* Set if inode is in table invalidate list */
17b94a
     bool invalidate_sent;    /* Set it if invalidator_fn is called for inode */
17b94a
 };
17b94a
 
17b94a
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
17b94a
index 96ddea5..5331e93 100644
17b94a
--- a/libglusterfs/src/inode.c
17b94a
+++ b/libglusterfs/src/inode.c
17b94a
@@ -558,8 +558,8 @@ __inode_unref(inode_t *inode, bool clear)
17b94a
 
17b94a
     this = THIS;
17b94a
 
17b94a
-    if (clear && inode->invalidate_sent) {
17b94a
-        inode->invalidate_sent = false;
17b94a
+    if (clear && inode->in_invalidate_list) {
17b94a
+        inode->in_invalidate_list = false;
17b94a
         inode->table->invalidate_size--;
17b94a
         __inode_activate(inode);
17b94a
     }
17b94a
@@ -573,7 +573,7 @@ __inode_unref(inode_t *inode, bool clear)
17b94a
         inode->_ctx[index].ref--;
17b94a
     }
17b94a
 
17b94a
-    if (!inode->ref && !inode->invalidate_sent) {
17b94a
+    if (!inode->ref && !inode->in_invalidate_list) {
17b94a
         inode->table->active_size--;
17b94a
 
17b94a
         nlookup = GF_ATOMIC_GET(inode->nlookup);
17b94a
@@ -609,14 +609,14 @@ __inode_ref(inode_t *inode, bool is_invalidate)
17b94a
         return inode;
17b94a
 
17b94a
     if (!inode->ref) {
17b94a
-        if (inode->invalidate_sent) {
17b94a
-            inode->invalidate_sent = false;
17b94a
+        if (inode->in_invalidate_list) {
17b94a
+            inode->in_invalidate_list = false;
17b94a
             inode->table->invalidate_size--;
17b94a
         } else {
17b94a
             inode->table->lru_size--;
17b94a
         }
17b94a
         if (is_invalidate) {
17b94a
-            inode->invalidate_sent = true;
17b94a
+            inode->in_invalidate_list = true;
17b94a
             inode->table->invalidate_size++;
17b94a
             list_move_tail(&inode->list, &inode->table->invalidate);
17b94a
         } else {
17b94a
@@ -1609,6 +1609,7 @@ static int
17b94a
 inode_table_prune(inode_table_t *table)
17b94a
 {
17b94a
     int ret = 0;
17b94a
+    int ret1 = 0;
17b94a
     struct list_head purge = {
17b94a
         0,
17b94a
     };
17b94a
@@ -1647,6 +1648,10 @@ inode_table_prune(inode_table_t *table)
17b94a
                 /* check for valid inode with 'nlookup' */
17b94a
                 nlookup = GF_ATOMIC_GET(entry->nlookup);
17b94a
                 if (nlookup) {
17b94a
+                    if (entry->invalidate_sent) {
17b94a
+                        list_move_tail(&entry->list, &table->lru);
17b94a
+                        continue;
17b94a
+                    }
17b94a
                     __inode_ref(entry, true);
17b94a
                     tmp = entry;
17b94a
                     break;
17b94a
@@ -1668,9 +1673,19 @@ inode_table_prune(inode_table_t *table)
17b94a
     if (tmp) {
17b94a
         xlator_t *old_THIS = THIS;
17b94a
         THIS = table->invalidator_xl;
17b94a
-        table->invalidator_fn(table->invalidator_xl, tmp);
17b94a
+        ret1 = table->invalidator_fn(table->invalidator_xl, tmp);
17b94a
         THIS = old_THIS;
17b94a
-        inode_unref(tmp);
17b94a
+        pthread_mutex_lock(&table->lock);
17b94a
+        {
17b94a
+            if (!ret1) {
17b94a
+                tmp->invalidate_sent = true;
17b94a
+                __inode_unref(tmp, false);
17b94a
+            } else {
17b94a
+                /* Move this back to the lru list*/
17b94a
+                __inode_unref(tmp, true);
17b94a
+            }
17b94a
+        }
17b94a
+        pthread_mutex_unlock(&table->lock);
17b94a
     }
17b94a
 
17b94a
     /* Just so that if purge list is handled too, then clear it off */
17b94a
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
17b94a
index 1c946a2..8b2e7f0 100644
17b94a
--- a/xlators/mount/fuse/src/fuse-bridge.c
17b94a
+++ b/xlators/mount/fuse/src/fuse-bridge.c
17b94a
@@ -26,7 +26,7 @@ static int gf_fuse_xattr_enotsup_log;
17b94a
 void
17b94a
 fini(xlator_t *this_xl);
17b94a
 
17b94a
-static void
17b94a
+static int32_t
17b94a
 fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino);
17b94a
 
17b94a
 /*
17b94a
@@ -312,7 +312,7 @@ send_fuse_data(xlator_t *this, fuse_in_header_t *finh, void *data, size_t size)
17b94a
 #define send_fuse_obj(this, finh, obj)                                         \
17b94a
     send_fuse_data(this, finh, obj, sizeof(*(obj)))
17b94a
 
17b94a
-static void
17b94a
+static int32_t
17b94a
 fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino)
17b94a
 {
17b94a
 #if FUSE_KERNEL_MINOR_VERSION >= 11
17b94a
@@ -328,17 +328,22 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino)
17b94a
 
17b94a
     priv = this->private;
17b94a
     if (!priv->reverse_fuse_thread_started)
17b94a
-        return;
17b94a
+        return -1;
17b94a
+
17b94a
+    if (priv->invalidate_limit &&
17b94a
+        (priv->invalidate_count >= priv->invalidate_limit)) {
17b94a
+        return -1;
17b94a
+    }
17b94a
 
17b94a
     inode = (inode_t *)(unsigned long)fuse_ino;
17b94a
     if (inode == NULL)
17b94a
-        return;
17b94a
+        return -1;
17b94a
 
17b94a
     list_for_each_entry_safe(dentry, tmp, &inode->dentry_list, inode_list)
17b94a
     {
17b94a
         node = GF_CALLOC(1, sizeof(*node), gf_fuse_mt_invalidate_node_t);
17b94a
         if (node == NULL)
17b94a
-            break;
17b94a
+            return -1;
17b94a
 
17b94a
         INIT_LIST_HEAD(&node->next);
17b94a
 
17b94a
@@ -375,20 +380,21 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino)
17b94a
         pthread_mutex_lock(&priv->invalidate_mutex);
17b94a
         {
17b94a
             list_add_tail(&node->next, &priv->invalidate_list);
17b94a
+            priv->invalidate_count++;
17b94a
             pthread_cond_signal(&priv->invalidate_cond);
17b94a
         }
17b94a
         pthread_mutex_unlock(&priv->invalidate_mutex);
17b94a
     }
17b94a
 
17b94a
 #endif
17b94a
-    return;
17b94a
+    return 0;
17b94a
 }
17b94a
 
17b94a
 /*
17b94a
  * Send an inval inode notification to fuse. This causes an invalidation of the
17b94a
  * entire page cache mapping on the inode.
17b94a
  */
17b94a
-static void
17b94a
+static int32_t
17b94a
 fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
17b94a
 {
17b94a
 #if FUSE_KERNEL_MINOR_VERSION >= 11
17b94a
@@ -401,15 +407,20 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
17b94a
     priv = this->private;
17b94a
 
17b94a
     if (!priv->reverse_fuse_thread_started)
17b94a
-        return;
17b94a
+        return -1;
17b94a
+
17b94a
+    if (priv->invalidate_limit &&
17b94a
+        (priv->invalidate_count >= priv->invalidate_limit)) {
17b94a
+        return -1;
17b94a
+    }
17b94a
 
17b94a
     inode = (inode_t *)(unsigned long)fuse_ino;
17b94a
     if (inode == NULL)
17b94a
-        return;
17b94a
+        return -1;
17b94a
 
17b94a
     node = GF_CALLOC(1, sizeof(*node), gf_fuse_mt_invalidate_node_t);
17b94a
     if (node == NULL)
17b94a
-        return;
17b94a
+        return -1;
17b94a
 
17b94a
     INIT_LIST_HEAD(&node->next);
17b94a
 
17b94a
@@ -435,6 +446,7 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
17b94a
     pthread_mutex_lock(&priv->invalidate_mutex);
17b94a
     {
17b94a
         list_add_tail(&node->next, &priv->invalidate_list);
17b94a
+        priv->invalidate_count++;
17b94a
         pthread_cond_signal(&priv->invalidate_cond);
17b94a
     }
17b94a
     pthread_mutex_unlock(&priv->invalidate_mutex);
17b94a
@@ -443,7 +455,7 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
17b94a
     gf_log("glusterfs-fuse", GF_LOG_WARNING,
17b94a
            "fuse_invalidate_inode not implemented on this system");
17b94a
 #endif
17b94a
-    return;
17b94a
+    return 0;
17b94a
 }
17b94a
 
17b94a
 #if FUSE_KERNEL_MINOR_VERSION >= 11
17b94a
@@ -451,8 +463,9 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
17b94a
 static int32_t
17b94a
 fuse_inode_invalidate_fn(xlator_t *this, inode_t *inode)
17b94a
 {
17b94a
-    fuse_invalidate_entry(this, (uint64_t)(uintptr_t)inode);
17b94a
-    return 0;
17b94a
+    int32_t ret = 0;
17b94a
+    ret = fuse_invalidate_entry(this, (uint64_t)(uintptr_t)inode);
17b94a
+    return ret;
17b94a
 }
17b94a
 #endif
17b94a
 
17b94a
@@ -4003,7 +4016,9 @@ fuse_setxattr(xlator_t *this, fuse_in_header_t *finh, void *msg,
17b94a
         gf_log("fuse", GF_LOG_TRACE, "got request to invalidate %" PRIu64,
17b94a
                finh->nodeid);
17b94a
 #if FUSE_KERNEL_MINOR_VERSION >= 11
17b94a
-        fuse_invalidate_entry(this, finh->nodeid);
17b94a
+        ret = fuse_invalidate_entry(this, finh->nodeid);
17b94a
+        if (ret)
17b94a
+            op_errno = EBUSY;
17b94a
 #endif
17b94a
         goto done;
17b94a
     }
17b94a
@@ -4812,6 +4827,7 @@ notify_kernel_loop(void *data)
17b94a
                               fuse_invalidate_node_t, next);
17b94a
 
17b94a
             list_del_init(&node->next);
17b94a
+            priv->invalidate_count--;
17b94a
         }
17b94a
         pthread_mutex_unlock(&priv->invalidate_mutex);
17b94a
 
17b94a
@@ -4855,6 +4871,7 @@ notify_kernel_loop(void *data)
17b94a
             list_del_init(&node->next);
17b94a
             GF_FREE(node);
17b94a
         }
17b94a
+        priv->invalidate_count = 0;
17b94a
     }
17b94a
     pthread_mutex_unlock(&priv->invalidate_mutex);
17b94a
 
17b94a
@@ -6080,6 +6097,9 @@ fuse_priv_dump(xlator_t *this)
17b94a
                        (int)private->timed_response_fuse_thread_started);
17b94a
     gf_proc_dump_write("reverse_thread_started", "%d",
17b94a
                        (int)private->reverse_fuse_thread_started);
17b94a
+    gf_proc_dump_write("invalidate_limit", "%u", private->invalidate_limit);
17b94a
+    gf_proc_dump_write("invalidate_queue_length", "%" PRIu64,
17b94a
+                       private->invalidate_count);
17b94a
     gf_proc_dump_write("use_readdirp", "%d", private->use_readdirp);
17b94a
 
17b94a
     return 0;
17b94a
@@ -6619,6 +6639,9 @@ init(xlator_t *this_xl)
17b94a
 
17b94a
     GF_OPTION_INIT("lru-limit", priv->lru_limit, uint32, cleanup_exit);
17b94a
 
17b94a
+    GF_OPTION_INIT("invalidate-limit", priv->invalidate_limit, uint32,
17b94a
+                   cleanup_exit);
17b94a
+
17b94a
     GF_OPTION_INIT("event-history", priv->event_history, bool, cleanup_exit);
17b94a
 
17b94a
     GF_OPTION_INIT("thin-client", priv->thin_client, bool, cleanup_exit);
17b94a
@@ -6955,6 +6978,15 @@ struct volume_options options[] = {
17b94a
                        "reaching this limit (0 means 'unlimited')",
17b94a
     },
17b94a
     {
17b94a
+        .key = {"invalidate-limit"},
17b94a
+        .type = GF_OPTION_TYPE_INT,
17b94a
+        .default_value = "0",
17b94a
+        .min = 0,
17b94a
+        .description = "suspend invalidations as of 'lru-limit' if the number "
17b94a
+                       "of outstanding invalidations reaches this limit "
17b94a
+                       "(0 means 'unlimited')",
17b94a
+    },
17b94a
+    {
17b94a
         .key = {"auto-invalidation"},
17b94a
         .type = GF_OPTION_TYPE_BOOL,
17b94a
         .default_value = "true",
17b94a
diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h
17b94a
index 697bd88..2311582 100644
17b94a
--- a/xlators/mount/fuse/src/fuse-bridge.h
17b94a
+++ b/xlators/mount/fuse/src/fuse-bridge.h
17b94a
@@ -139,7 +139,7 @@ struct fuse_private {
17b94a
     pthread_cond_t invalidate_cond;
17b94a
     pthread_mutex_t invalidate_mutex;
17b94a
     gf_boolean_t reverse_fuse_thread_started;
17b94a
-
17b94a
+    uint64_t invalidate_count;
17b94a
     /* For communicating with separate mount thread. */
17b94a
     int status_pipe[2];
17b94a
 
17b94a
@@ -191,6 +191,7 @@ struct fuse_private {
17b94a
 
17b94a
     /* LRU Limit, if not set, default is 128k for now */
17b94a
     uint32_t lru_limit;
17b94a
+    uint32_t invalidate_limit;
17b94a
 };
17b94a
 typedef struct fuse_private fuse_private_t;
17b94a
 
17b94a
diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in
17b94a
index cbde42d..61d7422 100755
17b94a
--- a/xlators/mount/fuse/utils/mount.glusterfs.in
17b94a
+++ b/xlators/mount/fuse/utils/mount.glusterfs.in
17b94a
@@ -257,6 +257,10 @@ start_glusterfs ()
17b94a
         cmd_line=$(echo "$cmd_line --lru-limit=$lru_limit");
17b94a
     fi
17b94a
 
17b94a
+    if [ -n "$invalidate_limit" ]; then
17b94a
+        cmd_line=$(echo "$cmd_line --invalidate-limit=$invalidate_limit");
17b94a
+    fi
17b94a
+
17b94a
     if [ -n "$bg_qlen" ]; then
17b94a
         cmd_line=$(echo "$cmd_line --background-qlen=$bg_qlen");
17b94a
     fi
17b94a
@@ -505,6 +509,9 @@ with_options()
17b94a
         "lru-limit")
17b94a
             lru_limit=$value
17b94a
             ;;
17b94a
+        "invalidate-limit")
17b94a
+            invalidate_limit=$value
17b94a
+            ;;
17b94a
         "background-qlen")
17b94a
             bg_qlen=$value
17b94a
             ;;
17b94a
-- 
17b94a
1.8.3.1
17b94a