ca3909
From ddb0038de77a4269fa7eed1bb217bfb6bed1b7ba Mon Sep 17 00:00:00 2001
ca3909
From: N Balachandran <nbalacha@redhat.com>
ca3909
Date: Fri, 9 Aug 2019 14:34:22 +0530
ca3909
Subject: [PATCH 337/344] fuse: Set limit on invalidate queue size
ca3909
ca3909
If the glusterfs fuse client process is unable to
ca3909
process the invalidate requests quickly enough, the
ca3909
number of such requests quickly grows large enough
ca3909
to use a significant amount of memory.
ca3909
We are now introducing another option to set an upper
ca3909
limit on these to prevent runaway memory usage.
ca3909
ca3909
> Upstream https://review.gluster.org/23187
ca3909
> Change-Id: Iddfff1ee2de1466223e6717f7abd4b28ed947788
ca3909
> Fixes: bz#1732717
ca3909
> Signed-off-by: N Balachandran <nbalacha@redhat.com>
ca3909
ca3909
BUG: 1763208
ca3909
Change-Id: I666cdf6c70999a0f0bc79969e8df0a9dde93b6e4
ca3909
Signed-off-by: Csaba Henk <csaba@redhat.com>
ca3909
Reviewed-on: https://code.engineering.redhat.com/gerrit/187529
ca3909
Tested-by: RHGS Build Bot <nigelb@redhat.com>
ca3909
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
ca3909
---
ca3909
 doc/mount.glusterfs.8                       |  5 +++
ca3909
 glusterfsd/src/glusterfsd.c                 | 21 ++++++++++
ca3909
 glusterfsd/src/glusterfsd.h                 |  3 +-
ca3909
 libglusterfs/src/glusterfs/glusterfs.h      |  1 +
ca3909
 libglusterfs/src/glusterfs/inode.h          |  1 +
ca3909
 libglusterfs/src/inode.c                    | 31 +++++++++++----
ca3909
 xlators/mount/fuse/src/fuse-bridge.c        | 60 ++++++++++++++++++++++-------
ca3909
 xlators/mount/fuse/src/fuse-bridge.h        |  3 +-
ca3909
 xlators/mount/fuse/utils/mount.glusterfs.in |  7 ++++
ca3909
 9 files changed, 108 insertions(+), 24 deletions(-)
ca3909
ca3909
diff --git a/doc/mount.glusterfs.8 b/doc/mount.glusterfs.8
ca3909
index 286631b..b35b362 100644
ca3909
--- a/doc/mount.glusterfs.8
ca3909
+++ b/doc/mount.glusterfs.8
ca3909
@@ -126,6 +126,11 @@ Provide list of backup volfile servers in the following format [default: None]
ca3909
 Set fuse module's limit for number of inodes kept in LRU list to N [default: 131072]
ca3909
 .TP
ca3909
 .TP
ca3909
+\fBinvalidate-limit=\fRN
ca3909
+Suspend fuse invalidations implied by 'lru-limit' if  number of outstanding
ca3909
+invalidations reaches N
ca3909
+.TP
ca3909
+.TP
ca3909
 \fBbackground-qlen=\fRN
ca3909
 Set fuse module's background queue length to N [default: 64]
ca3909
 .TP
ca3909
diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
ca3909
index 5b5e996..0856471 100644
ca3909
--- a/glusterfsd/src/glusterfsd.c
ca3909
+++ b/glusterfsd/src/glusterfsd.c
ca3909
@@ -212,6 +212,9 @@ static struct argp_option gf_options[] = {
ca3909
     {"lru-limit", ARGP_FUSE_LRU_LIMIT_KEY, "N", 0,
ca3909
      "Set fuse module's limit for number of inodes kept in LRU list to N "
ca3909
      "[default: 131072]"},
ca3909
+    {"invalidate-limit", ARGP_FUSE_INVALIDATE_LIMIT_KEY, "N", 0,
ca3909
+     "Suspend inode invalidations implied by 'lru-limit' if the number of "
ca3909
+     "outstanding invalidations reaches N"},
ca3909
     {"background-qlen", ARGP_FUSE_BACKGROUND_QLEN_KEY, "N", 0,
ca3909
      "Set fuse module's background queue length to N "
ca3909
      "[default: 64]"},
ca3909
@@ -504,6 +507,16 @@ set_fuse_mount_options(glusterfs_ctx_t *ctx, dict_t *options)
ca3909
         }
ca3909
     }
ca3909
 
ca3909
+    if (cmd_args->invalidate_limit >= 0) {
ca3909
+        ret = dict_set_int32(options, "invalidate-limit",
ca3909
+                             cmd_args->invalidate_limit);
ca3909
+        if (ret < 0) {
ca3909
+            gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
ca3909
+                   "invalidate-limit");
ca3909
+            goto err;
ca3909
+        }
ca3909
+    }
ca3909
+
ca3909
     if (cmd_args->background_qlen) {
ca3909
         ret = dict_set_int32(options, "background-qlen",
ca3909
                              cmd_args->background_qlen);
ca3909
@@ -1283,6 +1296,14 @@ parse_opts(int key, char *arg, struct argp_state *state)
ca3909
             argp_failure(state, -1, 0, "unknown LRU limit option %s", arg);
ca3909
             break;
ca3909
 
ca3909
+        case ARGP_FUSE_INVALIDATE_LIMIT_KEY:
ca3909
+            if (!gf_string2int32(arg, &cmd_args->invalidate_limit))
ca3909
+                break;
ca3909
+
ca3909
+            argp_failure(state, -1, 0, "unknown invalidate limit option %s",
ca3909
+                         arg);
ca3909
+            break;
ca3909
+
ca3909
         case ARGP_FUSE_BACKGROUND_QLEN_KEY:
ca3909
             if (!gf_string2int(arg, &cmd_args->background_qlen))
ca3909
                 break;
ca3909
diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h
ca3909
index fa55789..ee655f0 100644
ca3909
--- a/glusterfsd/src/glusterfsd.h
ca3909
+++ b/glusterfsd/src/glusterfsd.h
ca3909
@@ -111,7 +111,8 @@ enum argp_option_keys {
ca3909
     ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY = 189,
ca3909
     ARGP_FUSE_LRU_LIMIT_KEY = 190,
ca3909
     ARGP_FUSE_AUTO_INVAL_KEY = 191,
ca3909
-    ARGP_BRICK_MUX_KEY = 192
ca3909
+    ARGP_BRICK_MUX_KEY = 192,
ca3909
+    ARGP_FUSE_INVALIDATE_LIMIT_KEY = 195,
ca3909
 };
ca3909
 
ca3909
 struct _gfd_vol_top_priv {
ca3909
diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
ca3909
index 79c93ae..3b594c0 100644
ca3909
--- a/libglusterfs/src/glusterfs/glusterfs.h
ca3909
+++ b/libglusterfs/src/glusterfs/glusterfs.h
ca3909
@@ -541,6 +541,7 @@ struct _cmd_args {
ca3909
     int client_pid_set;
ca3909
     unsigned uid_map_root;
ca3909
     int32_t lru_limit;
ca3909
+    int32_t invalidate_limit;
ca3909
     int background_qlen;
ca3909
     int congestion_threshold;
ca3909
     char *fuse_mountopts;
ca3909
diff --git a/libglusterfs/src/glusterfs/inode.h b/libglusterfs/src/glusterfs/inode.h
ca3909
index 52efdd8..4421c47 100644
ca3909
--- a/libglusterfs/src/glusterfs/inode.h
ca3909
+++ b/libglusterfs/src/glusterfs/inode.h
ca3909
@@ -107,6 +107,7 @@ struct _inode {
ca3909
     struct list_head list;        /* active/lru/purge */
ca3909
 
ca3909
     struct _inode_ctx *_ctx; /* replacement for dict_t *(inode->ctx) */
ca3909
+    bool in_invalidate_list; /* Set if inode is in table invalidate list */
ca3909
     bool invalidate_sent;    /* Set it if invalidator_fn is called for inode */
ca3909
 };
ca3909
 
ca3909
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
ca3909
index 96ddea5..5331e93 100644
ca3909
--- a/libglusterfs/src/inode.c
ca3909
+++ b/libglusterfs/src/inode.c
ca3909
@@ -558,8 +558,8 @@ __inode_unref(inode_t *inode, bool clear)
ca3909
 
ca3909
     this = THIS;
ca3909
 
ca3909
-    if (clear && inode->invalidate_sent) {
ca3909
-        inode->invalidate_sent = false;
ca3909
+    if (clear && inode->in_invalidate_list) {
ca3909
+        inode->in_invalidate_list = false;
ca3909
         inode->table->invalidate_size--;
ca3909
         __inode_activate(inode);
ca3909
     }
ca3909
@@ -573,7 +573,7 @@ __inode_unref(inode_t *inode, bool clear)
ca3909
         inode->_ctx[index].ref--;
ca3909
     }
ca3909
 
ca3909
-    if (!inode->ref && !inode->invalidate_sent) {
ca3909
+    if (!inode->ref && !inode->in_invalidate_list) {
ca3909
         inode->table->active_size--;
ca3909
 
ca3909
         nlookup = GF_ATOMIC_GET(inode->nlookup);
ca3909
@@ -609,14 +609,14 @@ __inode_ref(inode_t *inode, bool is_invalidate)
ca3909
         return inode;
ca3909
 
ca3909
     if (!inode->ref) {
ca3909
-        if (inode->invalidate_sent) {
ca3909
-            inode->invalidate_sent = false;
ca3909
+        if (inode->in_invalidate_list) {
ca3909
+            inode->in_invalidate_list = false;
ca3909
             inode->table->invalidate_size--;
ca3909
         } else {
ca3909
             inode->table->lru_size--;
ca3909
         }
ca3909
         if (is_invalidate) {
ca3909
-            inode->invalidate_sent = true;
ca3909
+            inode->in_invalidate_list = true;
ca3909
             inode->table->invalidate_size++;
ca3909
             list_move_tail(&inode->list, &inode->table->invalidate);
ca3909
         } else {
ca3909
@@ -1609,6 +1609,7 @@ static int
ca3909
 inode_table_prune(inode_table_t *table)
ca3909
 {
ca3909
     int ret = 0;
ca3909
+    int ret1 = 0;
ca3909
     struct list_head purge = {
ca3909
         0,
ca3909
     };
ca3909
@@ -1647,6 +1648,10 @@ inode_table_prune(inode_table_t *table)
ca3909
                 /* check for valid inode with 'nlookup' */
ca3909
                 nlookup = GF_ATOMIC_GET(entry->nlookup);
ca3909
                 if (nlookup) {
ca3909
+                    if (entry->invalidate_sent) {
ca3909
+                        list_move_tail(&entry->list, &table->lru);
ca3909
+                        continue;
ca3909
+                    }
ca3909
                     __inode_ref(entry, true);
ca3909
                     tmp = entry;
ca3909
                     break;
ca3909
@@ -1668,9 +1673,19 @@ inode_table_prune(inode_table_t *table)
ca3909
     if (tmp) {
ca3909
         xlator_t *old_THIS = THIS;
ca3909
         THIS = table->invalidator_xl;
ca3909
-        table->invalidator_fn(table->invalidator_xl, tmp);
ca3909
+        ret1 = table->invalidator_fn(table->invalidator_xl, tmp);
ca3909
         THIS = old_THIS;
ca3909
-        inode_unref(tmp);
ca3909
+        pthread_mutex_lock(&table->lock);
ca3909
+        {
ca3909
+            if (!ret1) {
ca3909
+                tmp->invalidate_sent = true;
ca3909
+                __inode_unref(tmp, false);
ca3909
+            } else {
ca3909
+                /* Move this back to the lru list*/
ca3909
+                __inode_unref(tmp, true);
ca3909
+            }
ca3909
+        }
ca3909
+        pthread_mutex_unlock(&table->lock);
ca3909
     }
ca3909
 
ca3909
     /* Just so that if purge list is handled too, then clear it off */
ca3909
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
ca3909
index 1c946a2..8b2e7f0 100644
ca3909
--- a/xlators/mount/fuse/src/fuse-bridge.c
ca3909
+++ b/xlators/mount/fuse/src/fuse-bridge.c
ca3909
@@ -26,7 +26,7 @@ static int gf_fuse_xattr_enotsup_log;
ca3909
 void
ca3909
 fini(xlator_t *this_xl);
ca3909
 
ca3909
-static void
ca3909
+static int32_t
ca3909
 fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino);
ca3909
 
ca3909
 /*
ca3909
@@ -312,7 +312,7 @@ send_fuse_data(xlator_t *this, fuse_in_header_t *finh, void *data, size_t size)
ca3909
 #define send_fuse_obj(this, finh, obj)                                         \
ca3909
     send_fuse_data(this, finh, obj, sizeof(*(obj)))
ca3909
 
ca3909
-static void
ca3909
+static int32_t
ca3909
 fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino)
ca3909
 {
ca3909
 #if FUSE_KERNEL_MINOR_VERSION >= 11
ca3909
@@ -328,17 +328,22 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino)
ca3909
 
ca3909
     priv = this->private;
ca3909
     if (!priv->reverse_fuse_thread_started)
ca3909
-        return;
ca3909
+        return -1;
ca3909
+
ca3909
+    if (priv->invalidate_limit &&
ca3909
+        (priv->invalidate_count >= priv->invalidate_limit)) {
ca3909
+        return -1;
ca3909
+    }
ca3909
 
ca3909
     inode = (inode_t *)(unsigned long)fuse_ino;
ca3909
     if (inode == NULL)
ca3909
-        return;
ca3909
+        return -1;
ca3909
 
ca3909
     list_for_each_entry_safe(dentry, tmp, &inode->dentry_list, inode_list)
ca3909
     {
ca3909
         node = GF_CALLOC(1, sizeof(*node), gf_fuse_mt_invalidate_node_t);
ca3909
         if (node == NULL)
ca3909
-            break;
ca3909
+            return -1;
ca3909
 
ca3909
         INIT_LIST_HEAD(&node->next);
ca3909
 
ca3909
@@ -375,20 +380,21 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino)
ca3909
         pthread_mutex_lock(&priv->invalidate_mutex);
ca3909
         {
ca3909
             list_add_tail(&node->next, &priv->invalidate_list);
ca3909
+            priv->invalidate_count++;
ca3909
             pthread_cond_signal(&priv->invalidate_cond);
ca3909
         }
ca3909
         pthread_mutex_unlock(&priv->invalidate_mutex);
ca3909
     }
ca3909
 
ca3909
 #endif
ca3909
-    return;
ca3909
+    return 0;
ca3909
 }
ca3909
 
ca3909
 /*
ca3909
  * Send an inval inode notification to fuse. This causes an invalidation of the
ca3909
  * entire page cache mapping on the inode.
ca3909
  */
ca3909
-static void
ca3909
+static int32_t
ca3909
 fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
ca3909
 {
ca3909
 #if FUSE_KERNEL_MINOR_VERSION >= 11
ca3909
@@ -401,15 +407,20 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
ca3909
     priv = this->private;
ca3909
 
ca3909
     if (!priv->reverse_fuse_thread_started)
ca3909
-        return;
ca3909
+        return -1;
ca3909
+
ca3909
+    if (priv->invalidate_limit &&
ca3909
+        (priv->invalidate_count >= priv->invalidate_limit)) {
ca3909
+        return -1;
ca3909
+    }
ca3909
 
ca3909
     inode = (inode_t *)(unsigned long)fuse_ino;
ca3909
     if (inode == NULL)
ca3909
-        return;
ca3909
+        return -1;
ca3909
 
ca3909
     node = GF_CALLOC(1, sizeof(*node), gf_fuse_mt_invalidate_node_t);
ca3909
     if (node == NULL)
ca3909
-        return;
ca3909
+        return -1;
ca3909
 
ca3909
     INIT_LIST_HEAD(&node->next);
ca3909
 
ca3909
@@ -435,6 +446,7 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
ca3909
     pthread_mutex_lock(&priv->invalidate_mutex);
ca3909
     {
ca3909
         list_add_tail(&node->next, &priv->invalidate_list);
ca3909
+        priv->invalidate_count++;
ca3909
         pthread_cond_signal(&priv->invalidate_cond);
ca3909
     }
ca3909
     pthread_mutex_unlock(&priv->invalidate_mutex);
ca3909
@@ -443,7 +455,7 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
ca3909
     gf_log("glusterfs-fuse", GF_LOG_WARNING,
ca3909
            "fuse_invalidate_inode not implemented on this system");
ca3909
 #endif
ca3909
-    return;
ca3909
+    return 0;
ca3909
 }
ca3909
 
ca3909
 #if FUSE_KERNEL_MINOR_VERSION >= 11
ca3909
@@ -451,8 +463,9 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
ca3909
 static int32_t
ca3909
 fuse_inode_invalidate_fn(xlator_t *this, inode_t *inode)
ca3909
 {
ca3909
-    fuse_invalidate_entry(this, (uint64_t)(uintptr_t)inode);
ca3909
-    return 0;
ca3909
+    int32_t ret = 0;
ca3909
+    ret = fuse_invalidate_entry(this, (uint64_t)(uintptr_t)inode);
ca3909
+    return ret;
ca3909
 }
ca3909
 #endif
ca3909
 
ca3909
@@ -4003,7 +4016,9 @@ fuse_setxattr(xlator_t *this, fuse_in_header_t *finh, void *msg,
ca3909
         gf_log("fuse", GF_LOG_TRACE, "got request to invalidate %" PRIu64,
ca3909
                finh->nodeid);
ca3909
 #if FUSE_KERNEL_MINOR_VERSION >= 11
ca3909
-        fuse_invalidate_entry(this, finh->nodeid);
ca3909
+        ret = fuse_invalidate_entry(this, finh->nodeid);
ca3909
+        if (ret)
ca3909
+            op_errno = EBUSY;
ca3909
 #endif
ca3909
         goto done;
ca3909
     }
ca3909
@@ -4812,6 +4827,7 @@ notify_kernel_loop(void *data)
ca3909
                               fuse_invalidate_node_t, next);
ca3909
 
ca3909
             list_del_init(&node->next);
ca3909
+            priv->invalidate_count--;
ca3909
         }
ca3909
         pthread_mutex_unlock(&priv->invalidate_mutex);
ca3909
 
ca3909
@@ -4855,6 +4871,7 @@ notify_kernel_loop(void *data)
ca3909
             list_del_init(&node->next);
ca3909
             GF_FREE(node);
ca3909
         }
ca3909
+        priv->invalidate_count = 0;
ca3909
     }
ca3909
     pthread_mutex_unlock(&priv->invalidate_mutex);
ca3909
 
ca3909
@@ -6080,6 +6097,9 @@ fuse_priv_dump(xlator_t *this)
ca3909
                        (int)private->timed_response_fuse_thread_started);
ca3909
     gf_proc_dump_write("reverse_thread_started", "%d",
ca3909
                        (int)private->reverse_fuse_thread_started);
ca3909
+    gf_proc_dump_write("invalidate_limit", "%u", private->invalidate_limit);
ca3909
+    gf_proc_dump_write("invalidate_queue_length", "%" PRIu64,
ca3909
+                       private->invalidate_count);
ca3909
     gf_proc_dump_write("use_readdirp", "%d", private->use_readdirp);
ca3909
 
ca3909
     return 0;
ca3909
@@ -6619,6 +6639,9 @@ init(xlator_t *this_xl)
ca3909
 
ca3909
     GF_OPTION_INIT("lru-limit", priv->lru_limit, uint32, cleanup_exit);
ca3909
 
ca3909
+    GF_OPTION_INIT("invalidate-limit", priv->invalidate_limit, uint32,
ca3909
+                   cleanup_exit);
ca3909
+
ca3909
     GF_OPTION_INIT("event-history", priv->event_history, bool, cleanup_exit);
ca3909
 
ca3909
     GF_OPTION_INIT("thin-client", priv->thin_client, bool, cleanup_exit);
ca3909
@@ -6955,6 +6978,15 @@ struct volume_options options[] = {
ca3909
                        "reaching this limit (0 means 'unlimited')",
ca3909
     },
ca3909
     {
ca3909
+        .key = {"invalidate-limit"},
ca3909
+        .type = GF_OPTION_TYPE_INT,
ca3909
+        .default_value = "0",
ca3909
+        .min = 0,
ca3909
+        .description = "suspend invalidations as of 'lru-limit' if the number "
ca3909
+                       "of outstanding invalidations reaches this limit "
ca3909
+                       "(0 means 'unlimited')",
ca3909
+    },
ca3909
+    {
ca3909
         .key = {"auto-invalidation"},
ca3909
         .type = GF_OPTION_TYPE_BOOL,
ca3909
         .default_value = "true",
ca3909
diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h
ca3909
index 697bd88..2311582 100644
ca3909
--- a/xlators/mount/fuse/src/fuse-bridge.h
ca3909
+++ b/xlators/mount/fuse/src/fuse-bridge.h
ca3909
@@ -139,7 +139,7 @@ struct fuse_private {
ca3909
     pthread_cond_t invalidate_cond;
ca3909
     pthread_mutex_t invalidate_mutex;
ca3909
     gf_boolean_t reverse_fuse_thread_started;
ca3909
-
ca3909
+    uint64_t invalidate_count;
ca3909
     /* For communicating with separate mount thread. */
ca3909
     int status_pipe[2];
ca3909
 
ca3909
@@ -191,6 +191,7 @@ struct fuse_private {
ca3909
 
ca3909
     /* LRU Limit, if not set, default is 128k for now */
ca3909
     uint32_t lru_limit;
ca3909
+    uint32_t invalidate_limit;
ca3909
 };
ca3909
 typedef struct fuse_private fuse_private_t;
ca3909
 
ca3909
diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in
ca3909
index cbde42d..61d7422 100755
ca3909
--- a/xlators/mount/fuse/utils/mount.glusterfs.in
ca3909
+++ b/xlators/mount/fuse/utils/mount.glusterfs.in
ca3909
@@ -257,6 +257,10 @@ start_glusterfs ()
ca3909
         cmd_line=$(echo "$cmd_line --lru-limit=$lru_limit");
ca3909
     fi
ca3909
 
ca3909
+    if [ -n "$invalidate_limit" ]; then
ca3909
+        cmd_line=$(echo "$cmd_line --invalidate-limit=$invalidate_limit");
ca3909
+    fi
ca3909
+
ca3909
     if [ -n "$bg_qlen" ]; then
ca3909
         cmd_line=$(echo "$cmd_line --background-qlen=$bg_qlen");
ca3909
     fi
ca3909
@@ -505,6 +509,9 @@ with_options()
ca3909
         "lru-limit")
ca3909
             lru_limit=$value
ca3909
             ;;
ca3909
+        "invalidate-limit")
ca3909
+            invalidate_limit=$value
ca3909
+            ;;
ca3909
         "background-qlen")
ca3909
             bg_qlen=$value
ca3909
             ;;
ca3909
-- 
ca3909
1.8.3.1
ca3909