Blob Blame History Raw
From a18f03cbf2b5652f8617cb4dd236bb4ca9838d96 Mon Sep 17 00:00:00 2001
From: Mohit Agrawal <moagrawa@redhat.com>
Date: Tue, 6 Oct 2020 16:54:15 +0530
Subject: [PATCH 509/511] core: configure optimum inode table hash_size for shd

In brick_mux environment a shd process consume high memory.
After print the statedump i have found it allocates 1M per afr xlator
for all bricks.In case of configure 4k volumes it consumes almost total
6G RSS size in which 4G consumes by inode_tables

[cluster/replicate.test1-replicate-0 - usage-type gf_common_mt_list_head memusage]
size=1273488
num_allocs=2
max_size=1273488
max_num_allocs=2
total_allocs=2

inode_new_table function allocates memory(1M) for a list of inode and dentry hash.
For shd lru_limit size is 1 so we don't need to create a big hash table so to reduce
RSS size for shd process pass optimum bucket count at the time of creating inode_table.

> Change-Id: I039716d42321a232fdee1ee8fd50295e638715bb
> Fixes: #1538
> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
> (Cherry pick from commit ca6bbc486e76fdb9a8e07119bb10d7fa45b2e93b)
> (Reviewed on upstream link https://github.com/gluster/glusterfs/issues/1538)

Change-Id: I039716d42321a232fdee1ee8fd50295e638715bb
BUG: 1898777
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/221191
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
 api/src/glfs-master.c                          |  2 +-
 libglusterfs/src/glusterfs/inode.h             | 17 +++++----
 libglusterfs/src/inode.c                       | 53 +++++++++++++++++---------
 xlators/cluster/afr/src/afr.c                  | 10 ++++-
 xlators/cluster/dht/src/dht-rebalance.c        |  3 +-
 xlators/cluster/ec/src/ec.c                    |  2 +-
 xlators/features/bit-rot/src/bitd/bit-rot.c    |  2 +-
 xlators/features/quota/src/quotad-helpers.c    |  2 +-
 xlators/features/trash/src/trash.c             |  4 +-
 xlators/mount/fuse/src/fuse-bridge.c           |  6 +--
 xlators/nfs/server/src/nfs.c                   |  2 +-
 xlators/protocol/server/src/server-handshake.c |  3 +-
 12 files changed, 66 insertions(+), 40 deletions(-)

diff --git a/api/src/glfs-master.c b/api/src/glfs-master.c
index b4473b1..9e604d3 100644
--- a/api/src/glfs-master.c
+++ b/api/src/glfs-master.c
@@ -45,7 +45,7 @@ graph_setup(struct glfs *fs, glusterfs_graph_t *graph)
         }
 
         if (!new_subvol->itable) {
-            itable = inode_table_new(131072, new_subvol);
+            itable = inode_table_new(131072, new_subvol, 0, 0);
             if (!itable) {
                 errno = ENOMEM;
                 ret = -1;
diff --git a/libglusterfs/src/glusterfs/inode.h b/libglusterfs/src/glusterfs/inode.h
index c875653..62c093d 100644
--- a/libglusterfs/src/glusterfs/inode.h
+++ b/libglusterfs/src/glusterfs/inode.h
@@ -35,11 +35,12 @@ typedef struct _dentry dentry_t;
 
 struct _inode_table {
     pthread_mutex_t lock;
-    size_t hashsize;    /* bucket size of inode hash and dentry hash */
-    char *name;         /* name of the inode table, just for gf_log() */
-    inode_t *root;      /* root directory inode, with number 1 */
-    xlator_t *xl;       /* xlator to be called to do purge */
-    uint32_t lru_limit; /* maximum LRU cache size */
+    size_t dentry_hashsize; /* Number of buckets for dentry hash*/
+    size_t inode_hashsize;  /* Size of inode hash table */
+    char *name;             /* name of the inode table, just for gf_log() */
+    inode_t *root;          /* root directory inode, with number 1 */
+    xlator_t *xl;           /* xlator to be called to do purge */
+    uint32_t lru_limit;     /* maximum LRU cache size */
     struct list_head *inode_hash; /* buckets for inode hash table */
     struct list_head *name_hash;  /* buckets for dentry hash table */
     struct list_head active; /* list of inodes currently active (in an fop) */
@@ -116,12 +117,14 @@ struct _inode {
 #define GFID_STR_PFX_LEN (sizeof(GFID_STR_PFX) - 1)
 
 inode_table_t *
-inode_table_new(uint32_t lru_limit, xlator_t *xl);
+inode_table_new(uint32_t lru_limit, xlator_t *xl, uint32_t dhash_size,
+                uint32_t inodehash_size);
 
 inode_table_t *
 inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl,
                              int32_t (*invalidator_fn)(xlator_t *, inode_t *),
-                             xlator_t *invalidator_xl);
+                             xlator_t *invalidator_xl, uint32_t dentry_hashsize,
+                             uint32_t inode_hashsize);
 
 void
 inode_table_destroy_all(glusterfs_ctx_t *ctx);
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
index 71b2d2a..98f8ea6 100644
--- a/libglusterfs/src/inode.c
+++ b/libglusterfs/src/inode.c
@@ -763,7 +763,7 @@ inode_grep(inode_table_t *table, inode_t *parent, const char *name)
         return NULL;
     }
 
-    int hash = hash_dentry(parent, name, table->hashsize);
+    int hash = hash_dentry(parent, name, table->dentry_hashsize);
 
     pthread_mutex_lock(&table->lock);
     {
@@ -839,7 +839,7 @@ inode_grep_for_gfid(inode_table_t *table, inode_t *parent, const char *name,
         return ret;
     }
 
-    int hash = hash_dentry(parent, name, table->hashsize);
+    int hash = hash_dentry(parent, name, table->dentry_hashsize);
 
     pthread_mutex_lock(&table->lock);
     {
@@ -903,7 +903,7 @@ inode_find(inode_table_t *table, uuid_t gfid)
         return NULL;
     }
 
-    int hash = hash_gfid(gfid, 65536);
+    int hash = hash_gfid(gfid, table->inode_hashsize);
 
     pthread_mutex_lock(&table->lock);
     {
@@ -964,7 +964,7 @@ __inode_link(inode_t *inode, inode_t *parent, const char *name,
             return NULL;
         }
 
-        int ihash = hash_gfid(iatt->ia_gfid, 65536);
+        int ihash = hash_gfid(iatt->ia_gfid, table->inode_hashsize);
 
         old_inode = __inode_find(table, iatt->ia_gfid, ihash);
 
@@ -1043,7 +1043,7 @@ inode_link(inode_t *inode, inode_t *parent, const char *name, struct iatt *iatt)
     table = inode->table;
 
     if (parent && name) {
-        hash = hash_dentry(parent, name, table->hashsize);
+        hash = hash_dentry(parent, name, table->dentry_hashsize);
     }
 
     if (name && strchr(name, '/')) {
@@ -1262,7 +1262,7 @@ inode_rename(inode_table_t *table, inode_t *srcdir, const char *srcname,
     }
 
     if (dstdir && dstname) {
-        hash = hash_dentry(dstdir, dstname, table->hashsize);
+        hash = hash_dentry(dstdir, dstname, table->dentry_hashsize);
     }
 
     pthread_mutex_lock(&table->lock);
@@ -1626,7 +1626,8 @@ __inode_table_init_root(inode_table_t *table)
 inode_table_t *
 inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl,
                              int32_t (*invalidator_fn)(xlator_t *, inode_t *),
-                             xlator_t *invalidator_xl)
+                             xlator_t *invalidator_xl, uint32_t dentry_hashsize,
+                             uint32_t inode_hashsize)
 {
     inode_table_t *new = NULL;
     uint32_t mem_pool_size = lru_limit;
@@ -1644,7 +1645,19 @@ inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl,
     new->invalidator_fn = invalidator_fn;
     new->invalidator_xl = invalidator_xl;
 
-    new->hashsize = 14057; /* TODO: Random Number?? */
+    if (dentry_hashsize == 0) {
+        /* Prime number for uniform distribution */
+        new->dentry_hashsize = 14057;
+    } else {
+        new->dentry_hashsize = dentry_hashsize;
+    }
+
+    if (inode_hashsize == 0) {
+        /* The size of hash table always should be power of 2 */
+        new->inode_hashsize = 65536;
+    } else {
+        new->inode_hashsize = inode_hashsize;
+    }
 
     /* In case FUSE is initing the inode table. */
     if (!mem_pool_size || (mem_pool_size > DEFAULT_INODE_MEMPOOL_ENTRIES))
@@ -1658,13 +1671,13 @@ inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl,
     if (!new->dentry_pool)
         goto out;
 
-    new->inode_hash = (void *)GF_CALLOC(65536, sizeof(struct list_head),
-                                        gf_common_mt_list_head);
+    new->inode_hash = (void *)GF_CALLOC(
+        new->inode_hashsize, sizeof(struct list_head), gf_common_mt_list_head);
     if (!new->inode_hash)
         goto out;
 
-    new->name_hash = (void *)GF_CALLOC(new->hashsize, sizeof(struct list_head),
-                                       gf_common_mt_list_head);
+    new->name_hash = (void *)GF_CALLOC(
+        new->dentry_hashsize, sizeof(struct list_head), gf_common_mt_list_head);
     if (!new->name_hash)
         goto out;
 
@@ -1675,11 +1688,11 @@ inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl,
     if (!new->fd_mem_pool)
         goto out;
 
-    for (i = 0; i < 65536; i++) {
+    for (i = 0; i < new->inode_hashsize; i++) {
         INIT_LIST_HEAD(&new->inode_hash[i]);
     }
 
-    for (i = 0; i < new->hashsize; i++) {
+    for (i = 0; i < new->dentry_hashsize; i++) {
         INIT_LIST_HEAD(&new->name_hash[i]);
     }
 
@@ -1717,10 +1730,12 @@ out:
 }
 
 inode_table_t *
-inode_table_new(uint32_t lru_limit, xlator_t *xl)
+inode_table_new(uint32_t lru_limit, xlator_t *xl, uint32_t dentry_hashsize,
+                uint32_t inode_hashsize)
 {
     /* Only fuse for now requires the inode table with invalidator */
-    return inode_table_with_invalidator(lru_limit, xl, NULL, NULL);
+    return inode_table_with_invalidator(lru_limit, xl, NULL, NULL,
+                                        dentry_hashsize, inode_hashsize);
 }
 
 int
@@ -2439,8 +2454,10 @@ inode_table_dump(inode_table_t *itable, char *prefix)
         return;
     }
 
-    gf_proc_dump_build_key(key, prefix, "hashsize");
-    gf_proc_dump_write(key, "%" GF_PRI_SIZET, itable->hashsize);
+    gf_proc_dump_build_key(key, prefix, "dentry_hashsize");
+    gf_proc_dump_write(key, "%" GF_PRI_SIZET, itable->dentry_hashsize);
+    gf_proc_dump_build_key(key, prefix, "inode_hashsize");
+    gf_proc_dump_write(key, "%" GF_PRI_SIZET, itable->inode_hashsize);
     gf_proc_dump_build_key(key, prefix, "name");
     gf_proc_dump_write(key, "%s", itable->name);
 
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index 8f9e71f..bfa464f 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -594,7 +594,15 @@ init(xlator_t *this)
         goto out;
     }
 
-    this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this);
+    if (priv->shd.iamshd) {
+        /* Number of hash bucket should be prime number so declare 131
+           total dentry hash buckets
+        */
+        this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this, 131, 128);
+    } else {
+        this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this, 0, 0);
+    }
+
     if (!this->itable) {
         ret = -ENOMEM;
         goto out;
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 16ac16c..072896d 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -1168,7 +1168,6 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag,
             break;
         }
 
-
         offset += ret;
         total += ret;
 
@@ -2467,7 +2466,7 @@ dht_build_root_inode(xlator_t *this, inode_t **inode)
         0,
     };
 
-    itable = inode_table_new(0, this);
+    itable = inode_table_new(0, this, 0, 0);
     if (!itable)
         return;
 
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index 3f31c74..4118c3b 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -734,7 +734,7 @@ init(xlator_t *this)
     GF_OPTION_INIT("stripe-cache", ec->stripe_cache, uint32, failed);
     GF_OPTION_INIT("quorum-count", ec->quorum_count, uint32, failed);
 
-    this->itable = inode_table_new(EC_SHD_INODE_LRU_LIMIT, this);
+    this->itable = inode_table_new(EC_SHD_INODE_LRU_LIMIT, this, 0, 0);
     if (!this->itable)
         goto failed;
 
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c
index 424c0d5..4e0e798 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot.c
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.c
@@ -1658,7 +1658,7 @@ notify(xlator_t *this, int32_t event, void *data, ...)
                 child->child_up = 1;
                 child->xl = subvol;
                 if (!child->table)
-                    child->table = inode_table_new(4096, subvol);
+                    child->table = inode_table_new(4096, subvol, 0, 0);
 
                 _br_qchild_event(this, child, br_brick_connect);
                 pthread_cond_signal(&priv->cond);
diff --git a/xlators/features/quota/src/quotad-helpers.c b/xlators/features/quota/src/quotad-helpers.c
index d9f0351..46ac116 100644
--- a/xlators/features/quota/src/quotad-helpers.c
+++ b/xlators/features/quota/src/quotad-helpers.c
@@ -32,7 +32,7 @@ get_quotad_aggregator_state(xlator_t *this, rpcsvc_request_t *req)
     UNLOCK(&priv->lock);
 
     if (active_subvol->itable == NULL)
-        active_subvol->itable = inode_table_new(4096, active_subvol);
+        active_subvol->itable = inode_table_new(4096, active_subvol, 0, 0);
 
     state->itable = active_subvol->itable;
 
diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c
index 93f020f..099c887 100644
--- a/xlators/features/trash/src/trash.c
+++ b/xlators/features/trash/src/trash.c
@@ -2261,7 +2261,7 @@ reconfigure(xlator_t *this, dict_t *options)
 
     if (!active_earlier && active_now) {
         if (!priv->trash_itable) {
-            priv->trash_itable = inode_table_new(0, this);
+            priv->trash_itable = inode_table_new(0, this, 0, 0);
             if (!priv->trash_itable) {
                 ret = -ENOMEM;
                 gf_log(this->name, GF_LOG_ERROR,
@@ -2533,7 +2533,7 @@ init(xlator_t *this)
     }
 
     if (priv->state) {
-        priv->trash_itable = inode_table_new(0, this);
+        priv->trash_itable = inode_table_new(0, this, 0, 0);
         if (!priv->trash_itable) {
             ret = -ENOMEM;
             priv->state = _gf_false;
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
index 1bddac2..919eea3 100644
--- a/xlators/mount/fuse/src/fuse-bridge.c
+++ b/xlators/mount/fuse/src/fuse-bridge.c
@@ -6298,10 +6298,10 @@ fuse_graph_setup(xlator_t *this, glusterfs_graph_t *graph)
         }
 
 #if FUSE_KERNEL_MINOR_VERSION >= 11
-        itable = inode_table_with_invalidator(priv->lru_limit, graph->top,
-                                              fuse_inode_invalidate_fn, this);
+        itable = inode_table_with_invalidator(
+            priv->lru_limit, graph->top, fuse_inode_invalidate_fn, this, 0, 0);
 #else
-        itable = inode_table_new(0, graph->top);
+        itable = inode_table_new(0, graph->top, 0, 0);
 #endif
         if (!itable) {
             ret = -1;
diff --git a/xlators/nfs/server/src/nfs.c b/xlators/nfs/server/src/nfs.c
index ebded41..402be30 100644
--- a/xlators/nfs/server/src/nfs.c
+++ b/xlators/nfs/server/src/nfs.c
@@ -564,7 +564,7 @@ nfs_init_subvolume(struct nfs_state *nfs, xlator_t *xl)
         return -1;
 
     lrusize = nfs->memfactor * GF_NFS_INODE_LRU_MULT;
-    xl->itable = inode_table_new(lrusize, xl);
+    xl->itable = inode_table_new(lrusize, xl, 0, 0);
     if (!xl->itable) {
         gf_msg(GF_NFS, GF_LOG_CRITICAL, ENOMEM, NFS_MSG_NO_MEMORY,
                "Failed to allocate inode table");
diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c
index 1d1177d..eeca73c 100644
--- a/xlators/protocol/server/src/server-handshake.c
+++ b/xlators/protocol/server/src/server-handshake.c
@@ -36,7 +36,6 @@ gf_compare_client_version(rpcsvc_request_t *req, int fop_prognum,
     return ret;
 }
 
-
 int
 server_getspec(rpcsvc_request_t *req)
 {
@@ -629,7 +628,7 @@ server_setvolume(rpcsvc_request_t *req)
 
             /* TODO: what is this ? */
             client->bound_xl->itable = inode_table_new(conf->inode_lru_limit,
-                                                       client->bound_xl);
+                                                       client->bound_xl, 0, 0);
         }
     }
     UNLOCK(&conf->itable_lock);
-- 
1.8.3.1