17b94a
From a18f03cbf2b5652f8617cb4dd236bb4ca9838d96 Mon Sep 17 00:00:00 2001
17b94a
From: Mohit Agrawal <moagrawa@redhat.com>
17b94a
Date: Tue, 6 Oct 2020 16:54:15 +0530
17b94a
Subject: [PATCH 509/511] core: configure optimum inode table hash_size for shd
17b94a
17b94a
In a brick_mux environment the shd process consumes a lot of memory.
17b94a
A statedump shows that it allocates about 1M per afr xlator
17b94a
for all bricks. With 4k volumes configured it consumes almost 6G of RSS
17b94a
in total, of which 4G is consumed by inode tables.
17b94a
17b94a
[cluster/replicate.test1-replicate-0 - usage-type gf_common_mt_list_head memusage]
17b94a
size=1273488
17b94a
num_allocs=2
17b94a
max_size=1273488
17b94a
max_num_allocs=2
17b94a
total_allocs=2
17b94a
17b94a
inode_table_new() allocates about 1M of memory for the inode and dentry hash bucket lists.
17b94a
For shd the lru_limit is 1, so a big hash table is not needed. To reduce the RSS of
17b94a
the shd process, pass an optimum bucket count when creating the inode table.
17b94a
17b94a
> Change-Id: I039716d42321a232fdee1ee8fd50295e638715bb
17b94a
> Fixes: #1538
17b94a
> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
17b94a
> (Cherry pick from commit ca6bbc486e76fdb9a8e07119bb10d7fa45b2e93b)
17b94a
> (Reviewed on upstream link https://github.com/gluster/glusterfs/issues/1538)
17b94a
17b94a
Change-Id: I039716d42321a232fdee1ee8fd50295e638715bb
17b94a
BUG: 1898777
17b94a
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
17b94a
Reviewed-on: https://code.engineering.redhat.com/gerrit/221191
17b94a
Tested-by: RHGS Build Bot <nigelb@redhat.com>
17b94a
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
17b94a
---
17b94a
 api/src/glfs-master.c                          |  2 +-
17b94a
 libglusterfs/src/glusterfs/inode.h             | 17 +++++----
17b94a
 libglusterfs/src/inode.c                       | 53 +++++++++++++++++---------
17b94a
 xlators/cluster/afr/src/afr.c                  | 10 ++++-
17b94a
 xlators/cluster/dht/src/dht-rebalance.c        |  3 +-
17b94a
 xlators/cluster/ec/src/ec.c                    |  2 +-
17b94a
 xlators/features/bit-rot/src/bitd/bit-rot.c    |  2 +-
17b94a
 xlators/features/quota/src/quotad-helpers.c    |  2 +-
17b94a
 xlators/features/trash/src/trash.c             |  4 +-
17b94a
 xlators/mount/fuse/src/fuse-bridge.c           |  6 +--
17b94a
 xlators/nfs/server/src/nfs.c                   |  2 +-
17b94a
 xlators/protocol/server/src/server-handshake.c |  3 +-
17b94a
 12 files changed, 66 insertions(+), 40 deletions(-)
17b94a
17b94a
diff --git a/api/src/glfs-master.c b/api/src/glfs-master.c
17b94a
index b4473b1..9e604d3 100644
17b94a
--- a/api/src/glfs-master.c
17b94a
+++ b/api/src/glfs-master.c
17b94a
@@ -45,7 +45,7 @@ graph_setup(struct glfs *fs, glusterfs_graph_t *graph)
17b94a
         }
17b94a
 
17b94a
         if (!new_subvol->itable) {
17b94a
-            itable = inode_table_new(131072, new_subvol);
17b94a
+            itable = inode_table_new(131072, new_subvol, 0, 0);
17b94a
             if (!itable) {
17b94a
                 errno = ENOMEM;
17b94a
                 ret = -1;
17b94a
diff --git a/libglusterfs/src/glusterfs/inode.h b/libglusterfs/src/glusterfs/inode.h
17b94a
index c875653..62c093d 100644
17b94a
--- a/libglusterfs/src/glusterfs/inode.h
17b94a
+++ b/libglusterfs/src/glusterfs/inode.h
17b94a
@@ -35,11 +35,12 @@ typedef struct _dentry dentry_t;
17b94a
 
17b94a
 struct _inode_table {
17b94a
     pthread_mutex_t lock;
17b94a
-    size_t hashsize;    /* bucket size of inode hash and dentry hash */
17b94a
-    char *name;         /* name of the inode table, just for gf_log() */
17b94a
-    inode_t *root;      /* root directory inode, with number 1 */
17b94a
-    xlator_t *xl;       /* xlator to be called to do purge */
17b94a
-    uint32_t lru_limit; /* maximum LRU cache size */
17b94a
+    size_t dentry_hashsize; /* Number of buckets for dentry hash*/
17b94a
+    size_t inode_hashsize;  /* Size of inode hash table */
17b94a
+    char *name;             /* name of the inode table, just for gf_log() */
17b94a
+    inode_t *root;          /* root directory inode, with number 1 */
17b94a
+    xlator_t *xl;           /* xlator to be called to do purge */
17b94a
+    uint32_t lru_limit;     /* maximum LRU cache size */
17b94a
     struct list_head *inode_hash; /* buckets for inode hash table */
17b94a
     struct list_head *name_hash;  /* buckets for dentry hash table */
17b94a
     struct list_head active; /* list of inodes currently active (in an fop) */
17b94a
@@ -116,12 +117,14 @@ struct _inode {
17b94a
 #define GFID_STR_PFX_LEN (sizeof(GFID_STR_PFX) - 1)
17b94a
 
17b94a
 inode_table_t *
17b94a
-inode_table_new(uint32_t lru_limit, xlator_t *xl);
17b94a
+inode_table_new(uint32_t lru_limit, xlator_t *xl, uint32_t dhash_size,
17b94a
+                uint32_t inodehash_size);
17b94a
 
17b94a
 inode_table_t *
17b94a
 inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl,
17b94a
                              int32_t (*invalidator_fn)(xlator_t *, inode_t *),
17b94a
-                             xlator_t *invalidator_xl);
17b94a
+                             xlator_t *invalidator_xl, uint32_t dentry_hashsize,
17b94a
+                             uint32_t inode_hashsize);
17b94a
 
17b94a
 void
17b94a
 inode_table_destroy_all(glusterfs_ctx_t *ctx);
17b94a
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
17b94a
index 71b2d2a..98f8ea6 100644
17b94a
--- a/libglusterfs/src/inode.c
17b94a
+++ b/libglusterfs/src/inode.c
17b94a
@@ -763,7 +763,7 @@ inode_grep(inode_table_t *table, inode_t *parent, const char *name)
17b94a
         return NULL;
17b94a
     }
17b94a
 
17b94a
-    int hash = hash_dentry(parent, name, table->hashsize);
17b94a
+    int hash = hash_dentry(parent, name, table->dentry_hashsize);
17b94a
 
17b94a
     pthread_mutex_lock(&table->lock);
17b94a
     {
17b94a
@@ -839,7 +839,7 @@ inode_grep_for_gfid(inode_table_t *table, inode_t *parent, const char *name,
17b94a
         return ret;
17b94a
     }
17b94a
 
17b94a
-    int hash = hash_dentry(parent, name, table->hashsize);
17b94a
+    int hash = hash_dentry(parent, name, table->dentry_hashsize);
17b94a
 
17b94a
     pthread_mutex_lock(&table->lock);
17b94a
     {
17b94a
@@ -903,7 +903,7 @@ inode_find(inode_table_t *table, uuid_t gfid)
17b94a
         return NULL;
17b94a
     }
17b94a
 
17b94a
-    int hash = hash_gfid(gfid, 65536);
17b94a
+    int hash = hash_gfid(gfid, table->inode_hashsize);
17b94a
 
17b94a
     pthread_mutex_lock(&table->lock);
17b94a
     {
17b94a
@@ -964,7 +964,7 @@ __inode_link(inode_t *inode, inode_t *parent, const char *name,
17b94a
             return NULL;
17b94a
         }
17b94a
 
17b94a
-        int ihash = hash_gfid(iatt->ia_gfid, 65536);
17b94a
+        int ihash = hash_gfid(iatt->ia_gfid, table->inode_hashsize);
17b94a
 
17b94a
         old_inode = __inode_find(table, iatt->ia_gfid, ihash);
17b94a
 
17b94a
@@ -1043,7 +1043,7 @@ inode_link(inode_t *inode, inode_t *parent, const char *name, struct iatt *iatt)
17b94a
     table = inode->table;
17b94a
 
17b94a
     if (parent && name) {
17b94a
-        hash = hash_dentry(parent, name, table->hashsize);
17b94a
+        hash = hash_dentry(parent, name, table->dentry_hashsize);
17b94a
     }
17b94a
 
17b94a
     if (name && strchr(name, '/')) {
17b94a
@@ -1262,7 +1262,7 @@ inode_rename(inode_table_t *table, inode_t *srcdir, const char *srcname,
17b94a
     }
17b94a
 
17b94a
     if (dstdir && dstname) {
17b94a
-        hash = hash_dentry(dstdir, dstname, table->hashsize);
17b94a
+        hash = hash_dentry(dstdir, dstname, table->dentry_hashsize);
17b94a
     }
17b94a
 
17b94a
     pthread_mutex_lock(&table->lock);
17b94a
@@ -1626,7 +1626,8 @@ __inode_table_init_root(inode_table_t *table)
17b94a
 inode_table_t *
17b94a
 inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl,
17b94a
                              int32_t (*invalidator_fn)(xlator_t *, inode_t *),
17b94a
-                             xlator_t *invalidator_xl)
17b94a
+                             xlator_t *invalidator_xl, uint32_t dentry_hashsize,
17b94a
+                             uint32_t inode_hashsize)
17b94a
 {
17b94a
     inode_table_t *new = NULL;
17b94a
     uint32_t mem_pool_size = lru_limit;
17b94a
@@ -1644,7 +1645,19 @@ inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl,
17b94a
     new->invalidator_fn = invalidator_fn;
17b94a
     new->invalidator_xl = invalidator_xl;
17b94a
 
17b94a
-    new->hashsize = 14057; /* TODO: Random Number?? */
17b94a
+    if (dentry_hashsize == 0) {
17b94a
+        /* Prime number for uniform distribution */
17b94a
+        new->dentry_hashsize = 14057;
17b94a
+    } else {
17b94a
+        new->dentry_hashsize = dentry_hashsize;
17b94a
+    }
17b94a
+
17b94a
+    if (inode_hashsize == 0) {
17b94a
+        /* The size of hash table always should be power of 2 */
17b94a
+        new->inode_hashsize = 65536;
17b94a
+    } else {
17b94a
+        new->inode_hashsize = inode_hashsize;
17b94a
+    }
17b94a
 
17b94a
     /* In case FUSE is initing the inode table. */
17b94a
     if (!mem_pool_size || (mem_pool_size > DEFAULT_INODE_MEMPOOL_ENTRIES))
17b94a
@@ -1658,13 +1671,13 @@ inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl,
17b94a
     if (!new->dentry_pool)
17b94a
         goto out;
17b94a
 
17b94a
-    new->inode_hash = (void *)GF_CALLOC(65536, sizeof(struct list_head),
17b94a
-                                        gf_common_mt_list_head);
17b94a
+    new->inode_hash = (void *)GF_CALLOC(
17b94a
+        new->inode_hashsize, sizeof(struct list_head), gf_common_mt_list_head);
17b94a
     if (!new->inode_hash)
17b94a
         goto out;
17b94a
 
17b94a
-    new->name_hash = (void *)GF_CALLOC(new->hashsize, sizeof(struct list_head),
17b94a
-                                       gf_common_mt_list_head);
17b94a
+    new->name_hash = (void *)GF_CALLOC(
17b94a
+        new->dentry_hashsize, sizeof(struct list_head), gf_common_mt_list_head);
17b94a
     if (!new->name_hash)
17b94a
         goto out;
17b94a
 
17b94a
@@ -1675,11 +1688,11 @@ inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl,
17b94a
     if (!new->fd_mem_pool)
17b94a
         goto out;
17b94a
 
17b94a
-    for (i = 0; i < 65536; i++) {
17b94a
+    for (i = 0; i < new->inode_hashsize; i++) {
17b94a
         INIT_LIST_HEAD(&new->inode_hash[i]);
17b94a
     }
17b94a
 
17b94a
-    for (i = 0; i < new->hashsize; i++) {
17b94a
+    for (i = 0; i < new->dentry_hashsize; i++) {
17b94a
         INIT_LIST_HEAD(&new->name_hash[i]);
17b94a
     }
17b94a
 
17b94a
@@ -1717,10 +1730,12 @@ out:
17b94a
 }
17b94a
 
17b94a
 inode_table_t *
17b94a
-inode_table_new(uint32_t lru_limit, xlator_t *xl)
17b94a
+inode_table_new(uint32_t lru_limit, xlator_t *xl, uint32_t dentry_hashsize,
17b94a
+                uint32_t inode_hashsize)
17b94a
 {
17b94a
     /* Only fuse for now requires the inode table with invalidator */
17b94a
-    return inode_table_with_invalidator(lru_limit, xl, NULL, NULL);
17b94a
+    return inode_table_with_invalidator(lru_limit, xl, NULL, NULL,
17b94a
+                                        dentry_hashsize, inode_hashsize);
17b94a
 }
17b94a
 
17b94a
 int
17b94a
@@ -2439,8 +2454,10 @@ inode_table_dump(inode_table_t *itable, char *prefix)
17b94a
         return;
17b94a
     }
17b94a
 
17b94a
-    gf_proc_dump_build_key(key, prefix, "hashsize");
17b94a
-    gf_proc_dump_write(key, "%" GF_PRI_SIZET, itable->hashsize);
17b94a
+    gf_proc_dump_build_key(key, prefix, "dentry_hashsize");
17b94a
+    gf_proc_dump_write(key, "%" GF_PRI_SIZET, itable->dentry_hashsize);
17b94a
+    gf_proc_dump_build_key(key, prefix, "inode_hashsize");
17b94a
+    gf_proc_dump_write(key, "%" GF_PRI_SIZET, itable->inode_hashsize);
17b94a
     gf_proc_dump_build_key(key, prefix, "name");
17b94a
     gf_proc_dump_write(key, "%s", itable->name);
17b94a
 
17b94a
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
17b94a
index 8f9e71f..bfa464f 100644
17b94a
--- a/xlators/cluster/afr/src/afr.c
17b94a
+++ b/xlators/cluster/afr/src/afr.c
17b94a
@@ -594,7 +594,15 @@ init(xlator_t *this)
17b94a
         goto out;
17b94a
     }
17b94a
 
17b94a
-    this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this);
17b94a
+    if (priv->shd.iamshd) {
17b94a
+        /* Number of hash bucket should be prime number so declare 131
17b94a
+           total dentry hash buckets
17b94a
+        */
17b94a
+        this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this, 131, 128);
17b94a
+    } else {
17b94a
+        this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this, 0, 0);
17b94a
+    }
17b94a
+
17b94a
     if (!this->itable) {
17b94a
         ret = -ENOMEM;
17b94a
         goto out;
17b94a
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
17b94a
index 16ac16c..072896d 100644
17b94a
--- a/xlators/cluster/dht/src/dht-rebalance.c
17b94a
+++ b/xlators/cluster/dht/src/dht-rebalance.c
17b94a
@@ -1168,7 +1168,6 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag,
17b94a
             break;
17b94a
         }
17b94a
 
17b94a
-
17b94a
         offset += ret;
17b94a
         total += ret;
17b94a
 
17b94a
@@ -2467,7 +2466,7 @@ dht_build_root_inode(xlator_t *this, inode_t **inode)
17b94a
         0,
17b94a
     };
17b94a
 
17b94a
-    itable = inode_table_new(0, this);
17b94a
+    itable = inode_table_new(0, this, 0, 0);
17b94a
     if (!itable)
17b94a
         return;
17b94a
 
17b94a
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
17b94a
index 3f31c74..4118c3b 100644
17b94a
--- a/xlators/cluster/ec/src/ec.c
17b94a
+++ b/xlators/cluster/ec/src/ec.c
17b94a
@@ -734,7 +734,7 @@ init(xlator_t *this)
17b94a
     GF_OPTION_INIT("stripe-cache", ec->stripe_cache, uint32, failed);
17b94a
     GF_OPTION_INIT("quorum-count", ec->quorum_count, uint32, failed);
17b94a
 
17b94a
-    this->itable = inode_table_new(EC_SHD_INODE_LRU_LIMIT, this);
17b94a
+    this->itable = inode_table_new(EC_SHD_INODE_LRU_LIMIT, this, 0, 0);
17b94a
     if (!this->itable)
17b94a
         goto failed;
17b94a
 
17b94a
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c
17b94a
index 424c0d5..4e0e798 100644
17b94a
--- a/xlators/features/bit-rot/src/bitd/bit-rot.c
17b94a
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.c
17b94a
@@ -1658,7 +1658,7 @@ notify(xlator_t *this, int32_t event, void *data, ...)
17b94a
                 child->child_up = 1;
17b94a
                 child->xl = subvol;
17b94a
                 if (!child->table)
17b94a
-                    child->table = inode_table_new(4096, subvol);
17b94a
+                    child->table = inode_table_new(4096, subvol, 0, 0);
17b94a
 
17b94a
                 _br_qchild_event(this, child, br_brick_connect);
17b94a
                 pthread_cond_signal(&priv->cond);
17b94a
diff --git a/xlators/features/quota/src/quotad-helpers.c b/xlators/features/quota/src/quotad-helpers.c
17b94a
index d9f0351..46ac116 100644
17b94a
--- a/xlators/features/quota/src/quotad-helpers.c
17b94a
+++ b/xlators/features/quota/src/quotad-helpers.c
17b94a
@@ -32,7 +32,7 @@ get_quotad_aggregator_state(xlator_t *this, rpcsvc_request_t *req)
17b94a
     UNLOCK(&priv->lock);
17b94a
 
17b94a
     if (active_subvol->itable == NULL)
17b94a
-        active_subvol->itable = inode_table_new(4096, active_subvol);
17b94a
+        active_subvol->itable = inode_table_new(4096, active_subvol, 0, 0);
17b94a
 
17b94a
     state->itable = active_subvol->itable;
17b94a
 
17b94a
diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c
17b94a
index 93f020f..099c887 100644
17b94a
--- a/xlators/features/trash/src/trash.c
17b94a
+++ b/xlators/features/trash/src/trash.c
17b94a
@@ -2261,7 +2261,7 @@ reconfigure(xlator_t *this, dict_t *options)
17b94a
 
17b94a
     if (!active_earlier && active_now) {
17b94a
         if (!priv->trash_itable) {
17b94a
-            priv->trash_itable = inode_table_new(0, this);
17b94a
+            priv->trash_itable = inode_table_new(0, this, 0, 0);
17b94a
             if (!priv->trash_itable) {
17b94a
                 ret = -ENOMEM;
17b94a
                 gf_log(this->name, GF_LOG_ERROR,
17b94a
@@ -2533,7 +2533,7 @@ init(xlator_t *this)
17b94a
     }
17b94a
 
17b94a
     if (priv->state) {
17b94a
-        priv->trash_itable = inode_table_new(0, this);
17b94a
+        priv->trash_itable = inode_table_new(0, this, 0, 0);
17b94a
         if (!priv->trash_itable) {
17b94a
             ret = -ENOMEM;
17b94a
             priv->state = _gf_false;
17b94a
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
17b94a
index 1bddac2..919eea3 100644
17b94a
--- a/xlators/mount/fuse/src/fuse-bridge.c
17b94a
+++ b/xlators/mount/fuse/src/fuse-bridge.c
17b94a
@@ -6298,10 +6298,10 @@ fuse_graph_setup(xlator_t *this, glusterfs_graph_t *graph)
17b94a
         }
17b94a
 
17b94a
 #if FUSE_KERNEL_MINOR_VERSION >= 11
17b94a
-        itable = inode_table_with_invalidator(priv->lru_limit, graph->top,
17b94a
-                                              fuse_inode_invalidate_fn, this);
17b94a
+        itable = inode_table_with_invalidator(
17b94a
+            priv->lru_limit, graph->top, fuse_inode_invalidate_fn, this, 0, 0);
17b94a
 #else
17b94a
-        itable = inode_table_new(0, graph->top);
17b94a
+        itable = inode_table_new(0, graph->top, 0, 0);
17b94a
 #endif
17b94a
         if (!itable) {
17b94a
             ret = -1;
17b94a
diff --git a/xlators/nfs/server/src/nfs.c b/xlators/nfs/server/src/nfs.c
17b94a
index ebded41..402be30 100644
17b94a
--- a/xlators/nfs/server/src/nfs.c
17b94a
+++ b/xlators/nfs/server/src/nfs.c
17b94a
@@ -564,7 +564,7 @@ nfs_init_subvolume(struct nfs_state *nfs, xlator_t *xl)
17b94a
         return -1;
17b94a
 
17b94a
     lrusize = nfs->memfactor * GF_NFS_INODE_LRU_MULT;
17b94a
-    xl->itable = inode_table_new(lrusize, xl);
17b94a
+    xl->itable = inode_table_new(lrusize, xl, 0, 0);
17b94a
     if (!xl->itable) {
17b94a
         gf_msg(GF_NFS, GF_LOG_CRITICAL, ENOMEM, NFS_MSG_NO_MEMORY,
17b94a
                "Failed to allocate inode table");
17b94a
diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c
17b94a
index 1d1177d..eeca73c 100644
17b94a
--- a/xlators/protocol/server/src/server-handshake.c
17b94a
+++ b/xlators/protocol/server/src/server-handshake.c
17b94a
@@ -36,7 +36,6 @@ gf_compare_client_version(rpcsvc_request_t *req, int fop_prognum,
17b94a
     return ret;
17b94a
 }
17b94a
 
17b94a
-
17b94a
 int
17b94a
 server_getspec(rpcsvc_request_t *req)
17b94a
 {
17b94a
@@ -629,7 +628,7 @@ server_setvolume(rpcsvc_request_t *req)
17b94a
 
17b94a
             /* TODO: what is this ? */
17b94a
             client->bound_xl->itable = inode_table_new(conf->inode_lru_limit,
17b94a
-                                                       client->bound_xl);
17b94a
+                                                       client->bound_xl, 0, 0);
17b94a
         }
17b94a
     }
17b94a
     UNLOCK(&conf->itable_lock);
17b94a
-- 
17b94a
1.8.3.1
17b94a