74096c
From a18f03cbf2b5652f8617cb4dd236bb4ca9838d96 Mon Sep 17 00:00:00 2001
74096c
From: Mohit Agrawal <moagrawa@redhat.com>
74096c
Date: Tue, 6 Oct 2020 16:54:15 +0530
74096c
Subject: [PATCH 509/511] core: configure optimum inode table hash_size for shd
74096c
74096c
In a brick_mux environment the shd process consumes a lot of memory.
74096c
A statedump shows that it allocates about 1M per afr xlator for all
74096c
bricks. With 4k volumes configured, the process reaches almost 6G RSS,
74096c
of which about 4G is consumed by inode tables.
74096c
74096c
[cluster/replicate.test1-replicate-0 - usage-type gf_common_mt_list_head memusage]
74096c
size=1273488
74096c
num_allocs=2
74096c
max_size=1273488
74096c
max_num_allocs=2
74096c
total_allocs=2
74096c
74096c
The inode_table_new function allocates about 1M of memory for the inode and
74096c
dentry hash bucket lists. For shd the lru_limit is 1, so a big hash table is
74096c
not needed. To reduce the RSS of the shd process, pass an optimum bucket count
74096c
when creating the inode_table.
74096c
74096c
> Change-Id: I039716d42321a232fdee1ee8fd50295e638715bb
74096c
> Fixes: #1538
74096c
> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
74096c
> (Cherry pick from commit ca6bbc486e76fdb9a8e07119bb10d7fa45b2e93b)
74096c
> (Reviewed on upstream link https://github.com/gluster/glusterfs/issues/1538)
74096c
74096c
Change-Id: I039716d42321a232fdee1ee8fd50295e638715bb
74096c
BUG: 1898777
74096c
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
74096c
Reviewed-on: https://code.engineering.redhat.com/gerrit/221191
74096c
Tested-by: RHGS Build Bot <nigelb@redhat.com>
74096c
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
74096c
---
74096c
 api/src/glfs-master.c                          |  2 +-
74096c
 libglusterfs/src/glusterfs/inode.h             | 17 +++++----
74096c
 libglusterfs/src/inode.c                       | 53 +++++++++++++++++---------
74096c
 xlators/cluster/afr/src/afr.c                  | 10 ++++-
74096c
 xlators/cluster/dht/src/dht-rebalance.c        |  3 +-
74096c
 xlators/cluster/ec/src/ec.c                    |  2 +-
74096c
 xlators/features/bit-rot/src/bitd/bit-rot.c    |  2 +-
74096c
 xlators/features/quota/src/quotad-helpers.c    |  2 +-
74096c
 xlators/features/trash/src/trash.c             |  4 +-
74096c
 xlators/mount/fuse/src/fuse-bridge.c           |  6 +--
74096c
 xlators/nfs/server/src/nfs.c                   |  2 +-
74096c
 xlators/protocol/server/src/server-handshake.c |  3 +-
74096c
 12 files changed, 66 insertions(+), 40 deletions(-)
74096c
74096c
diff --git a/api/src/glfs-master.c b/api/src/glfs-master.c
74096c
index b4473b1..9e604d3 100644
74096c
--- a/api/src/glfs-master.c
74096c
+++ b/api/src/glfs-master.c
74096c
@@ -45,7 +45,7 @@ graph_setup(struct glfs *fs, glusterfs_graph_t *graph)
74096c
         }
74096c
 
74096c
         if (!new_subvol->itable) {
74096c
-            itable = inode_table_new(131072, new_subvol);
74096c
+            itable = inode_table_new(131072, new_subvol, 0, 0);
74096c
             if (!itable) {
74096c
                 errno = ENOMEM;
74096c
                 ret = -1;
74096c
diff --git a/libglusterfs/src/glusterfs/inode.h b/libglusterfs/src/glusterfs/inode.h
74096c
index c875653..62c093d 100644
74096c
--- a/libglusterfs/src/glusterfs/inode.h
74096c
+++ b/libglusterfs/src/glusterfs/inode.h
74096c
@@ -35,11 +35,12 @@ typedef struct _dentry dentry_t;
74096c
 
74096c
 struct _inode_table {
74096c
     pthread_mutex_t lock;
74096c
-    size_t hashsize;    /* bucket size of inode hash and dentry hash */
74096c
-    char *name;         /* name of the inode table, just for gf_log() */
74096c
-    inode_t *root;      /* root directory inode, with number 1 */
74096c
-    xlator_t *xl;       /* xlator to be called to do purge */
74096c
-    uint32_t lru_limit; /* maximum LRU cache size */
74096c
+    size_t dentry_hashsize; /* Number of buckets for dentry hash*/
74096c
+    size_t inode_hashsize;  /* Size of inode hash table */
74096c
+    char *name;             /* name of the inode table, just for gf_log() */
74096c
+    inode_t *root;          /* root directory inode, with number 1 */
74096c
+    xlator_t *xl;           /* xlator to be called to do purge */
74096c
+    uint32_t lru_limit;     /* maximum LRU cache size */
74096c
     struct list_head *inode_hash; /* buckets for inode hash table */
74096c
     struct list_head *name_hash;  /* buckets for dentry hash table */
74096c
     struct list_head active; /* list of inodes currently active (in an fop) */
74096c
@@ -116,12 +117,14 @@ struct _inode {
74096c
 #define GFID_STR_PFX_LEN (sizeof(GFID_STR_PFX) - 1)
74096c
 
74096c
 inode_table_t *
74096c
-inode_table_new(uint32_t lru_limit, xlator_t *xl);
74096c
+inode_table_new(uint32_t lru_limit, xlator_t *xl, uint32_t dhash_size,
74096c
+                uint32_t inodehash_size);
74096c
 
74096c
 inode_table_t *
74096c
 inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl,
74096c
                              int32_t (*invalidator_fn)(xlator_t *, inode_t *),
74096c
-                             xlator_t *invalidator_xl);
74096c
+                             xlator_t *invalidator_xl, uint32_t dentry_hashsize,
74096c
+                             uint32_t inode_hashsize);
74096c
 
74096c
 void
74096c
 inode_table_destroy_all(glusterfs_ctx_t *ctx);
74096c
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
74096c
index 71b2d2a..98f8ea6 100644
74096c
--- a/libglusterfs/src/inode.c
74096c
+++ b/libglusterfs/src/inode.c
74096c
@@ -763,7 +763,7 @@ inode_grep(inode_table_t *table, inode_t *parent, const char *name)
74096c
         return NULL;
74096c
     }
74096c
 
74096c
-    int hash = hash_dentry(parent, name, table->hashsize);
74096c
+    int hash = hash_dentry(parent, name, table->dentry_hashsize);
74096c
 
74096c
     pthread_mutex_lock(&table->lock);
74096c
     {
74096c
@@ -839,7 +839,7 @@ inode_grep_for_gfid(inode_table_t *table, inode_t *parent, const char *name,
74096c
         return ret;
74096c
     }
74096c
 
74096c
-    int hash = hash_dentry(parent, name, table->hashsize);
74096c
+    int hash = hash_dentry(parent, name, table->dentry_hashsize);
74096c
 
74096c
     pthread_mutex_lock(&table->lock);
74096c
     {
74096c
@@ -903,7 +903,7 @@ inode_find(inode_table_t *table, uuid_t gfid)
74096c
         return NULL;
74096c
     }
74096c
 
74096c
-    int hash = hash_gfid(gfid, 65536);
74096c
+    int hash = hash_gfid(gfid, table->inode_hashsize);
74096c
 
74096c
     pthread_mutex_lock(&table->lock);
74096c
     {
74096c
@@ -964,7 +964,7 @@ __inode_link(inode_t *inode, inode_t *parent, const char *name,
74096c
             return NULL;
74096c
         }
74096c
 
74096c
-        int ihash = hash_gfid(iatt->ia_gfid, 65536);
74096c
+        int ihash = hash_gfid(iatt->ia_gfid, table->inode_hashsize);
74096c
 
74096c
         old_inode = __inode_find(table, iatt->ia_gfid, ihash);
74096c
 
74096c
@@ -1043,7 +1043,7 @@ inode_link(inode_t *inode, inode_t *parent, const char *name, struct iatt *iatt)
74096c
     table = inode->table;
74096c
 
74096c
     if (parent && name) {
74096c
-        hash = hash_dentry(parent, name, table->hashsize);
74096c
+        hash = hash_dentry(parent, name, table->dentry_hashsize);
74096c
     }
74096c
 
74096c
     if (name && strchr(name, '/')) {
74096c
@@ -1262,7 +1262,7 @@ inode_rename(inode_table_t *table, inode_t *srcdir, const char *srcname,
74096c
     }
74096c
 
74096c
     if (dstdir && dstname) {
74096c
-        hash = hash_dentry(dstdir, dstname, table->hashsize);
74096c
+        hash = hash_dentry(dstdir, dstname, table->dentry_hashsize);
74096c
     }
74096c
 
74096c
     pthread_mutex_lock(&table->lock);
74096c
@@ -1626,7 +1626,8 @@ __inode_table_init_root(inode_table_t *table)
74096c
 inode_table_t *
74096c
 inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl,
74096c
                              int32_t (*invalidator_fn)(xlator_t *, inode_t *),
74096c
-                             xlator_t *invalidator_xl)
74096c
+                             xlator_t *invalidator_xl, uint32_t dentry_hashsize,
74096c
+                             uint32_t inode_hashsize)
74096c
 {
74096c
     inode_table_t *new = NULL;
74096c
     uint32_t mem_pool_size = lru_limit;
74096c
@@ -1644,7 +1645,19 @@ inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl,
74096c
     new->invalidator_fn = invalidator_fn;
74096c
     new->invalidator_xl = invalidator_xl;
74096c
 
74096c
-    new->hashsize = 14057; /* TODO: Random Number?? */
74096c
+    if (dentry_hashsize == 0) {
74096c
+        /* Prime number for uniform distribution */
74096c
+        new->dentry_hashsize = 14057;
74096c
+    } else {
74096c
+        new->dentry_hashsize = dentry_hashsize;
74096c
+    }
74096c
+
74096c
+    if (inode_hashsize == 0) {
74096c
+        /* The size of hash table always should be power of 2 */
74096c
+        new->inode_hashsize = 65536;
74096c
+    } else {
74096c
+        new->inode_hashsize = inode_hashsize;
74096c
+    }
74096c
 
74096c
     /* In case FUSE is initing the inode table. */
74096c
     if (!mem_pool_size || (mem_pool_size > DEFAULT_INODE_MEMPOOL_ENTRIES))
74096c
@@ -1658,13 +1671,13 @@ inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl,
74096c
     if (!new->dentry_pool)
74096c
         goto out;
74096c
 
74096c
-    new->inode_hash = (void *)GF_CALLOC(65536, sizeof(struct list_head),
74096c
-                                        gf_common_mt_list_head);
74096c
+    new->inode_hash = (void *)GF_CALLOC(
74096c
+        new->inode_hashsize, sizeof(struct list_head), gf_common_mt_list_head);
74096c
     if (!new->inode_hash)
74096c
         goto out;
74096c
 
74096c
-    new->name_hash = (void *)GF_CALLOC(new->hashsize, sizeof(struct list_head),
74096c
-                                       gf_common_mt_list_head);
74096c
+    new->name_hash = (void *)GF_CALLOC(
74096c
+        new->dentry_hashsize, sizeof(struct list_head), gf_common_mt_list_head);
74096c
     if (!new->name_hash)
74096c
         goto out;
74096c
 
74096c
@@ -1675,11 +1688,11 @@ inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl,
74096c
     if (!new->fd_mem_pool)
74096c
         goto out;
74096c
 
74096c
-    for (i = 0; i < 65536; i++) {
74096c
+    for (i = 0; i < new->inode_hashsize; i++) {
74096c
         INIT_LIST_HEAD(&new->inode_hash[i]);
74096c
     }
74096c
 
74096c
-    for (i = 0; i < new->hashsize; i++) {
74096c
+    for (i = 0; i < new->dentry_hashsize; i++) {
74096c
         INIT_LIST_HEAD(&new->name_hash[i]);
74096c
     }
74096c
 
74096c
@@ -1717,10 +1730,12 @@ out:
74096c
 }
74096c
 
74096c
 inode_table_t *
74096c
-inode_table_new(uint32_t lru_limit, xlator_t *xl)
74096c
+inode_table_new(uint32_t lru_limit, xlator_t *xl, uint32_t dentry_hashsize,
74096c
+                uint32_t inode_hashsize)
74096c
 {
74096c
     /* Only fuse for now requires the inode table with invalidator */
74096c
-    return inode_table_with_invalidator(lru_limit, xl, NULL, NULL);
74096c
+    return inode_table_with_invalidator(lru_limit, xl, NULL, NULL,
74096c
+                                        dentry_hashsize, inode_hashsize);
74096c
 }
74096c
 
74096c
 int
74096c
@@ -2439,8 +2454,10 @@ inode_table_dump(inode_table_t *itable, char *prefix)
74096c
         return;
74096c
     }
74096c
 
74096c
-    gf_proc_dump_build_key(key, prefix, "hashsize");
74096c
-    gf_proc_dump_write(key, "%" GF_PRI_SIZET, itable->hashsize);
74096c
+    gf_proc_dump_build_key(key, prefix, "dentry_hashsize");
74096c
+    gf_proc_dump_write(key, "%" GF_PRI_SIZET, itable->dentry_hashsize);
74096c
+    gf_proc_dump_build_key(key, prefix, "inode_hashsize");
74096c
+    gf_proc_dump_write(key, "%" GF_PRI_SIZET, itable->inode_hashsize);
74096c
     gf_proc_dump_build_key(key, prefix, "name");
74096c
     gf_proc_dump_write(key, "%s", itable->name);
74096c
 
74096c
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
74096c
index 8f9e71f..bfa464f 100644
74096c
--- a/xlators/cluster/afr/src/afr.c
74096c
+++ b/xlators/cluster/afr/src/afr.c
74096c
@@ -594,7 +594,15 @@ init(xlator_t *this)
74096c
         goto out;
74096c
     }
74096c
 
74096c
-    this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this);
74096c
+    if (priv->shd.iamshd) {
74096c
+        /* Number of hash bucket should be prime number so declare 131
74096c
+           total dentry hash buckets
74096c
+        */
74096c
+        this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this, 131, 128);
74096c
+    } else {
74096c
+        this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this, 0, 0);
74096c
+    }
74096c
+
74096c
     if (!this->itable) {
74096c
         ret = -ENOMEM;
74096c
         goto out;
74096c
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
74096c
index 16ac16c..072896d 100644
74096c
--- a/xlators/cluster/dht/src/dht-rebalance.c
74096c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
74096c
@@ -1168,7 +1168,6 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag,
74096c
             break;
74096c
         }
74096c
 
74096c
-
74096c
         offset += ret;
74096c
         total += ret;
74096c
 
74096c
@@ -2467,7 +2466,7 @@ dht_build_root_inode(xlator_t *this, inode_t **inode)
74096c
         0,
74096c
     };
74096c
 
74096c
-    itable = inode_table_new(0, this);
74096c
+    itable = inode_table_new(0, this, 0, 0);
74096c
     if (!itable)
74096c
         return;
74096c
 
74096c
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
74096c
index 3f31c74..4118c3b 100644
74096c
--- a/xlators/cluster/ec/src/ec.c
74096c
+++ b/xlators/cluster/ec/src/ec.c
74096c
@@ -734,7 +734,7 @@ init(xlator_t *this)
74096c
     GF_OPTION_INIT("stripe-cache", ec->stripe_cache, uint32, failed);
74096c
     GF_OPTION_INIT("quorum-count", ec->quorum_count, uint32, failed);
74096c
 
74096c
-    this->itable = inode_table_new(EC_SHD_INODE_LRU_LIMIT, this);
74096c
+    this->itable = inode_table_new(EC_SHD_INODE_LRU_LIMIT, this, 0, 0);
74096c
     if (!this->itable)
74096c
         goto failed;
74096c
 
74096c
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c
74096c
index 424c0d5..4e0e798 100644
74096c
--- a/xlators/features/bit-rot/src/bitd/bit-rot.c
74096c
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.c
74096c
@@ -1658,7 +1658,7 @@ notify(xlator_t *this, int32_t event, void *data, ...)
74096c
                 child->child_up = 1;
74096c
                 child->xl = subvol;
74096c
                 if (!child->table)
74096c
-                    child->table = inode_table_new(4096, subvol);
74096c
+                    child->table = inode_table_new(4096, subvol, 0, 0);
74096c
 
74096c
                 _br_qchild_event(this, child, br_brick_connect);
74096c
                 pthread_cond_signal(&priv->cond);
74096c
diff --git a/xlators/features/quota/src/quotad-helpers.c b/xlators/features/quota/src/quotad-helpers.c
74096c
index d9f0351..46ac116 100644
74096c
--- a/xlators/features/quota/src/quotad-helpers.c
74096c
+++ b/xlators/features/quota/src/quotad-helpers.c
74096c
@@ -32,7 +32,7 @@ get_quotad_aggregator_state(xlator_t *this, rpcsvc_request_t *req)
74096c
     UNLOCK(&priv->lock);
74096c
 
74096c
     if (active_subvol->itable == NULL)
74096c
-        active_subvol->itable = inode_table_new(4096, active_subvol);
74096c
+        active_subvol->itable = inode_table_new(4096, active_subvol, 0, 0);
74096c
 
74096c
     state->itable = active_subvol->itable;
74096c
 
74096c
diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c
74096c
index 93f020f..099c887 100644
74096c
--- a/xlators/features/trash/src/trash.c
74096c
+++ b/xlators/features/trash/src/trash.c
74096c
@@ -2261,7 +2261,7 @@ reconfigure(xlator_t *this, dict_t *options)
74096c
 
74096c
     if (!active_earlier && active_now) {
74096c
         if (!priv->trash_itable) {
74096c
-            priv->trash_itable = inode_table_new(0, this);
74096c
+            priv->trash_itable = inode_table_new(0, this, 0, 0);
74096c
             if (!priv->trash_itable) {
74096c
                 ret = -ENOMEM;
74096c
                 gf_log(this->name, GF_LOG_ERROR,
74096c
@@ -2533,7 +2533,7 @@ init(xlator_t *this)
74096c
     }
74096c
 
74096c
     if (priv->state) {
74096c
-        priv->trash_itable = inode_table_new(0, this);
74096c
+        priv->trash_itable = inode_table_new(0, this, 0, 0);
74096c
         if (!priv->trash_itable) {
74096c
             ret = -ENOMEM;
74096c
             priv->state = _gf_false;
74096c
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
74096c
index 1bddac2..919eea3 100644
74096c
--- a/xlators/mount/fuse/src/fuse-bridge.c
74096c
+++ b/xlators/mount/fuse/src/fuse-bridge.c
74096c
@@ -6298,10 +6298,10 @@ fuse_graph_setup(xlator_t *this, glusterfs_graph_t *graph)
74096c
         }
74096c
 
74096c
 #if FUSE_KERNEL_MINOR_VERSION >= 11
74096c
-        itable = inode_table_with_invalidator(priv->lru_limit, graph->top,
74096c
-                                              fuse_inode_invalidate_fn, this);
74096c
+        itable = inode_table_with_invalidator(
74096c
+            priv->lru_limit, graph->top, fuse_inode_invalidate_fn, this, 0, 0);
74096c
 #else
74096c
-        itable = inode_table_new(0, graph->top);
74096c
+        itable = inode_table_new(0, graph->top, 0, 0);
74096c
 #endif
74096c
         if (!itable) {
74096c
             ret = -1;
74096c
diff --git a/xlators/nfs/server/src/nfs.c b/xlators/nfs/server/src/nfs.c
74096c
index ebded41..402be30 100644
74096c
--- a/xlators/nfs/server/src/nfs.c
74096c
+++ b/xlators/nfs/server/src/nfs.c
74096c
@@ -564,7 +564,7 @@ nfs_init_subvolume(struct nfs_state *nfs, xlator_t *xl)
74096c
         return -1;
74096c
 
74096c
     lrusize = nfs->memfactor * GF_NFS_INODE_LRU_MULT;
74096c
-    xl->itable = inode_table_new(lrusize, xl);
74096c
+    xl->itable = inode_table_new(lrusize, xl, 0, 0);
74096c
     if (!xl->itable) {
74096c
         gf_msg(GF_NFS, GF_LOG_CRITICAL, ENOMEM, NFS_MSG_NO_MEMORY,
74096c
                "Failed to allocate inode table");
74096c
diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c
74096c
index 1d1177d..eeca73c 100644
74096c
--- a/xlators/protocol/server/src/server-handshake.c
74096c
+++ b/xlators/protocol/server/src/server-handshake.c
74096c
@@ -36,7 +36,6 @@ gf_compare_client_version(rpcsvc_request_t *req, int fop_prognum,
74096c
     return ret;
74096c
 }
74096c
 
74096c
-
74096c
 int
74096c
 server_getspec(rpcsvc_request_t *req)
74096c
 {
74096c
@@ -629,7 +628,7 @@ server_setvolume(rpcsvc_request_t *req)
74096c
 
74096c
             /* TODO: what is this ? */
74096c
             client->bound_xl->itable = inode_table_new(conf->inode_lru_limit,
74096c
-                                                       client->bound_xl);
74096c
+                                                       client->bound_xl, 0, 0);
74096c
         }
74096c
     }
74096c
     UNLOCK(&conf->itable_lock);
74096c
-- 
74096c
1.8.3.1
74096c