From 7b12a7ea7a6b4945ad52f218b187ca440dfbef63 Mon Sep 17 00:00:00 2001
From: Krutika Dhananjay <kdhananj@redhat.com>
Date: Fri, 20 Jul 2018 10:52:22 +0530
Subject: [PATCH 441/444] features/shard: Make lru limit of inode list
 configurable

> Upstream: https://review.gluster.org/20544
> BUG: 1605056
> Change-Id: Ifdcc2099f634314fafe8444e2d676e192e89e295

Currently this lru limit is hard-coded to 16384. This patch makes it
configurable, so the limit becomes easier to hit and the different
cases that arise when it is reached can be tested.

The option is features.shard-lru-limit. By design it may be configured
only in init() and not in reconfigure(), to avoid all the complexity
associated with evicting least recently used shards when the list is
shrunk.
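
For illustration (the volume name below is a placeholder), the limit is
set through the usual volume-set interface before the client mounts:

  # gluster volume set <VOLNAME> features.shard-lru-limit 25

Since the value is read only in init(), an already-mounted client keeps
its old limit until it is remounted; the test added by this patch
exercises exactly that behaviour.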

Change-Id: Ifdcc2099f634314fafe8444e2d676e192e89e295
BUG: 1603118
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/155126
Reviewed-by: Xavi Hernandez <xhernandez@redhat.com>
Tested-by: RHGS Build Bot <nigelb@redhat.com>
---
 libglusterfs/src/globals.h                      |  4 ++-
 tests/bugs/shard/configure-lru-limit.t          | 48 +++++++++++++++++++++++++
 xlators/features/shard/src/shard.c              | 19 ++++++++--
 xlators/features/shard/src/shard.h              |  3 +-
 xlators/mgmt/glusterd/src/glusterd-volume-set.c |  6 ++++
 5 files changed, 75 insertions(+), 5 deletions(-)
 create mode 100644 tests/bugs/shard/configure-lru-limit.t

diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h
index 97c4fad..555f44b 100644
--- a/libglusterfs/src/globals.h
+++ b/libglusterfs/src/globals.h
@@ -43,7 +43,7 @@
  */
 #define GD_OP_VERSION_MIN  1 /* MIN is the fresh start op-version, mostly
                                 should not change */
-#define GD_OP_VERSION_MAX  GD_OP_VERSION_3_13_3 /* MAX VERSION is the maximum
+#define GD_OP_VERSION_MAX  GD_OP_VERSION_4_2_0 /* MAX VERSION is the maximum
                                                   count in VME table, should
                                                   keep changing with
                                                   introduction of newer
@@ -111,6 +111,8 @@
 
 #define GD_OP_VERSION_3_13_3   31303 /* Op-version for GlusterFS 3.13.3 */
 
+#define GD_OP_VERSION_4_2_0    40200 /* Op-version for GlusterFS 4.2.0 */
+
 /* Downstream only change */
 #define GD_OP_VERSION_3_11_2   31102 /* Op-version for RHGS 3.3.1-async */
 #define GD_OP_VERSION_3_13_3   31303 /* Op-version for RHGS-3.4-Batch Update-1*/
diff --git a/tests/bugs/shard/configure-lru-limit.t b/tests/bugs/shard/configure-lru-limit.t
new file mode 100644
index 0000000..a8ba8ed
--- /dev/null
+++ b/tests/bugs/shard/configure-lru-limit.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume set $V0 features.shard-lru-limit 25
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+# Perform a write that would cause 25 shards to be created, 24 of them under .shard
+TEST dd if=/dev/zero of=$M0/foo bs=1M count=100
+
+statedump=$(generate_mount_statedump $V0)
+sleep 1
+EXPECT "25" echo $(grep "lru-max-limit" $statedump | cut -f2 -d'=' | tail -1)
+
+# Base shard is never added to this list. So all other shards should make up for 24 inodes in lru list
+EXPECT "24" echo $(grep "inode-count" $statedump | cut -f2 -d'=' | tail -1)
+
+rm -f $statedump
+
+# Test to ensure there's no "reconfiguration" of the value once set.
+TEST $CLI volume set $V0 features.shard-lru-limit 30
+statedump=$(generate_mount_statedump $V0)
+sleep 1
+EXPECT "25" echo $(grep "lru-max-limit" $statedump | cut -f2 -d'=' | tail -1)
+rm -f $statedump
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+statedump=$(generate_mount_statedump $V0)
+sleep 1
+EXPECT "30" echo $(grep "lru-max-limit" $statedump | cut -f2 -d'=' | tail -1)
+rm -f $statedump
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
index 6066a54..eb32168 100644
--- a/xlators/features/shard/src/shard.c
+++ b/xlators/features/shard/src/shard.c
@@ -668,7 +668,7 @@ __shard_update_shards_inode_list (inode_t *linked_inode, xlator_t *this,
         shard_inode_ctx_get (linked_inode, this, &ctx);
 
         if (list_empty (&ctx->ilist)) {
-                if (priv->inode_count + 1 <= SHARD_MAX_INODES) {
+                if (priv->inode_count + 1 <= priv->lru_limit) {
                 /* If this inode was linked here for the first time (indicated
                  * by empty list), and if there is still space in the priv list,
                  * add this ctx to the tail of the list.
@@ -6690,6 +6690,8 @@ init (xlator_t *this)
 
         GF_OPTION_INIT ("shard-deletion-rate", priv->deletion_rate, uint32, out);
 
+        GF_OPTION_INIT ("shard-lru-limit", priv->lru_limit, uint64, out);
+
         this->local_pool = mem_pool_new (shard_local_t, 128);
         if (!this->local_pool) {
                 ret = -1;
@@ -6808,7 +6810,7 @@ shard_priv_dump (xlator_t *this)
                             gf_uint64_2human_readable (priv->block_size));
         gf_proc_dump_write ("inode-count", "%d", priv->inode_count);
         gf_proc_dump_write ("ilist_head", "%p", &priv->ilist_head);
-        gf_proc_dump_write ("lru-max-limit", "%d", SHARD_MAX_INODES);
+        gf_proc_dump_write ("lru-max-limit", "%d", priv->lru_limit);
 
         return 0;
 }
@@ -6877,5 +6879,18 @@ struct volume_options options[] = {
            .max = INT_MAX,
            .description = "The number of shards to send deletes on at a time",
         },
+        {  .key = {"shard-lru-limit"},
+           .type = GF_OPTION_TYPE_INT,
+           .default_value = "16384",
+           .min = 20,
+           .max = INT_MAX,
+           .description = "The number of resolved shard inodes to keep in "
+                          "memory. A higher number means shards that are "
+                          "resolved will remain in memory longer, avoiding "
+                          "frequent lookups on them when they participate in "
+                          "file operations. The option also has a bearing on "
+                          "the amount of memory consumed by these inodes and "
+                          "their internal metadata.",
+        },
         { .key = {NULL} },
 };
diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
index 5de098a..ac3813c 100644
--- a/xlators/features/shard/src/shard.h
+++ b/xlators/features/shard/src/shard.h
@@ -23,8 +23,6 @@
 #define SHARD_MAX_BLOCK_SIZE  (4 * GF_UNIT_TB)
 #define SHARD_XATTR_PREFIX "trusted.glusterfs.shard."
 #define GF_XATTR_SHARD_BLOCK_SIZE "trusted.glusterfs.shard.block-size"
-#define SHARD_INODE_LRU_LIMIT 4096
-#define SHARD_MAX_INODES 16384
 /**
  *  Bit masks for the valid flag, which is used while updating ctx
 **/
@@ -216,6 +214,7 @@ typedef struct shard_priv {
         struct list_head ilist_head;
         uint32_t deletion_rate;
         shard_first_lookup_state_t first_lookup;
+        uint64_t lru_limit;
 } shard_priv_t;
 
 typedef struct {
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index a825f52..d442fe0 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -3298,6 +3298,12 @@ struct volopt_map_entry glusterd_volopt_map[] = {
           .op_version = GD_OP_VERSION_3_7_0,
           .flags      = OPT_FLAG_CLIENT_OPT
         },
+        { .key        = "features.shard-lru-limit",
+          .voltype    = "features/shard",
+          .op_version = GD_OP_VERSION_4_2_0,
+          .flags      = OPT_FLAG_CLIENT_OPT,
+          .type       = NO_DOC,
+        },
         { .key        = "features.shard-deletion-rate",
           .voltype    = "features/shard",
           .op_version = GD_OP_VERSION_3_13_4,
-- 
1.8.3.1