887953
From 7b12a7ea7a6b4945ad52f218b187ca440dfbef63 Mon Sep 17 00:00:00 2001
887953
From: Krutika Dhananjay <kdhananj@redhat.com>
887953
Date: Fri, 20 Jul 2018 10:52:22 +0530
887953
Subject: [PATCH 441/444] features/shard: Make lru limit of inode list
887953
 configurable
887953
887953
> Upstream: https://review.gluster.org/20544
887953
> BUG: 1605056
887953
> Change-Id: Ifdcc2099f634314fafe8444e2d676e192e89e295
887953
887953
Currently this lru limit is hard-coded to 16384. This patch makes it
887953
configurable to make it easier to hit the lru limit and enable testing
887953
of different cases that arise when the limit is reached.
887953
887953
The option is features.shard-lru-limit. It is by design allowed to
887953
be configured only in init() but not in reconfigure(). This is to avoid
887953
all the complexity associated with eviction of least recently used shards
887953
when the list is shrunk.
887953
887953
Change-Id: Ifdcc2099f634314fafe8444e2d676e192e89e295
887953
BUG: 1603118
887953
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
887953
Reviewed-on: https://code.engineering.redhat.com/gerrit/155126
887953
Reviewed-by: Xavi Hernandez <xhernandez@redhat.com>
887953
Tested-by: RHGS Build Bot <nigelb@redhat.com>
887953
---
887953
 libglusterfs/src/globals.h                      |  4 ++-
887953
 tests/bugs/shard/configure-lru-limit.t          | 48 +++++++++++++++++++++++++
887953
 xlators/features/shard/src/shard.c              | 19 ++++++++--
887953
 xlators/features/shard/src/shard.h              |  3 +-
887953
 xlators/mgmt/glusterd/src/glusterd-volume-set.c |  6 ++++
887953
 5 files changed, 75 insertions(+), 5 deletions(-)
887953
 create mode 100644 tests/bugs/shard/configure-lru-limit.t
887953
887953
diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h
887953
index 97c4fad..555f44b 100644
887953
--- a/libglusterfs/src/globals.h
887953
+++ b/libglusterfs/src/globals.h
887953
@@ -43,7 +43,7 @@
887953
  */
887953
 #define GD_OP_VERSION_MIN  1 /* MIN is the fresh start op-version, mostly
887953
                                 should not change */
887953
-#define GD_OP_VERSION_MAX  GD_OP_VERSION_3_13_3 /* MAX VERSION is the maximum
887953
+#define GD_OP_VERSION_MAX  GD_OP_VERSION_4_2_0 /* MAX VERSION is the maximum
887953
                                                   count in VME table, should
887953
                                                   keep changing with
887953
                                                   introduction of newer
887953
@@ -111,6 +111,8 @@
887953
 
887953
 #define GD_OP_VERSION_3_13_3   31303 /* Op-version for GlusterFS 3.13.3 */
887953
 
887953
+#define GD_OP_VERSION_4_2_0    40200 /* Op-version for GlusterFS 4.2.0 */
887953
+
887953
 /* Downstream only change */
887953
 #define GD_OP_VERSION_3_11_2   31102 /* Op-version for RHGS 3.3.1-async */
887953
 #define GD_OP_VERSION_3_13_3   31303 /* Op-version for RHGS-3.4-Batch Update-1*/
887953
diff --git a/tests/bugs/shard/configure-lru-limit.t b/tests/bugs/shard/configure-lru-limit.t
887953
new file mode 100644
887953
index 0000000..a8ba8ed
887953
--- /dev/null
887953
+++ b/tests/bugs/shard/configure-lru-limit.t
887953
@@ -0,0 +1,48 @@
887953
+#!/bin/bash
887953
+
887953
+. $(dirname $0)/../../include.rc
887953
+. $(dirname $0)/../../volume.rc
887953
+cleanup
887953
+
887953
+TEST glusterd
887953
+TEST pidof glusterd
887953
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
887953
+TEST $CLI volume set $V0 features.shard on
887953
+TEST $CLI volume set $V0 features.shard-block-size 4MB
887953
+TEST $CLI volume set $V0 features.shard-lru-limit 25
887953
+TEST $CLI volume start $V0
887953
+
887953
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
887953
+
887953
+# Perform a write that would cause 25 shards to be created, 24 of them under .shard
887953
+TEST dd if=/dev/zero of=$M0/foo bs=1M count=100
887953
+
887953
+statedump=$(generate_mount_statedump $V0)
887953
+sleep 1
887953
+EXPECT "25" echo $(grep "lru-max-limit" $statedump | cut -f2 -d'=' | tail -1)
887953
+
887953
+# The base shard is never added to this list, so the remaining shards should account for 24 inodes in the lru list
887953
+EXPECT "24" echo $(grep "inode-count" $statedump | cut -f2 -d'=' | tail -1)
887953
+
887953
+rm -f $statedump
887953
+
887953
+# Test to ensure there's no "reconfiguration" of the value once set.
887953
+TEST $CLI volume set $V0 features.shard-lru-limit 30
887953
+statedump=$(generate_mount_statedump $V0)
887953
+sleep 1
887953
+EXPECT "25" echo $(grep "lru-max-limit" $statedump | cut -f2 -d'=' | tail -1)
887953
+rm -f $statedump
887953
+
887953
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
887953
+
887953
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
887953
+statedump=$(generate_mount_statedump $V0)
887953
+sleep 1
887953
+EXPECT "30" echo $(grep "lru-max-limit" $statedump | cut -f2 -d'=' | tail -1)
887953
+rm -f $statedump
887953
+
887953
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
887953
+TEST $CLI volume stop $V0
887953
+TEST $CLI volume delete $V0
887953
+
887953
+cleanup
887953
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
887953
index 6066a54..eb32168 100644
887953
--- a/xlators/features/shard/src/shard.c
887953
+++ b/xlators/features/shard/src/shard.c
887953
@@ -668,7 +668,7 @@ __shard_update_shards_inode_list (inode_t *linked_inode, xlator_t *this,
887953
         shard_inode_ctx_get (linked_inode, this, &ctx);
887953
 
887953
         if (list_empty (&ctx->ilist)) {
887953
-                if (priv->inode_count + 1 <= SHARD_MAX_INODES) {
887953
+                if (priv->inode_count + 1 <= priv->lru_limit) {
887953
                 /* If this inode was linked here for the first time (indicated
887953
                  * by empty list), and if there is still space in the priv list,
887953
                  * add this ctx to the tail of the list.
887953
@@ -6690,6 +6690,8 @@ init (xlator_t *this)
887953
 
887953
         GF_OPTION_INIT ("shard-deletion-rate", priv->deletion_rate, uint32, out);
887953
 
887953
+        GF_OPTION_INIT ("shard-lru-limit", priv->lru_limit, uint64, out);
887953
+
887953
         this->local_pool = mem_pool_new (shard_local_t, 128);
887953
         if (!this->local_pool) {
887953
                 ret = -1;
887953
@@ -6808,7 +6810,7 @@ shard_priv_dump (xlator_t *this)
887953
                             gf_uint64_2human_readable (priv->block_size));
887953
         gf_proc_dump_write ("inode-count", "%d", priv->inode_count);
887953
         gf_proc_dump_write ("ilist_head", "%p", &priv->ilist_head);
887953
-        gf_proc_dump_write ("lru-max-limit", "%d", SHARD_MAX_INODES);
887953
+        gf_proc_dump_write ("lru-max-limit", "%"PRIu64, priv->lru_limit);
887953
 
887953
         return 0;
887953
 }
887953
@@ -6877,5 +6879,18 @@ struct volume_options options[] = {
887953
            .max = INT_MAX,
887953
            .description = "The number of shards to send deletes on at a time",
887953
         },
887953
+        {  .key = {"shard-lru-limit"},
887953
+           .type = GF_OPTION_TYPE_INT,
887953
+           .default_value = "16384",
887953
+           .min = 20,
887953
+           .max = INT_MAX,
887953
+           .description = "The number of resolved shard inodes to keep in "
887953
+                          "memory. A higher number means shards that are "
887953
+                          "resolved will remain in memory longer, avoiding "
887953
+                          "frequent lookups on them when they participate in "
887953
+                          "file operations. The option also has a bearing on "
887953
+                          "amount of memory consumed by these inodes and their "
887953
+                          "internal metadata",
887953
+        },
887953
         { .key = {NULL} },
887953
 };
887953
diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
887953
index 5de098a..ac3813c 100644
887953
--- a/xlators/features/shard/src/shard.h
887953
+++ b/xlators/features/shard/src/shard.h
887953
@@ -23,8 +23,6 @@
887953
 #define SHARD_MAX_BLOCK_SIZE  (4 * GF_UNIT_TB)
887953
 #define SHARD_XATTR_PREFIX "trusted.glusterfs.shard."
887953
 #define GF_XATTR_SHARD_BLOCK_SIZE "trusted.glusterfs.shard.block-size"
887953
-#define SHARD_INODE_LRU_LIMIT 4096
887953
-#define SHARD_MAX_INODES 16384
887953
 /**
887953
  *  Bit masks for the valid flag, which is used while updating ctx
887953
 **/
887953
@@ -216,6 +214,7 @@ typedef struct shard_priv {
887953
         struct list_head ilist_head;
887953
         uint32_t deletion_rate;
887953
         shard_first_lookup_state_t first_lookup;
887953
+        uint64_t lru_limit;
887953
 } shard_priv_t;
887953
 
887953
 typedef struct {
887953
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
887953
index a825f52..d442fe0 100644
887953
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
887953
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
887953
@@ -3298,6 +3298,12 @@ struct volopt_map_entry glusterd_volopt_map[] = {
887953
           .op_version = GD_OP_VERSION_3_7_0,
887953
           .flags      = OPT_FLAG_CLIENT_OPT
887953
         },
887953
+        { .key        = "features.shard-lru-limit",
887953
+          .voltype    = "features/shard",
887953
+          .op_version = GD_OP_VERSION_4_2_0,
887953
+          .flags      = OPT_FLAG_CLIENT_OPT,
887953
+          .type       = NO_DOC,
887953
+        },
887953
         { .key        = "features.shard-deletion-rate",
887953
           .voltype    = "features/shard",
887953
           .op_version = GD_OP_VERSION_3_13_4,
887953
-- 
887953
1.8.3.1
887953