From 7b12a7ea7a6b4945ad52f218b187ca440dfbef63 Mon Sep 17 00:00:00 2001
From: Krutika Dhananjay <kdhananj@redhat.com>
Date: Fri, 20 Jul 2018 10:52:22 +0530
Subject: [PATCH 441/444] features/shard: Make lru limit of inode list
 configurable

> Upstream: https://review.gluster.org/20544
> BUG: 1605056
> Change-Id: Ifdcc2099f634314fafe8444e2d676e192e89e295

Currently this lru limit is hard-coded to 16384. This patch makes it
configurable, so that it is easier to hit the lru limit and to test the
different cases that arise when the limit is reached.

The option is features.shard-lru-limit. It is by design allowed to
be configured only in init() but not in reconfigure(). This is to avoid
all the complexity associated with eviction of least recently used shards
when the list is shrunk.

Change-Id: Ifdcc2099f634314fafe8444e2d676e192e89e295
BUG: 1603118
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/155126
Reviewed-by: Xavi Hernandez <xhernandez@redhat.com>
Tested-by: RHGS Build Bot <nigelb@redhat.com>
---
 libglusterfs/src/globals.h                      |  4 ++-
 tests/bugs/shard/configure-lru-limit.t          | 48 +++++++++++++++++++++++++
 xlators/features/shard/src/shard.c              | 19 ++++++++--
 xlators/features/shard/src/shard.h              |  3 +-
 xlators/mgmt/glusterd/src/glusterd-volume-set.c |  6 ++++
 5 files changed, 75 insertions(+), 5 deletions(-)
 create mode 100644 tests/bugs/shard/configure-lru-limit.t

diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h
index 97c4fad..555f44b 100644
--- a/libglusterfs/src/globals.h
+++ b/libglusterfs/src/globals.h
@@ -43,7 +43,7 @@
  */
 #define GD_OP_VERSION_MIN  1 /* MIN is the fresh start op-version, mostly
                                 should not change */
-#define GD_OP_VERSION_MAX  GD_OP_VERSION_3_13_3 /* MAX VERSION is the maximum
+#define GD_OP_VERSION_MAX  GD_OP_VERSION_4_2_0 /* MAX VERSION is the maximum
                                                   count in VME table, should
                                                   keep changing with
                                                   introduction of newer
@@ -111,6 +111,8 @@
 
 #define GD_OP_VERSION_3_13_3   31303 /* Op-version for GlusterFS 3.13.3 */
 
+#define GD_OP_VERSION_4_2_0    40200 /* Op-version for GlusterFS 4.2.0 */
+
 /* Downstream only change */
 #define GD_OP_VERSION_3_11_2   31102 /* Op-version for RHGS 3.3.1-async */
 #define GD_OP_VERSION_3_13_3   31303 /* Op-version for RHGS-3.4-Batch Update-1*/
diff --git a/tests/bugs/shard/configure-lru-limit.t b/tests/bugs/shard/configure-lru-limit.t
new file mode 100644
index 0000000..a8ba8ed
--- /dev/null
+++ b/tests/bugs/shard/configure-lru-limit.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume set $V0 features.shard-lru-limit 25
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+# Perform a write that would cause 25 shards to be created, 24 of them under .shard
+TEST dd if=/dev/zero of=$M0/foo bs=1M count=100
+
+statedump=$(generate_mount_statedump $V0)
+sleep 1
+EXPECT "25" echo $(grep "lru-max-limit" $statedump | cut -f2 -d'=' | tail -1)
+
+# Base shard is never added to this list. So all other shards should account for 24 inodes in lru list
+EXPECT "24" echo $(grep "inode-count" $statedump | cut -f2 -d'=' | tail -1)
+
+rm -f $statedump
+
+# Test to ensure there's no "reconfiguration" of the value once set.
+TEST $CLI volume set $V0 features.shard-lru-limit 30
+statedump=$(generate_mount_statedump $V0)
+sleep 1
+EXPECT "25" echo $(grep "lru-max-limit" $statedump | cut -f2 -d'=' | tail -1)
+rm -f $statedump
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+statedump=$(generate_mount_statedump $V0)
+sleep 1
+EXPECT "30" echo $(grep "lru-max-limit" $statedump | cut -f2 -d'=' | tail -1)
+rm -f $statedump
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
index 6066a54..eb32168 100644
--- a/xlators/features/shard/src/shard.c
+++ b/xlators/features/shard/src/shard.c
@@ -668,7 +668,7 @@ __shard_update_shards_inode_list (inode_t *linked_inode, xlator_t *this,
         shard_inode_ctx_get (linked_inode, this, &ctx);
 
         if (list_empty (&ctx->ilist)) {
-                if (priv->inode_count + 1 <= SHARD_MAX_INODES) {
+                if (priv->inode_count + 1 <= priv->lru_limit) {
                 /* If this inode was linked here for the first time (indicated
                  * by empty list), and if there is still space in the priv list,
                  * add this ctx to the tail of the list.
@@ -6690,6 +6690,8 @@ init (xlator_t *this)
 
         GF_OPTION_INIT ("shard-deletion-rate", priv->deletion_rate, uint32, out);
 
+        GF_OPTION_INIT ("shard-lru-limit", priv->lru_limit, uint64, out);
+
         this->local_pool = mem_pool_new (shard_local_t, 128);
         if (!this->local_pool) {
                 ret = -1;
@@ -6808,7 +6810,7 @@ shard_priv_dump (xlator_t *this)
                             gf_uint64_2human_readable (priv->block_size));
         gf_proc_dump_write ("inode-count", "%d", priv->inode_count);
         gf_proc_dump_write ("ilist_head", "%p", &priv->ilist_head);
-        gf_proc_dump_write ("lru-max-limit", "%d", SHARD_MAX_INODES);
+        gf_proc_dump_write ("lru-max-limit", "%d", priv->lru_limit);
 
         return 0;
 }
@@ -6877,5 +6879,18 @@ struct volume_options options[] = {
            .max = INT_MAX,
            .description = "The number of shards to send deletes on at a time",
         },
+        {  .key = {"shard-lru-limit"},
+           .type = GF_OPTION_TYPE_INT,
+           .default_value = "16384",
+           .min = 20,
+           .max = INT_MAX,
+           .description = "The number of resolved shard inodes to keep in "
+                          "memory. A higher number means shards that are "
+                          "resolved will remain in memory longer, avoiding "
+                          "frequent lookups on them when they participate in "
+                          "file operations. The option also has a bearing on "
+                          "amount of memory consumed by these inodes and their "
+                          "internal metadata",
+        },
         { .key = {NULL} },
 };
diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
index 5de098a..ac3813c 100644
--- a/xlators/features/shard/src/shard.h
+++ b/xlators/features/shard/src/shard.h
@@ -23,8 +23,6 @@
 #define SHARD_MAX_BLOCK_SIZE  (4 * GF_UNIT_TB)
 #define SHARD_XATTR_PREFIX "trusted.glusterfs.shard."
 #define GF_XATTR_SHARD_BLOCK_SIZE "trusted.glusterfs.shard.block-size"
-#define SHARD_INODE_LRU_LIMIT 4096
-#define SHARD_MAX_INODES 16384
 /**
  *  Bit masks for the valid flag, which is used while updating ctx
 **/
@@ -216,6 +214,7 @@ typedef struct shard_priv {
         struct list_head ilist_head;
         uint32_t deletion_rate;
         shard_first_lookup_state_t first_lookup;
+        uint64_t lru_limit;
 } shard_priv_t;
 
 typedef struct {
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index a825f52..d442fe0 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -3298,6 +3298,12 @@ struct volopt_map_entry glusterd_volopt_map[] = {
           .op_version = GD_OP_VERSION_3_7_0,
           .flags      = OPT_FLAG_CLIENT_OPT
         },
+        { .key        = "features.shard-lru-limit",
+          .voltype    = "features/shard",
+          .op_version = GD_OP_VERSION_4_2_0,
+          .flags      = OPT_FLAG_CLIENT_OPT,
+          .type       = NO_DOC,
+        },
         { .key        = "features.shard-deletion-rate",
           .voltype    = "features/shard",
           .op_version = GD_OP_VERSION_3_13_4,
-- 
1.8.3.1