|
|
887953 |
From 7b12a7ea7a6b4945ad52f218b187ca440dfbef63 Mon Sep 17 00:00:00 2001
|
|
|
887953 |
From: Krutika Dhananjay <kdhananj@redhat.com>
|
|
|
887953 |
Date: Fri, 20 Jul 2018 10:52:22 +0530
|
|
|
887953 |
Subject: [PATCH 441/444] features/shard: Make lru limit of inode list
|
|
|
887953 |
configurable
|
|
|
887953 |
|
|
|
887953 |
> Upstream: https://review.gluster.org/20544
|
|
|
887953 |
> BUG: 1605056
|
|
|
887953 |
> Change-Id: Ifdcc2099f634314fafe8444e2d676e192e89e295
|
|
|
887953 |
|
|
|
887953 |
Currently this lru limit is hard-coded to 16384. This patch makes it
|
|
|
887953 |
configurable to make it easier to hit the lru limit and enable testing
|
|
|
887953 |
of different cases that arise when the limit is reached.
|
|
|
887953 |
|
|
|
887953 |
The option is features.shard-lru-limit. It is by design allowed to
|
|
|
887953 |
be configured only in init() but not in reconfigure(). This is to avoid
|
|
|
887953 |
all the complexity associated with eviction of least recently used shards
|
|
|
887953 |
when the list is shrunk.
|
|
|
887953 |
|
|
|
887953 |
Change-Id: Ifdcc2099f634314fafe8444e2d676e192e89e295
|
|
|
887953 |
BUG: 1603118
|
|
|
887953 |
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
|
|
|
887953 |
Reviewed-on: https://code.engineering.redhat.com/gerrit/155126
|
|
|
887953 |
Reviewed-by: Xavi Hernandez <xhernandez@redhat.com>
|
|
|
887953 |
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
|
887953 |
---
|
|
|
887953 |
libglusterfs/src/globals.h | 4 ++-
|
|
|
887953 |
tests/bugs/shard/configure-lru-limit.t | 48 +++++++++++++++++++++++++
|
|
|
887953 |
xlators/features/shard/src/shard.c | 19 ++++++++--
|
|
|
887953 |
xlators/features/shard/src/shard.h | 3 +-
|
|
|
887953 |
xlators/mgmt/glusterd/src/glusterd-volume-set.c | 6 ++++
|
|
|
887953 |
5 files changed, 75 insertions(+), 5 deletions(-)
|
|
|
887953 |
create mode 100644 tests/bugs/shard/configure-lru-limit.t
|
|
|
887953 |
|
|
|
887953 |
diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h
|
|
|
887953 |
index 97c4fad..555f44b 100644
|
|
|
887953 |
--- a/libglusterfs/src/globals.h
|
|
|
887953 |
+++ b/libglusterfs/src/globals.h
|
|
|
887953 |
@@ -43,7 +43,7 @@
|
|
|
887953 |
*/
|
|
|
887953 |
#define GD_OP_VERSION_MIN 1 /* MIN is the fresh start op-version, mostly
|
|
|
887953 |
should not change */
|
|
|
887953 |
-#define GD_OP_VERSION_MAX GD_OP_VERSION_3_13_3 /* MAX VERSION is the maximum
|
|
|
887953 |
+#define GD_OP_VERSION_MAX GD_OP_VERSION_4_2_0 /* MAX VERSION is the maximum
|
|
|
887953 |
count in VME table, should
|
|
|
887953 |
keep changing with
|
|
|
887953 |
introduction of newer
|
|
|
887953 |
@@ -111,6 +111,8 @@
|
|
|
887953 |
|
|
|
887953 |
#define GD_OP_VERSION_3_13_3 31303 /* Op-version for GlusterFS 3.13.3 */
|
|
|
887953 |
|
|
|
887953 |
+#define GD_OP_VERSION_4_2_0 40200 /* Op-version for GlusterFS 4.2.0 */
|
|
|
887953 |
+
|
|
|
887953 |
/* Downstream only change */
|
|
|
887953 |
#define GD_OP_VERSION_3_11_2 31102 /* Op-version for RHGS 3.3.1-async */
|
|
|
887953 |
#define GD_OP_VERSION_3_13_3 31303 /* Op-version for RHGS-3.4-Batch Update-1*/
|
|
|
887953 |
diff --git a/tests/bugs/shard/configure-lru-limit.t b/tests/bugs/shard/configure-lru-limit.t
|
|
|
887953 |
new file mode 100644
|
|
|
887953 |
index 0000000..a8ba8ed
|
|
|
887953 |
--- /dev/null
|
|
|
887953 |
+++ b/tests/bugs/shard/configure-lru-limit.t
|
|
|
887953 |
@@ -0,0 +1,48 @@
|
|
|
887953 |
+#!/bin/bash
|
|
|
887953 |
+
|
|
|
887953 |
+. $(dirname $0)/../../include.rc
|
|
|
887953 |
+. $(dirname $0)/../../volume.rc
|
|
|
887953 |
+cleanup
|
|
|
887953 |
+
|
|
|
887953 |
+TEST glusterd
|
|
|
887953 |
+TEST pidof glusterd
|
|
|
887953 |
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
|
|
|
887953 |
+TEST $CLI volume set $V0 features.shard on
|
|
|
887953 |
+TEST $CLI volume set $V0 features.shard-block-size 4MB
|
|
|
887953 |
+TEST $CLI volume set $V0 features.shard-lru-limit 25
|
|
|
887953 |
+TEST $CLI volume start $V0
|
|
|
887953 |
+
|
|
|
887953 |
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
|
|
|
887953 |
+
|
|
|
887953 |
+# Perform a write that would cause 25 shards to be created, 24 of them under .shard
|
|
|
887953 |
+TEST dd if=/dev/zero of=$M0/foo bs=1M count=100
|
|
|
887953 |
+
|
|
|
887953 |
+statedump=$(generate_mount_statedump $V0)
|
|
|
887953 |
+sleep 1
|
|
|
887953 |
+EXPECT "25" echo $(grep "lru-max-limit" $statedump | cut -f2 -d'=' | tail -1)
|
|
|
887953 |
+
|
|
|
887953 |
+# Base shard is never added to this list. So all other shards should make up for 24 inodes in lru list
|
|
|
887953 |
+EXPECT "24" echo $(grep "inode-count" $statedump | cut -f2 -d'=' | tail -1)
|
|
|
887953 |
+
|
|
|
887953 |
+rm -f $statedump
|
|
|
887953 |
+
|
|
|
887953 |
+# Test to ensure there's no "reconfiguration" of the value once set.
|
|
|
887953 |
+TEST $CLI volume set $V0 features.shard-lru-limit 30
|
|
|
887953 |
+statedump=$(generate_mount_statedump $V0)
|
|
|
887953 |
+sleep 1
|
|
|
887953 |
+EXPECT "25" echo $(grep "lru-max-limit" $statedump | cut -f2 -d'=' | tail -1)
|
|
|
887953 |
+rm -f $statedump
|
|
|
887953 |
+
|
|
|
887953 |
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
|
|
|
887953 |
+
|
|
|
887953 |
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
|
|
|
887953 |
+statedump=$(generate_mount_statedump $V0)
|
|
|
887953 |
+sleep 1
|
|
|
887953 |
+EXPECT "30" echo $(grep "lru-max-limit" $statedump | cut -f2 -d'=' | tail -1)
|
|
|
887953 |
+rm -f $statedump
|
|
|
887953 |
+
|
|
|
887953 |
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
|
|
|
887953 |
+TEST $CLI volume stop $V0
|
|
|
887953 |
+TEST $CLI volume delete $V0
|
|
|
887953 |
+
|
|
|
887953 |
+cleanup
|
|
|
887953 |
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
|
|
|
887953 |
index 6066a54..eb32168 100644
|
|
|
887953 |
--- a/xlators/features/shard/src/shard.c
|
|
|
887953 |
+++ b/xlators/features/shard/src/shard.c
|
|
|
887953 |
@@ -668,7 +668,7 @@ __shard_update_shards_inode_list (inode_t *linked_inode, xlator_t *this,
|
|
|
887953 |
shard_inode_ctx_get (linked_inode, this, &ctx);
|
|
|
887953 |
|
|
|
887953 |
if (list_empty (&ctx->ilist)) {
|
|
|
887953 |
- if (priv->inode_count + 1 <= SHARD_MAX_INODES) {
|
|
|
887953 |
+ if (priv->inode_count + 1 <= priv->lru_limit) {
|
|
|
887953 |
/* If this inode was linked here for the first time (indicated
|
|
|
887953 |
* by empty list), and if there is still space in the priv list,
|
|
|
887953 |
* add this ctx to the tail of the list.
|
|
|
887953 |
@@ -6690,6 +6690,8 @@ init (xlator_t *this)
|
|
|
887953 |
|
|
|
887953 |
GF_OPTION_INIT ("shard-deletion-rate", priv->deletion_rate, uint32, out);
|
|
|
887953 |
|
|
|
887953 |
+ GF_OPTION_INIT ("shard-lru-limit", priv->lru_limit, uint64, out);
|
|
|
887953 |
+
|
|
|
887953 |
this->local_pool = mem_pool_new (shard_local_t, 128);
|
|
|
887953 |
if (!this->local_pool) {
|
|
|
887953 |
ret = -1;
|
|
|
887953 |
@@ -6808,7 +6810,7 @@ shard_priv_dump (xlator_t *this)
|
|
|
887953 |
gf_uint64_2human_readable (priv->block_size));
|
|
|
887953 |
gf_proc_dump_write ("inode-count", "%d", priv->inode_count);
|
|
|
887953 |
gf_proc_dump_write ("ilist_head", "%p", &priv->ilist_head);
|
|
|
887953 |
- gf_proc_dump_write ("lru-max-limit", "%d", SHARD_MAX_INODES);
|
|
|
887953 |
+ gf_proc_dump_write ("lru-max-limit", "%d", priv->lru_limit);
|
|
|
887953 |
|
|
|
887953 |
return 0;
|
|
|
887953 |
}
|
|
|
887953 |
@@ -6877,5 +6879,18 @@ struct volume_options options[] = {
|
|
|
887953 |
.max = INT_MAX,
|
|
|
887953 |
.description = "The number of shards to send deletes on at a time",
|
|
|
887953 |
},
|
|
|
887953 |
+ { .key = {"shard-lru-limit"},
|
|
|
887953 |
+ .type = GF_OPTION_TYPE_INT,
|
|
|
887953 |
+ .default_value = "16384",
|
|
|
887953 |
+ .min = 20,
|
|
|
887953 |
+ .max = INT_MAX,
|
|
|
887953 |
+ .description = "The number of resolved shard inodes to keep in "
|
|
|
887953 |
+ "memory. A higher number means shards that are "
|
|
|
887953 |
+ "resolved will remain in memory longer, avoiding "
|
|
|
887953 |
+ "frequent lookups on them when they participate in "
|
|
|
887953 |
+ "file operations. The option also has a bearing on "
|
|
|
887953 |
+ "amount of memory consumed by these inodes and their "
|
|
|
887953 |
+ "internal metadata",
|
|
|
887953 |
+ },
|
|
|
887953 |
{ .key = {NULL} },
|
|
|
887953 |
};
|
|
|
887953 |
diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
|
|
|
887953 |
index 5de098a..ac3813c 100644
|
|
|
887953 |
--- a/xlators/features/shard/src/shard.h
|
|
|
887953 |
+++ b/xlators/features/shard/src/shard.h
|
|
|
887953 |
@@ -23,8 +23,6 @@
|
|
|
887953 |
#define SHARD_MAX_BLOCK_SIZE (4 * GF_UNIT_TB)
|
|
|
887953 |
#define SHARD_XATTR_PREFIX "trusted.glusterfs.shard."
|
|
|
887953 |
#define GF_XATTR_SHARD_BLOCK_SIZE "trusted.glusterfs.shard.block-size"
|
|
|
887953 |
-#define SHARD_INODE_LRU_LIMIT 4096
|
|
|
887953 |
-#define SHARD_MAX_INODES 16384
|
|
|
887953 |
/**
|
|
|
887953 |
* Bit masks for the valid flag, which is used while updating ctx
|
|
|
887953 |
**/
|
|
|
887953 |
@@ -216,6 +214,7 @@ typedef struct shard_priv {
|
|
|
887953 |
struct list_head ilist_head;
|
|
|
887953 |
uint32_t deletion_rate;
|
|
|
887953 |
shard_first_lookup_state_t first_lookup;
|
|
|
887953 |
+ uint64_t lru_limit;
|
|
|
887953 |
} shard_priv_t;
|
|
|
887953 |
|
|
|
887953 |
typedef struct {
|
|
|
887953 |
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
|
|
|
887953 |
index a825f52..d442fe0 100644
|
|
|
887953 |
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
|
|
|
887953 |
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
|
|
|
887953 |
@@ -3298,6 +3298,12 @@ struct volopt_map_entry glusterd_volopt_map[] = {
|
|
|
887953 |
.op_version = GD_OP_VERSION_3_7_0,
|
|
|
887953 |
.flags = OPT_FLAG_CLIENT_OPT
|
|
|
887953 |
},
|
|
|
887953 |
+ { .key = "features.shard-lru-limit",
|
|
|
887953 |
+ .voltype = "features/shard",
|
|
|
887953 |
+ .op_version = GD_OP_VERSION_4_2_0,
|
|
|
887953 |
+ .flags = OPT_FLAG_CLIENT_OPT,
|
|
|
887953 |
+ .type = NO_DOC,
|
|
|
887953 |
+ },
|
|
|
887953 |
{ .key = "features.shard-deletion-rate",
|
|
|
887953 |
.voltype = "features/shard",
|
|
|
887953 |
.op_version = GD_OP_VERSION_3_13_4,
|
|
|
887953 |
--
|
|
|
887953 |
1.8.3.1
|
|
|
887953 |
|