|
|
21ab4e |
From fc94bc56c6ba0eb9876be606092d6aa8407af0ae Mon Sep 17 00:00:00 2001
|
|
|
21ab4e |
From: Poornima G <pgurusid@redhat.com>
|
|
|
21ab4e |
Date: Thu, 13 Apr 2017 16:20:29 +0530
|
|
|
21ab4e |
Subject: [PATCH 383/393] dht: Add readdir-ahead in rebalance graph if
|
|
|
21ab4e |
parallel-readdir is on
|
|
|
21ab4e |
|
|
|
21ab4e |
Issue:
|
|
|
21ab4e |
The value of linkto xattr is generally the name of the dht's
|
|
|
21ab4e |
next subvol, this requires that the next subvol of dht is not
|
|
|
21ab4e |
changed for the lifetime of the volume. But with parallel
|
|
|
21ab4e |
readdir enabled, the readdir-ahead loaded below dht, is optional.
|
|
|
21ab4e |
The linkto xattr for first subvol, when:
|
|
|
21ab4e |
- parallel readdir is enabled : "<volname>-readdir-ahead-0"
|
|
|
21ab4e |
- plain distribute volume : "<volname>-client-0"
|
|
|
21ab4e |
- distribute replicate volume : "<volname>-afr-0"
|
|
|
21ab4e |
|
|
|
21ab4e |
The value of linkto xattr is "<volname>-readdir-ahead-0" when
|
|
|
21ab4e |
parallel readdir is enabled, and is "<volname>-client-0" if
|
|
|
21ab4e |
it's disabled. But the dht_lookup takes care of healing if it
|
|
|
21ab4e |
cannot identify which linkto subvol the xattr points to.
|
|
|
21ab4e |
|
|
|
21ab4e |
In dht_lookup_cbk, if linkto xattr is found to be "<volname>-client-0"
|
|
|
21ab4e |
and parallel readdir is enabled, then it cannot understand the
|
|
|
21ab4e |
value "<volname>-client-0" as it expects "<volname>-readdir-ahead-0".
|
|
|
21ab4e |
In that case, dht_lookup_everywhere is issued and then the linkto file
|
|
|
21ab4e |
is unlinked and recreated with the right linkto xattr. The issue is
|
|
|
21ab4e |
when parallel readdir is enabled and the mount point accesses a file
|
|
|
21ab4e |
that is currently being migrated. Since the rebalance process doesn't
|
|
|
21ab4e |
have parallel-readdir feature, it expects "<volname>-client-0"
|
|
|
21ab4e |
whereas the mount expects "<volname>-readdir-ahead-0". Thus at some point
|
|
|
21ab4e |
either the mount or rebalance will fail.
|
|
|
21ab4e |
|
|
|
21ab4e |
Solution:
|
|
|
21ab4e |
Enable parallel-readdir for rebalance as well and then do not
|
|
|
21ab4e |
allow enabling/disabling parallel-readdir if rebalance is in
|
|
|
21ab4e |
progress.
|
|
|
21ab4e |
|
|
|
21ab4e |
>Reviewed-on: https://review.gluster.org/17056
|
|
|
21ab4e |
>Smoke: Gluster Build System <jenkins@build.gluster.org>
|
|
|
21ab4e |
>NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
|
|
|
21ab4e |
>CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
|
|
|
21ab4e |
>Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
|
|
|
21ab4e |
>Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
|
|
|
21ab4e |
>Signed-off-by: Poornima G <pgurusid@redhat.com>
|
|
|
21ab4e |
|
|
|
21ab4e |
Change-Id: I241ab966bdd850e667f7768840540546f5289483
|
|
|
21ab4e |
BUG: 1442026
|
|
|
21ab4e |
Signed-off-by: Poornima G <pgurusid@redhat.com>
|
|
|
21ab4e |
Reviewed-on: https://code.engineering.redhat.com/gerrit/103637
|
|
|
21ab4e |
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
|
|
|
21ab4e |
---
|
|
|
21ab4e |
libglusterfs/src/inode.c | 9 +++++
|
|
|
21ab4e |
tests/bugs/distribute/bug-1161311.t | 1 +
|
|
|
21ab4e |
tests/bugs/readdir-ahead/bug-1436090.t | 44 +++++++++++++++++++++++++
|
|
|
21ab4e |
xlators/mgmt/glusterd/src/glusterd-volgen.c | 6 ++--
|
|
|
21ab4e |
xlators/mgmt/glusterd/src/glusterd-volume-set.c | 26 ++++++++++++++-
|
|
|
21ab4e |
5 files changed, 81 insertions(+), 5 deletions(-)
|
|
|
21ab4e |
create mode 100755 tests/bugs/readdir-ahead/bug-1436090.t
|
|
|
21ab4e |
|
|
|
21ab4e |
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
|
|
|
21ab4e |
index 650a301..747c1f1 100644
|
|
|
21ab4e |
--- a/libglusterfs/src/inode.c
|
|
|
21ab4e |
+++ b/libglusterfs/src/inode.c
|
|
|
21ab4e |
@@ -2528,6 +2528,15 @@ inode_ctx_size (inode_t *inode)
|
|
|
21ab4e |
old_THIS = THIS;
|
|
|
21ab4e |
THIS = xl;
|
|
|
21ab4e |
|
|
|
21ab4e |
+ /* If inode ref is taken when THIS is global xlator,
|
|
|
21ab4e |
+ * the ctx xl_key is set, but the value is NULL.
|
|
|
21ab4e |
+ * For global xlator the cbks can be NULL, hence check
|
|
|
21ab4e |
+ * for the same */
|
|
|
21ab4e |
+ if (!xl->cbks) {
|
|
|
21ab4e |
+ THIS = old_THIS;
|
|
|
21ab4e |
+ continue;
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
+
|
|
|
21ab4e |
if (xl->cbks->ictxsize)
|
|
|
21ab4e |
size += xl->cbks->ictxsize (xl, inode);
|
|
|
21ab4e |
|
|
|
21ab4e |
diff --git a/tests/bugs/distribute/bug-1161311.t b/tests/bugs/distribute/bug-1161311.t
|
|
|
21ab4e |
index c5a7f04..93e9d03 100755
|
|
|
21ab4e |
--- a/tests/bugs/distribute/bug-1161311.t
|
|
|
21ab4e |
+++ b/tests/bugs/distribute/bug-1161311.t
|
|
|
21ab4e |
@@ -43,6 +43,7 @@ EXPECT "$V0" volinfo_field $V0 'Volume Name';
|
|
|
21ab4e |
EXPECT 'Created' volinfo_field $V0 'Status';
|
|
|
21ab4e |
EXPECT '3' brick_count $V0
|
|
|
21ab4e |
|
|
|
21ab4e |
+TEST $CLI volume set $V0 parallel-readdir on
|
|
|
21ab4e |
TEST $CLI volume start $V0;
|
|
|
21ab4e |
EXPECT 'Started' volinfo_field $V0 'Status';
|
|
|
21ab4e |
|
|
|
21ab4e |
diff --git a/tests/bugs/readdir-ahead/bug-1436090.t b/tests/bugs/readdir-ahead/bug-1436090.t
|
|
|
21ab4e |
new file mode 100755
|
|
|
21ab4e |
index 0000000..58e9093
|
|
|
21ab4e |
--- /dev/null
|
|
|
21ab4e |
+++ b/tests/bugs/readdir-ahead/bug-1436090.t
|
|
|
21ab4e |
@@ -0,0 +1,44 @@
|
|
|
21ab4e |
+#!/bin/bash
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+. $(dirname $0)/../../include.rc
|
|
|
21ab4e |
+. $(dirname $0)/../../volume.rc
|
|
|
21ab4e |
+. $(dirname $0)/../../cluster.rc
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+cleanup;
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+TEST launch_cluster 2;
|
|
|
21ab4e |
+TEST $CLI_1 peer probe $H2;
|
|
|
21ab4e |
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+$CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0
|
|
|
21ab4e |
+EXPECT 'Created' cluster_volinfo_field 1 $V0 'Status';
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+$CLI_1 volume start $V0
|
|
|
21ab4e |
+EXPECT 'Started' cluster_volinfo_field 1 $V0 'Status';
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+TEST glusterfs -s $H1 --volfile-id $V0 $M0;
|
|
|
21ab4e |
+TEST mkdir $M0/dir1
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+# Create a large file (3.2 GB), so that rebalance takes time
|
|
|
21ab4e |
+# Reading from /dev/urandom is slow, so generate 640 MB once and append it 5 times
|
|
|
21ab4e |
+dd if=/dev/urandom of=/tmp/FILE2 bs=64k count=10240
|
|
|
21ab4e |
+for i in {1..5}; do
|
|
|
21ab4e |
+ cat /tmp/FILE2 >> $M0/dir1/foo
|
|
|
21ab4e |
+done
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+TEST mv $M0/dir1/foo $M0/dir1/bar
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+TEST $CLI_1 volume rebalance $V0 start force
|
|
|
21ab4e |
+TEST ! $CLI_1 volume set $V0 parallel-readdir on
|
|
|
21ab4e |
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 1 $V0
|
|
|
21ab4e |
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 2 $V0
|
|
|
21ab4e |
+TEST $CLI_1 volume set $V0 parallel-readdir on
|
|
|
21ab4e |
+TEST mv $M0/dir1/bar $M0/dir1/foo
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
|
|
|
21ab4e |
+TEST glusterfs -s $H1 --volfile-id $V0 $M0;
|
|
|
21ab4e |
+TEST $CLI_1 volume rebalance $V0 start force
|
|
|
21ab4e |
+TEST ln $M0/dir1/foo $M0/dir1/bar
|
|
|
21ab4e |
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 1 $V0
|
|
|
21ab4e |
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 2 $V0
|
|
|
21ab4e |
+cleanup;
|
|
|
21ab4e |
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
|
|
|
21ab4e |
index 6e52d44..faa6c72 100644
|
|
|
21ab4e |
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
|
|
|
21ab4e |
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
|
|
|
21ab4e |
@@ -3308,8 +3308,7 @@ volgen_graph_build_readdir_ahead (volgen_graph_t *graph,
|
|
|
21ab4e |
{
|
|
|
21ab4e |
int32_t clusters = 0;
|
|
|
21ab4e |
|
|
|
21ab4e |
- if (graph->type == GF_REBALANCED ||
|
|
|
21ab4e |
- graph->type == GF_QUOTAD ||
|
|
|
21ab4e |
+ if (graph->type == GF_QUOTAD ||
|
|
|
21ab4e |
graph->type == GF_SNAPD ||
|
|
|
21ab4e |
!glusterd_volinfo_get_boolean (volinfo, VKEY_PARALLEL_READDIR) ||
|
|
|
21ab4e |
!glusterd_volinfo_get_boolean (volinfo, VKEY_READDIR_AHEAD))
|
|
|
21ab4e |
@@ -3617,8 +3616,7 @@ client_graph_set_rda_options (volgen_graph_t *graph,
|
|
|
21ab4e |
if (dist_count <= 1)
|
|
|
21ab4e |
goto out;
|
|
|
21ab4e |
|
|
|
21ab4e |
- if (graph->type == GF_REBALANCED ||
|
|
|
21ab4e |
- graph->type == GF_QUOTAD ||
|
|
|
21ab4e |
+ if (graph->type == GF_QUOTAD ||
|
|
|
21ab4e |
graph->type == GF_SNAPD ||
|
|
|
21ab4e |
!glusterd_volinfo_get_boolean (volinfo, VKEY_PARALLEL_READDIR) ||
|
|
|
21ab4e |
!glusterd_volinfo_get_boolean (volinfo, VKEY_READDIR_AHEAD))
|
|
|
21ab4e |
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
|
|
|
21ab4e |
index 4a1c780..8e729da 100644
|
|
|
21ab4e |
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
|
|
|
21ab4e |
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
|
|
|
21ab4e |
@@ -919,6 +919,30 @@ out:
|
|
|
21ab4e |
|
|
|
21ab4e |
|
|
|
21ab4e |
static int
|
|
|
21ab4e |
+validate_parallel_readdir (glusterd_volinfo_t *volinfo, dict_t *dict,
|
|
|
21ab4e |
+ char *key, char *value, char **op_errstr)
|
|
|
21ab4e |
+{
|
|
|
21ab4e |
+ int ret = -1;
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+ ret = validate_boolean (volinfo, dict, key, value, op_errstr);
|
|
|
21ab4e |
+ if (ret)
|
|
|
21ab4e |
+ goto out;
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+ ret = glusterd_is_defrag_on (volinfo);
|
|
|
21ab4e |
+ if (ret) {
|
|
|
21ab4e |
+ gf_asprintf (op_errstr, "%s option should be set "
|
|
|
21ab4e |
+ "after rebalance is complete", key);
|
|
|
21ab4e |
+ gf_msg ("glusterd", GF_LOG_ERROR, 0,
|
|
|
21ab4e |
+ GD_MSG_INVALID_ENTRY, "%s", *op_errstr);
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
+out:
|
|
|
21ab4e |
+ gf_msg_debug ("glusterd", 0, "Returning %d", ret);
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+ return ret;
|
|
|
21ab4e |
+}
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+static int
|
|
|
21ab4e |
validate_worm_period (glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
|
|
|
21ab4e |
char *value, char **op_errstr)
|
|
|
21ab4e |
{
|
|
|
21ab4e |
@@ -3024,7 +3048,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
|
|
|
21ab4e |
.value = "off",
|
|
|
21ab4e |
.type = DOC,
|
|
|
21ab4e |
.op_version = GD_OP_VERSION_3_10_0,
|
|
|
21ab4e |
- .validate_fn = validate_boolean,
|
|
|
21ab4e |
+ .validate_fn = validate_parallel_readdir,
|
|
|
21ab4e |
.description = "If this option is enabled, the readdir operation is "
|
|
|
21ab4e |
"performed parallely on all the bricks, thus improving"
|
|
|
21ab4e |
" the performance of readdir. Note that the performance"
|
|
|
21ab4e |
--
|
|
|
21ab4e |
1.8.3.1
|
|
|
21ab4e |
|