21ab4e
From fc94bc56c6ba0eb9876be606092d6aa8407af0ae Mon Sep 17 00:00:00 2001
21ab4e
From: Poornima G <pgurusid@redhat.com>
21ab4e
Date: Thu, 13 Apr 2017 16:20:29 +0530
21ab4e
Subject: [PATCH 383/393] dht: Add readdir-ahead in rebalance graph if
21ab4e
 parallel-readdir is on
21ab4e
21ab4e
Issue:
21ab4e
The value of linkto xattr is generally the name of the dht's
21ab4e
next subvol, this requires that the next subvol of dht is not
21ab4e
changed for the lifetime of the volume. But with parallel
21ab4e
readdir enabled, the readdir-ahead loaded below dht is optional.
21ab4e
The linkto xattr for first subvol, when:
21ab4e
- parallel readdir is enabled : "<volname>-readdir-ahead-0"
21ab4e
- plain distribute volume : "<volname>-client-0"
21ab4e
- distribute replicate volume : "<volname>-afr-0"
21ab4e
21ab4e
The value of linkto xattr is "<volname>-readdir-ahead-0" when
21ab4e
parallel readdir is enabled, and is "<volname>-client-0" if
21ab4e
it's disabled. But the dht_lookup takes care of healing if it
21ab4e
cannot identify which linkto subvol the xattr points to.
21ab4e
21ab4e
In dht_lookup_cbk, if linkto xattr is found to be "<volname>-client-0"
21ab4e
and parallel readdir is enabled, then it cannot understand the
21ab4e
value "<volname>-client-0" as it expects "<volname>-readdir-ahead-0".
21ab4e
In that case, dht_lookup_everywhere is issued and then the linkto file
21ab4e
is unlinked and recreated with the right linkto xattr. The issue is
21ab4e
when parallel readdir is enabled, mount point accesses the file
21ab4e
that is currently being migrated. Since rebalance process doesn't
21ab4e
have parallel-readdir feature, it expects "<volname>-client-0"
21ab4e
whereas mount expects "<volname>-readdir-ahead-0". Thus at some point
21ab4e
either the mount or rebalance will fail.
21ab4e
21ab4e
Solution:
21ab4e
Enable parallel-readdir for rebalance as well and then do not
21ab4e
allow enabling/disabling parallel-readdir if rebalance is in
21ab4e
progress.
21ab4e
21ab4e
>Reviewed-on: https://review.gluster.org/17056
21ab4e
>Smoke: Gluster Build System <jenkins@build.gluster.org>
21ab4e
>NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
21ab4e
>CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
21ab4e
>Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
21ab4e
>Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
21ab4e
>Signed-off-by: Poornima G <pgurusid@redhat.com>
21ab4e
21ab4e
Change-Id: I241ab966bdd850e667f7768840540546f5289483
21ab4e
BUG: 1442026
21ab4e
Signed-off-by: Poornima G <pgurusid@redhat.com>
21ab4e
Reviewed-on: https://code.engineering.redhat.com/gerrit/103637
21ab4e
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
21ab4e
---
21ab4e
 libglusterfs/src/inode.c                        |  9 +++++
21ab4e
 tests/bugs/distribute/bug-1161311.t             |  1 +
21ab4e
 tests/bugs/readdir-ahead/bug-1436090.t          | 44 +++++++++++++++++++++++++
21ab4e
 xlators/mgmt/glusterd/src/glusterd-volgen.c     |  6 ++--
21ab4e
 xlators/mgmt/glusterd/src/glusterd-volume-set.c | 26 ++++++++++++++-
21ab4e
 5 files changed, 81 insertions(+), 5 deletions(-)
21ab4e
 create mode 100755 tests/bugs/readdir-ahead/bug-1436090.t
21ab4e
21ab4e
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
21ab4e
index 650a301..747c1f1 100644
21ab4e
--- a/libglusterfs/src/inode.c
21ab4e
+++ b/libglusterfs/src/inode.c
21ab4e
@@ -2528,6 +2528,15 @@ inode_ctx_size (inode_t *inode)
21ab4e
                         old_THIS = THIS;
21ab4e
                         THIS = xl;
21ab4e
 
21ab4e
+                        /* If inode ref is taken when THIS is global xlator,
21ab4e
+                         * the ctx xl_key is set, but the value is NULL.
21ab4e
+                         * For global xlator the cbks can be NULL, hence check
21ab4e
+                         * for the same */
21ab4e
+                        if (!xl->cbks) {
21ab4e
+                                THIS = old_THIS;
21ab4e
+                                continue;
21ab4e
+                        }
21ab4e
+
21ab4e
                         if (xl->cbks->ictxsize)
21ab4e
                                 size += xl->cbks->ictxsize (xl, inode);
21ab4e
 
21ab4e
diff --git a/tests/bugs/distribute/bug-1161311.t b/tests/bugs/distribute/bug-1161311.t
21ab4e
index c5a7f04..93e9d03 100755
21ab4e
--- a/tests/bugs/distribute/bug-1161311.t
21ab4e
+++ b/tests/bugs/distribute/bug-1161311.t
21ab4e
@@ -43,6 +43,7 @@ EXPECT "$V0" volinfo_field $V0 'Volume Name';
21ab4e
 EXPECT 'Created' volinfo_field $V0 'Status';
21ab4e
 EXPECT '3' brick_count $V0
21ab4e
 
21ab4e
+TEST $CLI volume set $V0 parallel-readdir on
21ab4e
 TEST $CLI volume start $V0;
21ab4e
 EXPECT 'Started' volinfo_field $V0 'Status';
21ab4e
 
21ab4e
diff --git a/tests/bugs/readdir-ahead/bug-1436090.t b/tests/bugs/readdir-ahead/bug-1436090.t
21ab4e
new file mode 100755
21ab4e
index 0000000..58e9093
21ab4e
--- /dev/null
21ab4e
+++ b/tests/bugs/readdir-ahead/bug-1436090.t
21ab4e
@@ -0,0 +1,44 @@
21ab4e
+#!/bin/bash
21ab4e
+
21ab4e
+. $(dirname $0)/../../include.rc
21ab4e
+. $(dirname $0)/../../volume.rc
21ab4e
+. $(dirname $0)/../../cluster.rc
21ab4e
+
21ab4e
+cleanup;
21ab4e
+
21ab4e
+TEST launch_cluster 2;
21ab4e
+TEST $CLI_1 peer probe $H2;
21ab4e
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
21ab4e
+
21ab4e
+$CLI_1 volume create $V0 $H1:$B1/$V0  $H2:$B2/$V0
21ab4e
+EXPECT 'Created' cluster_volinfo_field 1 $V0 'Status';
21ab4e
+
21ab4e
+$CLI_1 volume start $V0
21ab4e
+EXPECT 'Started' cluster_volinfo_field 1 $V0 'Status';
21ab4e
+
21ab4e
+TEST glusterfs -s $H1 --volfile-id $V0 $M0;
21ab4e
+TEST mkdir $M0/dir1
21ab4e
+
21ab4e
+# Create a large file (3.2 GB), so that rebalance takes time
21ab4e
+# Reading from /dev/urandom is slow, so we will cat it together
21ab4e
+dd if=/dev/urandom of=/tmp/FILE2 bs=64k count=10240
21ab4e
+for i in {1..5}; do
21ab4e
+  cat /tmp/FILE2 >> $M0/dir1/foo
21ab4e
+done
21ab4e
+
21ab4e
+TEST mv $M0/dir1/foo $M0/dir1/bar
21ab4e
+
21ab4e
+TEST $CLI_1 volume rebalance $V0 start force
21ab4e
+TEST ! $CLI_1 volume set $V0 parallel-readdir on
21ab4e
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 1 $V0
21ab4e
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 2 $V0
21ab4e
+TEST $CLI_1 volume set $V0 parallel-readdir on
21ab4e
+TEST mv $M0/dir1/bar $M0/dir1/foo
21ab4e
+
21ab4e
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
21ab4e
+TEST glusterfs -s $H1 --volfile-id $V0 $M0;
21ab4e
+TEST $CLI_1 volume rebalance $V0 start force
21ab4e
+TEST ln $M0/dir1/foo $M0/dir1/bar
21ab4e
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 1 $V0
21ab4e
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 2 $V0
21ab4e
+cleanup;
21ab4e
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
21ab4e
index 6e52d44..faa6c72 100644
21ab4e
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
21ab4e
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
21ab4e
@@ -3308,8 +3308,7 @@ volgen_graph_build_readdir_ahead (volgen_graph_t *graph,
21ab4e
 {
21ab4e
         int32_t                 clusters                 = 0;
21ab4e
 
21ab4e
-        if (graph->type == GF_REBALANCED ||
21ab4e
-            graph->type == GF_QUOTAD ||
21ab4e
+        if (graph->type == GF_QUOTAD ||
21ab4e
             graph->type == GF_SNAPD ||
21ab4e
             !glusterd_volinfo_get_boolean (volinfo, VKEY_PARALLEL_READDIR) ||
21ab4e
             !glusterd_volinfo_get_boolean (volinfo, VKEY_READDIR_AHEAD))
21ab4e
@@ -3617,8 +3616,7 @@ client_graph_set_rda_options (volgen_graph_t *graph,
21ab4e
         if (dist_count <= 1)
21ab4e
                 goto out;
21ab4e
 
21ab4e
-        if (graph->type == GF_REBALANCED ||
21ab4e
-            graph->type == GF_QUOTAD ||
21ab4e
+        if (graph->type == GF_QUOTAD ||
21ab4e
             graph->type == GF_SNAPD ||
21ab4e
             !glusterd_volinfo_get_boolean (volinfo, VKEY_PARALLEL_READDIR) ||
21ab4e
             !glusterd_volinfo_get_boolean (volinfo, VKEY_READDIR_AHEAD))
21ab4e
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
21ab4e
index 4a1c780..8e729da 100644
21ab4e
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
21ab4e
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
21ab4e
@@ -919,6 +919,30 @@ out:
21ab4e
 
21ab4e
 
21ab4e
 static int
21ab4e
+validate_parallel_readdir (glusterd_volinfo_t *volinfo, dict_t *dict,
21ab4e
+                           char *key, char *value, char **op_errstr)
21ab4e
+{
21ab4e
+        int ret             =       -1;
21ab4e
+
21ab4e
+        ret = validate_boolean (volinfo, dict, key, value, op_errstr);
21ab4e
+        if (ret)
21ab4e
+                goto out;
21ab4e
+
21ab4e
+        ret = glusterd_is_defrag_on (volinfo);
21ab4e
+        if (ret) {
21ab4e
+                gf_asprintf (op_errstr, "%s option should be set "
21ab4e
+                             "after rebalance is complete", key);
21ab4e
+                gf_msg ("glusterd", GF_LOG_ERROR, 0,
21ab4e
+                        GD_MSG_INVALID_ENTRY, "%s", *op_errstr);
21ab4e
+        }
21ab4e
+out:
21ab4e
+        gf_msg_debug ("glusterd", 0, "Returning %d", ret);
21ab4e
+
21ab4e
+        return ret;
21ab4e
+}
21ab4e
+
21ab4e
+
21ab4e
+static int
21ab4e
 validate_worm_period (glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
21ab4e
                char *value, char **op_errstr)
21ab4e
 {
21ab4e
@@ -3024,7 +3048,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
21ab4e
           .value       = "off",
21ab4e
           .type        = DOC,
21ab4e
           .op_version  = GD_OP_VERSION_3_10_0,
21ab4e
-          .validate_fn = validate_boolean,
21ab4e
+          .validate_fn = validate_parallel_readdir,
21ab4e
           .description = "If this option is enabled, the readdir operation is "
21ab4e
                          "performed parallely on all the bricks, thus improving"
21ab4e
                          " the performance of readdir. Note that the performance"
21ab4e
-- 
21ab4e
1.8.3.1
21ab4e