From 68d405c9cc702965217ad781ff33bbf4736ebbbb Mon Sep 17 00:00:00 2001
From: Nithya Balachandran <nbalacha@redhat.com>
Date: Fri, 5 Jun 2015 15:28:19 +0530
Subject: [PATCH 10/18] dht/rebalance : Fixed rebalance failure
The rebalance process determines the local subvols for the
node it is running on and only acts on files in those subvols.
If a dist-rep or dist-disperse volume is created on 2 nodes by
dividing the bricks equally across the nodes, one process might
determine it has no local_subvols.
When trying to update the commit hash, the rebalance process attempts to
lock all local subvols. On a node with no local subvols, the dht
inode lock operation fails, in turn causing the rebalance to fail.
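For example, in a dist-rep volume with 2 nodes, if brick 0 of each replica
set is on node1 and brick 1 is on node2, node2 will find that it has
no local subvols.
Fix this by returning early from gf_defrag_settle_hash, without updating
the commit hash, when the node has no local subvols. Also move the
"fixing the layout" log message in dht_setxattr so that it is emitted
just before dht_fix_directory_layout is called.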
Change-Id: I7d73b5b4bf1c822eae6df2e6f79bd6a1606f4d1c
BUG: 1227262
Signed-off-by: Nithya Balachandran <nbalacha@redhat.com>
Reviewed-on: http://review.gluster.org/10786
Reviewed-by: Shyamsundar Ranganathan <srangana@redhat.com>
Reviewed-by: Susant Palai <spalai@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/50105
Reviewed-by: Raghavendra Gowdappa <rgowdapp@redhat.com>
Tested-by: Raghavendra Gowdappa <rgowdapp@redhat.com>
---
xlators/cluster/dht/src/dht-common.c | 5 +++--
xlators/cluster/dht/src/dht-rebalance.c | 15 ++++++++++++++-
2 files changed, 17 insertions(+), 3 deletions(-)
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index ca68e60..e2749c9 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -3535,8 +3535,6 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
tmp = dict_get (xattr, GF_XATTR_FIX_LAYOUT_KEY);
if (tmp) {
- gf_log (this->name, GF_LOG_INFO,
- "fixing the layout of %s", loc->path);
ret = dict_get_uint32(xattr, "new-commit-hash", &new_hash);
if (ret == 0) {
@@ -3554,6 +3552,9 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
return ret;
}
+ gf_log (this->name, GF_LOG_INFO,
+ "fixing the layout of %s", loc->path);
+
ret = dht_fix_directory_layout (frame, dht_common_setxattr_cbk,
layout);
if (ret) {
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 6ba9dd1..3ab73d4 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -2341,7 +2341,7 @@ gf_defrag_settle_hash (xlator_t *this, gf_defrag_info_t *defrag,
loc_t *loc, dict_t *fix_layout)
{
int ret;
-
+ dht_conf_t *conf = NULL;
/*
* Now we're ready to update the directory commit hash for the volume
* root, so that hash miscompares and broadcast lookups can stop.
@@ -2355,6 +2355,19 @@ gf_defrag_settle_hash (xlator_t *this, gf_defrag_info_t *defrag,
return 0;
}
+ conf = this->private;
+ if (!conf) {
+ /*Uh oh
+ */
+ return -1;
+ }
+
+ if (conf->local_subvols_cnt == 0) {
+ /* Commit hash updates are only done on local subvolumes
+ */
+ return 0;
+ }
+
ret = dict_set_uint32 (fix_layout, "new-commit-hash",
defrag->new_commit_hash);
if (ret) {
--
1.9.3