Blob Blame History Raw
From 68d405c9cc702965217ad781ff33bbf4736ebbbb Mon Sep 17 00:00:00 2001
From: Nithya Balachandran <nbalacha@redhat.com>
Date: Fri, 5 Jun 2015 15:28:19 +0530
Subject: [PATCH 10/18] dht/rebalance : Fixed rebalance failure

The rebalance process determines the local subvols for the
node it is running on and only acts on files in those subvols.
If a dist-rep or dist-disperse volume is created on 2 nodes by
dividing the bricks equally across the nodes, the rebalance
process on one of the nodes might determine that it has no
local_subvols.

When trying to update the commit hash, the function attempts to
lock all local subvols. On a node with no local_subvols, the dht
inode lock operation fails, which in turn causes the rebalance
to fail.

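For example, in a dist-rep volume with 2 nodes, if brick 0 of each
replica set is on node1 and brick 1 is on node2, node2 will find
that it has no local subvols.

Fix this by skipping the commit hash update in
gf_defrag_settle_hash when the node has no local subvols. Also
move the "fixing the layout" log message in dht_setxattr so that
it is emitted just before dht_fix_directory_layout is called,
after the new-commit-hash handling.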

Change-Id: I7d73b5b4bf1c822eae6df2e6f79bd6a1606f4d1c
BUG: 1227262
Signed-off-by: Nithya Balachandran <nbalacha@redhat.com>
Reviewed-on: http://review.gluster.org/10786
Reviewed-by: Shyamsundar Ranganathan <srangana@redhat.com>
Reviewed-by: Susant Palai <spalai@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/50105
Reviewed-by: Raghavendra Gowdappa <rgowdapp@redhat.com>
Tested-by: Raghavendra Gowdappa <rgowdapp@redhat.com>
---
 xlators/cluster/dht/src/dht-common.c    |  5 +++--
 xlators/cluster/dht/src/dht-rebalance.c | 15 ++++++++++++++-
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index ca68e60..e2749c9 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -3535,8 +3535,6 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
 
         tmp = dict_get (xattr, GF_XATTR_FIX_LAYOUT_KEY);
         if (tmp) {
-                gf_log (this->name, GF_LOG_INFO,
-                        "fixing the layout of %s", loc->path);
 
                 ret = dict_get_uint32(xattr, "new-commit-hash", &new_hash);
                 if (ret == 0) {
@@ -3554,6 +3552,9 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
                         return ret;
                 }
 
+                gf_log (this->name, GF_LOG_INFO,
+                        "fixing the layout of %s", loc->path);
+
                 ret = dht_fix_directory_layout (frame, dht_common_setxattr_cbk,
                                                 layout);
                 if (ret) {
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 6ba9dd1..3ab73d4 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -2341,7 +2341,7 @@ gf_defrag_settle_hash (xlator_t *this, gf_defrag_info_t *defrag,
                        loc_t *loc, dict_t *fix_layout)
 {
         int     ret;
-
+        dht_conf_t *conf = NULL;
         /*
          * Now we're ready to update the directory commit hash for the volume
          * root, so that hash miscompares and broadcast lookups can stop.
@@ -2355,6 +2355,19 @@ gf_defrag_settle_hash (xlator_t *this, gf_defrag_info_t *defrag,
                 return 0;
         }
 
+        conf = this->private;
+        if (!conf) {
+                /*Uh oh
+                 */
+                return -1;
+        }
+
+        if (conf->local_subvols_cnt == 0) {
+                /* Commit hash updates are only done on local subvolumes
+                 */
+                return 0;
+        }
+
         ret = dict_set_uint32 (fix_layout, "new-commit-hash",
                                defrag->new_commit_hash);
         if (ret) {
-- 
1.9.3