74096c
From 759c12fc016a6399bb179aa0f930602c87d1e0f8 Mon Sep 17 00:00:00 2001
74096c
From: Barak Sason Rofman <bsasonro@redhat.com>
74096c
Date: Tue, 24 Nov 2020 12:56:10 +0200
74096c
Subject: [PATCH 480/480] DHT/Rebalance - Ensure Rebalance reports status only
74096c
 once upon stopping
74096c
74096c
Upon issuing rebalance stop command, the status of rebalance is being
74096c
logged twice to the log file, which can sometime result in an
74096c
inconsistent reports (one report states status stopped, while the other
74096c
may report something else).
74096c
74096c
This fix ensures rebalance reports it's status only once and that the
74096c
correct status is being reported.
74096c
74096c
Upstream:
74096c
> Reviewed-on: https://github.com/gluster/glusterfs/pull/1783
74096c
> fixes: #1782
74096c
> Change-Id: Id3206edfad33b3db60e9df8e95a519928dc7cb37
74096c
> Signed-off-by: Barak Sason Rofman bsasonro@redhat.com
74096c
74096c
BUG: 1286171
74096c
Change-Id: Id3206edfad33b3db60e9df8e95a519928dc7cb37
74096c
Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com>
74096c
Reviewed-on: https://code.engineering.redhat.com/gerrit/218953
74096c
Tested-by: RHGS Build Bot <nigelb@redhat.com>
74096c
Reviewed-by: Csaba Henk <chenk@redhat.com>
74096c
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
74096c
---
74096c
 tests/bugs/distribute/bug-1286171.t     | 75 +++++++++++++++++++++++++++++++++
74096c
 xlators/cluster/dht/src/dht-common.c    |  2 +-
74096c
 xlators/cluster/dht/src/dht-common.h    |  2 +-
74096c
 xlators/cluster/dht/src/dht-rebalance.c | 63 ++++++++++++++-------------
74096c
 4 files changed, 108 insertions(+), 34 deletions(-)
74096c
 create mode 100644 tests/bugs/distribute/bug-1286171.t
74096c
74096c
diff --git a/tests/bugs/distribute/bug-1286171.t b/tests/bugs/distribute/bug-1286171.t
74096c
new file mode 100644
74096c
index 0000000..a2ca36f
74096c
--- /dev/null
74096c
+++ b/tests/bugs/distribute/bug-1286171.t
74096c
@@ -0,0 +1,75 @@
74096c
+#!/bin/bash
74096c
+
74096c
+. $(dirname $0)/../../include.rc
74096c
+. $(dirname $0)/../../cluster.rc
74096c
+. $(dirname $0)/../../volume.rc
74096c
+
74096c
+# Initialize
74096c
+#------------------------------------------------------------
74096c
+cleanup;
74096c
+
74096c
+volname=bug-1286171
74096c
+
74096c
+# Start glusterd
74096c
+TEST glusterd;
74096c
+TEST pidof glusterd;
74096c
+TEST $CLI volume info;
74096c
+
74096c
+# Create a volume
74096c
+TEST $CLI volume create $volname $H0:$B0/${volname}{1,2}
74096c
+
74096c
+# Verify volume creation
74096c
+EXPECT "$volname" volinfo_field $volname 'Volume Name';
74096c
+EXPECT 'Created' volinfo_field $volname 'Status';
74096c
+
74096c
+# Start volume and verify successful start
74096c
+TEST $CLI volume start $volname;
74096c
+EXPECT 'Started' volinfo_field $volname 'Status';
74096c
+TEST glusterfs --volfile-id=$volname --volfile-server=$H0 --entry-timeout=0 $M0;
74096c
+#------------------------------------------------------------
74096c
+
74096c
+# Create a nested dir structure and some file under MP
74096c
+cd $M0;
74096c
+for i in {1..5}
74096c
+do
74096c
+	mkdir dir$i
74096c
+	cd dir$i
74096c
+	for j in {1..5}
74096c
+	do
74096c
+		mkdir dir$i$j
74096c
+		cd dir$i$j
74096c
+		for k in {1..5}
74096c
+		do
74096c
+			mkdir dir$i$j$k
74096c
+			cd dir$i$j$k
74096c
+			touch {1..300}
74096c
+			cd ..
74096c
+		done
74096c
+		touch {1..300}
74096c
+		cd ..
74096c
+	done
74096c
+	touch {1..300}
74096c
+	cd ..
74096c
+done
74096c
+touch {1..300}
74096c
+
74096c
+# Add-brick and start rebalance
74096c
+TEST $CLI volume add-brick $volname $H0:$B0/${volname}4;
74096c
+TEST $CLI volume rebalance $volname start;
74096c
+
74096c
+# Let rebalance run for a while
74096c
+sleep 5
74096c
+
74096c
+# Stop rebalance
74096c
+TEST $CLI volume rebalance $volname stop;
74096c
+
74096c
+# Allow rebalance to stop
74096c
+sleep 5
74096c
+
74096c
+# Examine the logfile for errors
74096c
+cd /var/log/glusterfs;
74096c
+failures=`grep "failures:" ${volname}-rebalance.log | tail -1 | sed 's/.*failures: //; s/,.*//'`;
74096c
+
74096c
+TEST [ $failures == 0 ];
74096c
+
74096c
+cleanup;
74096c
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
74096c
index 23cc80c..4db89df 100644
74096c
--- a/xlators/cluster/dht/src/dht-common.c
74096c
+++ b/xlators/cluster/dht/src/dht-common.c
74096c
@@ -10969,7 +10969,7 @@ dht_notify(xlator_t *this, int event, void *data, ...)
74096c
                 if ((cmd == GF_DEFRAG_CMD_STATUS) ||
74096c
                     (cmd == GF_DEFRAG_CMD_STATUS_TIER) ||
74096c
                     (cmd == GF_DEFRAG_CMD_DETACH_STATUS))
74096c
-                    gf_defrag_status_get(conf, output);
74096c
+                	gf_defrag_status_get(conf, output, _gf_false);
74096c
                 else if (cmd == GF_DEFRAG_CMD_START_DETACH_TIER)
74096c
                     gf_defrag_start_detach_tier(defrag);
74096c
                 else if (cmd == GF_DEFRAG_CMD_DETACH_START)
74096c
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
74096c
index 9ec5b51..92f1b89 100644
74096c
--- a/xlators/cluster/dht/src/dht-common.h
74096c
+++ b/xlators/cluster/dht/src/dht-common.h
74096c
@@ -1252,7 +1252,7 @@ dht_fxattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
74096c
                  int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata);
74096c
 
74096c
 int
74096c
-gf_defrag_status_get(dht_conf_t *conf, dict_t *dict);
74096c
+gf_defrag_status_get(dht_conf_t *conf, dict_t *dict, gf_boolean_t log_status);
74096c
 
74096c
 void
74096c
 gf_defrag_set_pause_state(gf_tier_conf_t *tier_conf, tier_pause_state_t state);
74096c
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
74096c
index d49a719..16ac16c 100644
74096c
--- a/xlators/cluster/dht/src/dht-rebalance.c
74096c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
74096c
@@ -2720,7 +2720,6 @@ gf_defrag_migrate_single_file(void *opaque)
74096c
     iatt_ptr = &entry->d_stat;
74096c
 
74096c
     if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
74096c
-        ret = -1;
74096c
         goto out;
74096c
     }
74096c
 
74096c
@@ -3833,7 +3832,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
74096c
         list_for_each_entry_safe(entry, tmp, &entries.list, list)
74096c
         {
74096c
             if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
74096c
-                ret = 1;
74096c
                 goto out;
74096c
             }
74096c
 
74096c
@@ -4863,7 +4861,7 @@ out:
74096c
     LOCK(&defrag->lock);
74096c
     {
74096c
         status = dict_new();
74096c
-        gf_defrag_status_get(conf, status);
74096c
+        gf_defrag_status_get(conf, status, _gf_true);
74096c
         if (ctx && ctx->notify)
74096c
             ctx->notify(GF_EN_DEFRAG_STATUS, status);
74096c
         if (status)
74096c
@@ -4998,7 +4996,7 @@ out:
74096c
 }
74096c
 
74096c
 int
74096c
-gf_defrag_status_get(dht_conf_t *conf, dict_t *dict)
74096c
+gf_defrag_status_get(dht_conf_t *conf, dict_t *dict, gf_boolean_t log_status)
74096c
 {
74096c
     int ret = 0;
74096c
     uint64_t files = 0;
74096c
@@ -5095,34 +5093,35 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict)
74096c
         gf_log(THIS->name, GF_LOG_WARNING, "failed to set time-left");
74096c
 
74096c
 log:
74096c
-    switch (defrag->defrag_status) {
74096c
-        case GF_DEFRAG_STATUS_NOT_STARTED:
74096c
-            status = "not started";
74096c
-            break;
74096c
-        case GF_DEFRAG_STATUS_STARTED:
74096c
-            status = "in progress";
74096c
-            break;
74096c
-        case GF_DEFRAG_STATUS_STOPPED:
74096c
-            status = "stopped";
74096c
-            break;
74096c
-        case GF_DEFRAG_STATUS_COMPLETE:
74096c
-            status = "completed";
74096c
-            break;
74096c
-        case GF_DEFRAG_STATUS_FAILED:
74096c
-            status = "failed";
74096c
-            break;
74096c
-        default:
74096c
-            break;
74096c
-    }
74096c
+    if (log_status) {
74096c
+        switch (defrag->defrag_status) {
74096c
+            case GF_DEFRAG_STATUS_NOT_STARTED:
74096c
+                status = "not started";
74096c
+                break;
74096c
+            case GF_DEFRAG_STATUS_STARTED:
74096c
+                status = "in progress";
74096c
+                break;
74096c
+            case GF_DEFRAG_STATUS_STOPPED:
74096c
+                status = "stopped";
74096c
+                break;
74096c
+            case GF_DEFRAG_STATUS_COMPLETE:
74096c
+                status = "completed";
74096c
+                break;
74096c
+            case GF_DEFRAG_STATUS_FAILED:
74096c
+                status = "failed";
74096c
+                break;
74096c
+            default:
74096c
+                break;
74096c
+        }
74096c
 
74096c
-    gf_msg(THIS->name, GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS,
74096c
-           "Rebalance is %s. Time taken is %.2f secs", status, elapsed);
74096c
-    gf_msg(THIS->name, GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS,
74096c
-           "Files migrated: %" PRIu64 ", size: %" PRIu64 ", lookups: %" PRIu64
74096c
-           ", failures: %" PRIu64
74096c
-           ", skipped: "
74096c
-           "%" PRIu64,
74096c
-           files, size, lookup, failures, skipped);
74096c
+        gf_msg("DHT", GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS,
74096c
+               "Rebalance is %s. Time taken is %.2f secs "
74096c
+               "Files migrated: %" PRIu64 ", size: %" PRIu64
74096c
+               ", lookups: %" PRIu64 ", failures: %" PRIu64
74096c
+               ", skipped: "
74096c
+               "%" PRIu64,
74096c
+               status, elapsed, files, size, lookup, failures, skipped);
74096c
+    }
74096c
 out:
74096c
     return 0;
74096c
 }
74096c
@@ -5299,7 +5298,7 @@ gf_defrag_stop(dht_conf_t *conf, gf_defrag_status_t status, dict_t *output)
74096c
     defrag->defrag_status = status;
74096c
 
74096c
     if (output)
74096c
-        gf_defrag_status_get(conf, output);
74096c
+        gf_defrag_status_get(conf, output, _gf_false);
74096c
     ret = 0;
74096c
 out:
74096c
     gf_msg_debug("", 0, "Returning %d", ret);
74096c
-- 
74096c
1.8.3.1
74096c