190130
From 759c12fc016a6399bb179aa0f930602c87d1e0f8 Mon Sep 17 00:00:00 2001
190130
From: Barak Sason Rofman <bsasonro@redhat.com>
190130
Date: Tue, 24 Nov 2020 12:56:10 +0200
190130
Subject: [PATCH 480/480] DHT/Rebalance - Ensure Rebalance reports status only
190130
 once upon stopping
190130
190130
Upon issuing rebalance stop command, the status of rebalance is being
190130
logged twice to the log file, which can sometimes result in
190130
inconsistent reports (one report states status stopped, while the other
190130
may report something else).
190130
190130
This fix ensures rebalance reports its status only once and that the
190130
correct status is being reported.
190130
190130
Upstream:
190130
> Reviewed-on: https://github.com/gluster/glusterfs/pull/1783
190130
> fixes: #1782
190130
> Change-Id: Id3206edfad33b3db60e9df8e95a519928dc7cb37
190130
> Signed-off-by: Barak Sason Rofman bsasonro@redhat.com
190130
190130
BUG: 1286171
190130
Change-Id: Id3206edfad33b3db60e9df8e95a519928dc7cb37
190130
Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com>
190130
Reviewed-on: https://code.engineering.redhat.com/gerrit/218953
190130
Tested-by: RHGS Build Bot <nigelb@redhat.com>
190130
Reviewed-by: Csaba Henk <chenk@redhat.com>
190130
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
190130
---
190130
 tests/bugs/distribute/bug-1286171.t     | 75 +++++++++++++++++++++++++++++++++
190130
 xlators/cluster/dht/src/dht-common.c    |  2 +-
190130
 xlators/cluster/dht/src/dht-common.h    |  2 +-
190130
 xlators/cluster/dht/src/dht-rebalance.c | 63 ++++++++++++++-------------
190130
 4 files changed, 108 insertions(+), 34 deletions(-)
190130
 create mode 100644 tests/bugs/distribute/bug-1286171.t
190130
190130
diff --git a/tests/bugs/distribute/bug-1286171.t b/tests/bugs/distribute/bug-1286171.t
190130
new file mode 100644
190130
index 0000000..a2ca36f
190130
--- /dev/null
190130
+++ b/tests/bugs/distribute/bug-1286171.t
190130
@@ -0,0 +1,75 @@
190130
+#!/bin/bash
190130
+
190130
+. $(dirname $0)/../../include.rc
190130
+. $(dirname $0)/../../cluster.rc
190130
+. $(dirname $0)/../../volume.rc
190130
+
190130
+# Initialize
190130
+#------------------------------------------------------------
190130
+cleanup;
190130
+
190130
+volname=bug-1286171
190130
+
190130
+# Start glusterd
190130
+TEST glusterd;
190130
+TEST pidof glusterd;
190130
+TEST $CLI volume info;
190130
+
190130
+# Create a volume
190130
+TEST $CLI volume create $volname $H0:$B0/${volname}{1,2}
190130
+
190130
+# Verify volume creation
190130
+EXPECT "$volname" volinfo_field $volname 'Volume Name';
190130
+EXPECT 'Created' volinfo_field $volname 'Status';
190130
+
190130
+# Start volume and verify successful start
190130
+TEST $CLI volume start $volname;
190130
+EXPECT 'Started' volinfo_field $volname 'Status';
190130
+TEST glusterfs --volfile-id=$volname --volfile-server=$H0 --entry-timeout=0 $M0;
190130
+#------------------------------------------------------------
190130
+
190130
+# Create a nested dir structure and some files under the mount point
190130
+cd $M0;
190130
+for i in {1..5}
190130
+do
190130
+	mkdir dir$i
190130
+	cd dir$i
190130
+	for j in {1..5}
190130
+	do
190130
+		mkdir dir$i$j
190130
+		cd dir$i$j
190130
+		for k in {1..5}
190130
+		do
190130
+			mkdir dir$i$j$k
190130
+			cd dir$i$j$k
190130
+			touch {1..300}
190130
+			cd ..
190130
+		done
190130
+		touch {1..300}
190130
+		cd ..
190130
+	done
190130
+	touch {1..300}
190130
+	cd ..
190130
+done
190130
+touch {1..300}
190130
+
190130
+# Add-brick and start rebalance
190130
+TEST $CLI volume add-brick $volname $H0:$B0/${volname}4;
190130
+TEST $CLI volume rebalance $volname start;
190130
+
190130
+# Let rebalance run for a while
190130
+sleep 5
190130
+
190130
+# Stop rebalance
190130
+TEST $CLI volume rebalance $volname stop;
190130
+
190130
+# Allow rebalance to stop
190130
+sleep 5
190130
+
190130
+# Examine the logfile for errors
190130
+cd /var/log/glusterfs;
190130
+failures=`grep "failures:" ${volname}-rebalance.log | tail -1 | sed 's/.*failures: //; s/,.*//'`;
190130
+
190130
+TEST [ $failures == 0 ];
190130
+
190130
+cleanup;
190130
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
190130
index 23cc80c..4db89df 100644
190130
--- a/xlators/cluster/dht/src/dht-common.c
190130
+++ b/xlators/cluster/dht/src/dht-common.c
190130
@@ -10969,7 +10969,7 @@ dht_notify(xlator_t *this, int event, void *data, ...)
190130
                 if ((cmd == GF_DEFRAG_CMD_STATUS) ||
190130
                     (cmd == GF_DEFRAG_CMD_STATUS_TIER) ||
190130
                     (cmd == GF_DEFRAG_CMD_DETACH_STATUS))
190130
-                    gf_defrag_status_get(conf, output);
190130
+                	gf_defrag_status_get(conf, output, _gf_false);
190130
                 else if (cmd == GF_DEFRAG_CMD_START_DETACH_TIER)
190130
                     gf_defrag_start_detach_tier(defrag);
190130
                 else if (cmd == GF_DEFRAG_CMD_DETACH_START)
190130
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
190130
index 9ec5b51..92f1b89 100644
190130
--- a/xlators/cluster/dht/src/dht-common.h
190130
+++ b/xlators/cluster/dht/src/dht-common.h
190130
@@ -1252,7 +1252,7 @@ dht_fxattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
190130
                  int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata);
190130
 
190130
 int
190130
-gf_defrag_status_get(dht_conf_t *conf, dict_t *dict);
190130
+gf_defrag_status_get(dht_conf_t *conf, dict_t *dict, gf_boolean_t log_status);
190130
 
190130
 void
190130
 gf_defrag_set_pause_state(gf_tier_conf_t *tier_conf, tier_pause_state_t state);
190130
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
190130
index d49a719..16ac16c 100644
190130
--- a/xlators/cluster/dht/src/dht-rebalance.c
190130
+++ b/xlators/cluster/dht/src/dht-rebalance.c
190130
@@ -2720,7 +2720,6 @@ gf_defrag_migrate_single_file(void *opaque)
190130
     iatt_ptr = &entry->d_stat;
190130
 
190130
     if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
190130
-        ret = -1;
190130
         goto out;
190130
     }
190130
 
190130
@@ -3833,7 +3832,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
190130
         list_for_each_entry_safe(entry, tmp, &entries.list, list)
190130
         {
190130
             if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
190130
-                ret = 1;
190130
                 goto out;
190130
             }
190130
 
190130
@@ -4863,7 +4861,7 @@ out:
190130
     LOCK(&defrag->lock);
190130
     {
190130
         status = dict_new();
190130
-        gf_defrag_status_get(conf, status);
190130
+        gf_defrag_status_get(conf, status, _gf_true);
190130
         if (ctx && ctx->notify)
190130
             ctx->notify(GF_EN_DEFRAG_STATUS, status);
190130
         if (status)
190130
@@ -4998,7 +4996,7 @@ out:
190130
 }
190130
 
190130
 int
190130
-gf_defrag_status_get(dht_conf_t *conf, dict_t *dict)
190130
+gf_defrag_status_get(dht_conf_t *conf, dict_t *dict, gf_boolean_t log_status)
190130
 {
190130
     int ret = 0;
190130
     uint64_t files = 0;
190130
@@ -5095,34 +5093,35 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict)
190130
         gf_log(THIS->name, GF_LOG_WARNING, "failed to set time-left");
190130
 
190130
 log:
190130
-    switch (defrag->defrag_status) {
190130
-        case GF_DEFRAG_STATUS_NOT_STARTED:
190130
-            status = "not started";
190130
-            break;
190130
-        case GF_DEFRAG_STATUS_STARTED:
190130
-            status = "in progress";
190130
-            break;
190130
-        case GF_DEFRAG_STATUS_STOPPED:
190130
-            status = "stopped";
190130
-            break;
190130
-        case GF_DEFRAG_STATUS_COMPLETE:
190130
-            status = "completed";
190130
-            break;
190130
-        case GF_DEFRAG_STATUS_FAILED:
190130
-            status = "failed";
190130
-            break;
190130
-        default:
190130
-            break;
190130
-    }
190130
+    if (log_status) {
190130
+        switch (defrag->defrag_status) {
190130
+            case GF_DEFRAG_STATUS_NOT_STARTED:
190130
+                status = "not started";
190130
+                break;
190130
+            case GF_DEFRAG_STATUS_STARTED:
190130
+                status = "in progress";
190130
+                break;
190130
+            case GF_DEFRAG_STATUS_STOPPED:
190130
+                status = "stopped";
190130
+                break;
190130
+            case GF_DEFRAG_STATUS_COMPLETE:
190130
+                status = "completed";
190130
+                break;
190130
+            case GF_DEFRAG_STATUS_FAILED:
190130
+                status = "failed";
190130
+                break;
190130
+            default:
190130
+                break;
190130
+        }
190130
 
190130
-    gf_msg(THIS->name, GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS,
190130
-           "Rebalance is %s. Time taken is %.2f secs", status, elapsed);
190130
-    gf_msg(THIS->name, GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS,
190130
-           "Files migrated: %" PRIu64 ", size: %" PRIu64 ", lookups: %" PRIu64
190130
-           ", failures: %" PRIu64
190130
-           ", skipped: "
190130
-           "%" PRIu64,
190130
-           files, size, lookup, failures, skipped);
190130
+        gf_msg("DHT", GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS,
190130
+               "Rebalance is %s. Time taken is %.2f secs "
190130
+               "Files migrated: %" PRIu64 ", size: %" PRIu64
190130
+               ", lookups: %" PRIu64 ", failures: %" PRIu64
190130
+               ", skipped: "
190130
+               "%" PRIu64,
190130
+               status, elapsed, files, size, lookup, failures, skipped);
190130
+    }
190130
 out:
190130
     return 0;
190130
 }
190130
@@ -5299,7 +5298,7 @@ gf_defrag_stop(dht_conf_t *conf, gf_defrag_status_t status, dict_t *output)
190130
     defrag->defrag_status = status;
190130
 
190130
     if (output)
190130
-        gf_defrag_status_get(conf, output);
190130
+        gf_defrag_status_get(conf, output, _gf_false);
190130
     ret = 0;
190130
 out:
190130
     gf_msg_debug("", 0, "Returning %d", ret);
190130
-- 
190130
1.8.3.1
190130