17b94a
From 759c12fc016a6399bb179aa0f930602c87d1e0f8 Mon Sep 17 00:00:00 2001
17b94a
From: Barak Sason Rofman <bsasonro@redhat.com>
17b94a
Date: Tue, 24 Nov 2020 12:56:10 +0200
17b94a
Subject: [PATCH 480/480] DHT/Rebalance - Ensure Rebalance reports status only
17b94a
 once upon stopping
17b94a
17b94a
Upon issuing the rebalance stop command, the status of rebalance is
17b94a
logged twice to the log file, which can sometimes result in
17b94a
inconsistent reports (one report states status stopped, while the other
17b94a
may report something else).
17b94a
17b94a
This fix ensures rebalance reports its status only once and that the
17b94a
correct status is being reported.
17b94a
17b94a
Upstream:
17b94a
> Reviewed-on: https://github.com/gluster/glusterfs/pull/1783
17b94a
> fixes: #1782
17b94a
> Change-Id: Id3206edfad33b3db60e9df8e95a519928dc7cb37
17b94a
> Signed-off-by: Barak Sason Rofman bsasonro@redhat.com
17b94a
17b94a
BUG: 1286171
17b94a
Change-Id: Id3206edfad33b3db60e9df8e95a519928dc7cb37
17b94a
Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com>
17b94a
Reviewed-on: https://code.engineering.redhat.com/gerrit/218953
17b94a
Tested-by: RHGS Build Bot <nigelb@redhat.com>
17b94a
Reviewed-by: Csaba Henk <chenk@redhat.com>
17b94a
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
17b94a
---
17b94a
 tests/bugs/distribute/bug-1286171.t     | 75 +++++++++++++++++++++++++++++++++
17b94a
 xlators/cluster/dht/src/dht-common.c    |  2 +-
17b94a
 xlators/cluster/dht/src/dht-common.h    |  2 +-
17b94a
 xlators/cluster/dht/src/dht-rebalance.c | 63 ++++++++++++++-------------
17b94a
 4 files changed, 108 insertions(+), 34 deletions(-)
17b94a
 create mode 100644 tests/bugs/distribute/bug-1286171.t
17b94a
17b94a
diff --git a/tests/bugs/distribute/bug-1286171.t b/tests/bugs/distribute/bug-1286171.t
17b94a
new file mode 100644
17b94a
index 0000000..a2ca36f
17b94a
--- /dev/null
17b94a
+++ b/tests/bugs/distribute/bug-1286171.t
17b94a
@@ -0,0 +1,75 @@
17b94a
+#!/bin/bash
17b94a
+
17b94a
+. $(dirname $0)/../../include.rc
17b94a
+. $(dirname $0)/../../cluster.rc
17b94a
+. $(dirname $0)/../../volume.rc
17b94a
+
17b94a
+# Initialize
17b94a
+#------------------------------------------------------------
17b94a
+cleanup;
17b94a
+
17b94a
+volname=bug-1286171
17b94a
+
17b94a
+# Start glusterd
17b94a
+TEST glusterd;
17b94a
+TEST pidof glusterd;
17b94a
+TEST $CLI volume info;
17b94a
+
17b94a
+# Create a volume
17b94a
+TEST $CLI volume create $volname $H0:$B0/${volname}{1,2}
17b94a
+
17b94a
+# Verify volume creation
17b94a
+EXPECT "$volname" volinfo_field $volname 'Volume Name';
17b94a
+EXPECT 'Created' volinfo_field $volname 'Status';
17b94a
+
17b94a
+# Start volume and verify successful start
17b94a
+TEST $CLI volume start $volname;
17b94a
+EXPECT 'Started' volinfo_field $volname 'Status';
17b94a
+TEST glusterfs --volfile-id=$volname --volfile-server=$H0 --entry-timeout=0 $M0;
17b94a
+#------------------------------------------------------------
17b94a
+
17b94a
+# Create a nested dir structure and some files under the mount point
17b94a
+cd $M0;
17b94a
+for i in {1..5}
17b94a
+do
17b94a
+	mkdir dir$i
17b94a
+	cd dir$i
17b94a
+	for j in {1..5}
17b94a
+	do
17b94a
+		mkdir dir$i$j
17b94a
+		cd dir$i$j
17b94a
+		for k in {1..5}
17b94a
+		do
17b94a
+			mkdir dir$i$j$k
17b94a
+			cd dir$i$j$k
17b94a
+			touch {1..300}
17b94a
+			cd ..
17b94a
+		done
17b94a
+		touch {1..300}
17b94a
+		cd ..
17b94a
+	done
17b94a
+	touch {1..300}
17b94a
+	cd ..
17b94a
+done
17b94a
+touch {1..300}
17b94a
+
17b94a
+# Add-brick and start rebalance
17b94a
+TEST $CLI volume add-brick $volname $H0:$B0/${volname}4;
17b94a
+TEST $CLI volume rebalance $volname start;
17b94a
+
17b94a
+# Let rebalance run for a while
17b94a
+sleep 5
17b94a
+
17b94a
+# Stop rebalance
17b94a
+TEST $CLI volume rebalance $volname stop;
17b94a
+
17b94a
+# Allow rebalance to stop
17b94a
+sleep 5
17b94a
+
17b94a
+# Examine the logfile for errors
17b94a
+cd /var/log/glusterfs;
17b94a
+failures=`grep "failures:" ${volname}-rebalance.log | tail -1 | sed 's/.*failures: //; s/,.*//'`;
17b94a
+
17b94a
+TEST [ $failures == 0 ];
17b94a
+
17b94a
+cleanup;
17b94a
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
17b94a
index 23cc80c..4db89df 100644
17b94a
--- a/xlators/cluster/dht/src/dht-common.c
17b94a
+++ b/xlators/cluster/dht/src/dht-common.c
17b94a
@@ -10969,7 +10969,7 @@ dht_notify(xlator_t *this, int event, void *data, ...)
17b94a
                 if ((cmd == GF_DEFRAG_CMD_STATUS) ||
17b94a
                     (cmd == GF_DEFRAG_CMD_STATUS_TIER) ||
17b94a
                     (cmd == GF_DEFRAG_CMD_DETACH_STATUS))
17b94a
-                    gf_defrag_status_get(conf, output);
17b94a
+                	gf_defrag_status_get(conf, output, _gf_false);
17b94a
                 else if (cmd == GF_DEFRAG_CMD_START_DETACH_TIER)
17b94a
                     gf_defrag_start_detach_tier(defrag);
17b94a
                 else if (cmd == GF_DEFRAG_CMD_DETACH_START)
17b94a
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
17b94a
index 9ec5b51..92f1b89 100644
17b94a
--- a/xlators/cluster/dht/src/dht-common.h
17b94a
+++ b/xlators/cluster/dht/src/dht-common.h
17b94a
@@ -1252,7 +1252,7 @@ dht_fxattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
17b94a
                  int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata);
17b94a
 
17b94a
 int
17b94a
-gf_defrag_status_get(dht_conf_t *conf, dict_t *dict);
17b94a
+gf_defrag_status_get(dht_conf_t *conf, dict_t *dict, gf_boolean_t log_status);
17b94a
 
17b94a
 void
17b94a
 gf_defrag_set_pause_state(gf_tier_conf_t *tier_conf, tier_pause_state_t state);
17b94a
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
17b94a
index d49a719..16ac16c 100644
17b94a
--- a/xlators/cluster/dht/src/dht-rebalance.c
17b94a
+++ b/xlators/cluster/dht/src/dht-rebalance.c
17b94a
@@ -2720,7 +2720,6 @@ gf_defrag_migrate_single_file(void *opaque)
17b94a
     iatt_ptr = &entry->d_stat;
17b94a
 
17b94a
     if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
17b94a
-        ret = -1;
17b94a
         goto out;
17b94a
     }
17b94a
 
17b94a
@@ -3833,7 +3832,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
17b94a
         list_for_each_entry_safe(entry, tmp, &entries.list, list)
17b94a
         {
17b94a
             if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
17b94a
-                ret = 1;
17b94a
                 goto out;
17b94a
             }
17b94a
 
17b94a
@@ -4863,7 +4861,7 @@ out:
17b94a
     LOCK(&defrag->lock);
17b94a
     {
17b94a
         status = dict_new();
17b94a
-        gf_defrag_status_get(conf, status);
17b94a
+        gf_defrag_status_get(conf, status, _gf_true);
17b94a
         if (ctx && ctx->notify)
17b94a
             ctx->notify(GF_EN_DEFRAG_STATUS, status);
17b94a
         if (status)
17b94a
@@ -4998,7 +4996,7 @@ out:
17b94a
 }
17b94a
 
17b94a
 int
17b94a
-gf_defrag_status_get(dht_conf_t *conf, dict_t *dict)
17b94a
+gf_defrag_status_get(dht_conf_t *conf, dict_t *dict, gf_boolean_t log_status)
17b94a
 {
17b94a
     int ret = 0;
17b94a
     uint64_t files = 0;
17b94a
@@ -5095,34 +5093,35 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict)
17b94a
         gf_log(THIS->name, GF_LOG_WARNING, "failed to set time-left");
17b94a
 
17b94a
 log:
17b94a
-    switch (defrag->defrag_status) {
17b94a
-        case GF_DEFRAG_STATUS_NOT_STARTED:
17b94a
-            status = "not started";
17b94a
-            break;
17b94a
-        case GF_DEFRAG_STATUS_STARTED:
17b94a
-            status = "in progress";
17b94a
-            break;
17b94a
-        case GF_DEFRAG_STATUS_STOPPED:
17b94a
-            status = "stopped";
17b94a
-            break;
17b94a
-        case GF_DEFRAG_STATUS_COMPLETE:
17b94a
-            status = "completed";
17b94a
-            break;
17b94a
-        case GF_DEFRAG_STATUS_FAILED:
17b94a
-            status = "failed";
17b94a
-            break;
17b94a
-        default:
17b94a
-            break;
17b94a
-    }
17b94a
+    if (log_status) {
17b94a
+        switch (defrag->defrag_status) {
17b94a
+            case GF_DEFRAG_STATUS_NOT_STARTED:
17b94a
+                status = "not started";
17b94a
+                break;
17b94a
+            case GF_DEFRAG_STATUS_STARTED:
17b94a
+                status = "in progress";
17b94a
+                break;
17b94a
+            case GF_DEFRAG_STATUS_STOPPED:
17b94a
+                status = "stopped";
17b94a
+                break;
17b94a
+            case GF_DEFRAG_STATUS_COMPLETE:
17b94a
+                status = "completed";
17b94a
+                break;
17b94a
+            case GF_DEFRAG_STATUS_FAILED:
17b94a
+                status = "failed";
17b94a
+                break;
17b94a
+            default:
17b94a
+                break;
17b94a
+        }
17b94a
 
17b94a
-    gf_msg(THIS->name, GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS,
17b94a
-           "Rebalance is %s. Time taken is %.2f secs", status, elapsed);
17b94a
-    gf_msg(THIS->name, GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS,
17b94a
-           "Files migrated: %" PRIu64 ", size: %" PRIu64 ", lookups: %" PRIu64
17b94a
-           ", failures: %" PRIu64
17b94a
-           ", skipped: "
17b94a
-           "%" PRIu64,
17b94a
-           files, size, lookup, failures, skipped);
17b94a
+        gf_msg("DHT", GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS,
17b94a
+               "Rebalance is %s. Time taken is %.2f secs "
17b94a
+               "Files migrated: %" PRIu64 ", size: %" PRIu64
17b94a
+               ", lookups: %" PRIu64 ", failures: %" PRIu64
17b94a
+               ", skipped: "
17b94a
+               "%" PRIu64,
17b94a
+               status, elapsed, files, size, lookup, failures, skipped);
17b94a
+    }
17b94a
 out:
17b94a
     return 0;
17b94a
 }
17b94a
@@ -5299,7 +5298,7 @@ gf_defrag_stop(dht_conf_t *conf, gf_defrag_status_t status, dict_t *output)
17b94a
     defrag->defrag_status = status;
17b94a
 
17b94a
     if (output)
17b94a
-        gf_defrag_status_get(conf, output);
17b94a
+        gf_defrag_status_get(conf, output, _gf_false);
17b94a
     ret = 0;
17b94a
 out:
17b94a
     gf_msg_debug("", 0, "Returning %d", ret);
17b94a
-- 
17b94a
1.8.3.1
17b94a