|
|
190130 |
From 759c12fc016a6399bb179aa0f930602c87d1e0f8 Mon Sep 17 00:00:00 2001
|
|
|
190130 |
From: Barak Sason Rofman <bsasonro@redhat.com>
|
|
|
190130 |
Date: Tue, 24 Nov 2020 12:56:10 +0200
|
|
|
190130 |
Subject: [PATCH 480/480] DHT/Rebalance - Ensure Rebalance reports status only
|
|
|
190130 |
once upon stopping
|
|
|
190130 |
|
|
|
190130 |
Upon issuing rebalance stop command, the status of rebalance is being
|
|
|
190130 |
logged twice to the log file, which can sometimes result in
|
|
|
190130 |
inconsistent reports (one report states status stopped, while the other
|
|
|
190130 |
may report something else).
|
|
|
190130 |
|
|
|
190130 |
This fix ensures rebalance reports its status only once and that the
|
|
|
190130 |
correct status is being reported.
|
|
|
190130 |
|
|
|
190130 |
Upstream:
|
|
|
190130 |
> Reviewed-on: https://github.com/gluster/glusterfs/pull/1783
|
|
|
190130 |
> fixes: #1782
|
|
|
190130 |
> Change-Id: Id3206edfad33b3db60e9df8e95a519928dc7cb37
|
|
|
190130 |
> Signed-off-by: Barak Sason Rofman bsasonro@redhat.com
|
|
|
190130 |
|
|
|
190130 |
BUG: 1286171
|
|
|
190130 |
Change-Id: Id3206edfad33b3db60e9df8e95a519928dc7cb37
|
|
|
190130 |
Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com>
|
|
|
190130 |
Reviewed-on: https://code.engineering.redhat.com/gerrit/218953
|
|
|
190130 |
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
|
190130 |
Reviewed-by: Csaba Henk <chenk@redhat.com>
|
|
|
190130 |
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
|
|
|
190130 |
---
|
|
|
190130 |
tests/bugs/distribute/bug-1286171.t | 75 +++++++++++++++++++++++++++++++++
|
|
|
190130 |
xlators/cluster/dht/src/dht-common.c | 2 +-
|
|
|
190130 |
xlators/cluster/dht/src/dht-common.h | 2 +-
|
|
|
190130 |
xlators/cluster/dht/src/dht-rebalance.c | 63 ++++++++++++++-------------
|
|
|
190130 |
4 files changed, 108 insertions(+), 34 deletions(-)
|
|
|
190130 |
create mode 100644 tests/bugs/distribute/bug-1286171.t
|
|
|
190130 |
|
|
|
190130 |
diff --git a/tests/bugs/distribute/bug-1286171.t b/tests/bugs/distribute/bug-1286171.t
|
|
|
190130 |
new file mode 100644
|
|
|
190130 |
index 0000000..a2ca36f
|
|
|
190130 |
--- /dev/null
|
|
|
190130 |
+++ b/tests/bugs/distribute/bug-1286171.t
|
|
|
190130 |
@@ -0,0 +1,75 @@
|
|
|
190130 |
+#!/bin/bash
|
|
|
190130 |
+
|
|
|
190130 |
+. $(dirname $0)/../../include.rc
|
|
|
190130 |
+. $(dirname $0)/../../cluster.rc
|
|
|
190130 |
+. $(dirname $0)/../../volume.rc
|
|
|
190130 |
+
|
|
|
190130 |
+# Initialize
|
|
|
190130 |
+#------------------------------------------------------------
|
|
|
190130 |
+cleanup;
|
|
|
190130 |
+
|
|
|
190130 |
+volname=bug-1286171
|
|
|
190130 |
+
|
|
|
190130 |
+# Start glusterd
|
|
|
190130 |
+TEST glusterd;
|
|
|
190130 |
+TEST pidof glusterd;
|
|
|
190130 |
+TEST $CLI volume info;
|
|
|
190130 |
+
|
|
|
190130 |
+# Create a volume
|
|
|
190130 |
+TEST $CLI volume create $volname $H0:$B0/${volname}{1,2}
|
|
|
190130 |
+
|
|
|
190130 |
+# Verify volume creation
|
|
|
190130 |
+EXPECT "$volname" volinfo_field $volname 'Volume Name';
|
|
|
190130 |
+EXPECT 'Created' volinfo_field $volname 'Status';
|
|
|
190130 |
+
|
|
|
190130 |
+# Start volume and verify successful start
|
|
|
190130 |
+TEST $CLI volume start $volname;
|
|
|
190130 |
+EXPECT 'Started' volinfo_field $volname 'Status';
|
|
|
190130 |
+TEST glusterfs --volfile-id=$volname --volfile-server=$H0 --entry-timeout=0 $M0;
|
|
|
190130 |
+#------------------------------------------------------------
|
|
|
190130 |
+
|
|
|
190130 |
+# Create a nested dir structure and some file under MP
|
|
|
190130 |
+cd $M0;
|
|
|
190130 |
+for i in {1..5}
|
|
|
190130 |
+do
|
|
|
190130 |
+ mkdir dir$i
|
|
|
190130 |
+ cd dir$i
|
|
|
190130 |
+ for j in {1..5}
|
|
|
190130 |
+ do
|
|
|
190130 |
+ mkdir dir$i$j
|
|
|
190130 |
+ cd dir$i$j
|
|
|
190130 |
+ for k in {1..5}
|
|
|
190130 |
+ do
|
|
|
190130 |
+ mkdir dir$i$j$k
|
|
|
190130 |
+ cd dir$i$j$k
|
|
|
190130 |
+ touch {1..300}
|
|
|
190130 |
+ cd ..
|
|
|
190130 |
+ done
|
|
|
190130 |
+ touch {1..300}
|
|
|
190130 |
+ cd ..
|
|
|
190130 |
+ done
|
|
|
190130 |
+ touch {1..300}
|
|
|
190130 |
+ cd ..
|
|
|
190130 |
+done
|
|
|
190130 |
+touch {1..300}
|
|
|
190130 |
+
|
|
|
190130 |
+# Add-brick and start rebalance
|
|
|
190130 |
+TEST $CLI volume add-brick $volname $H0:$B0/${volname}4;
|
|
|
190130 |
+TEST $CLI volume rebalance $volname start;
|
|
|
190130 |
+
|
|
|
190130 |
+# Let rebalance run for a while
|
|
|
190130 |
+sleep 5
|
|
|
190130 |
+
|
|
|
190130 |
+# Stop rebalance
|
|
|
190130 |
+TEST $CLI volume rebalance $volname stop;
|
|
|
190130 |
+
|
|
|
190130 |
+# Allow rebalance to stop
|
|
|
190130 |
+sleep 5
|
|
|
190130 |
+
|
|
|
190130 |
+# Examine the logfile for errors
|
|
|
190130 |
+cd /var/log/glusterfs;
|
|
|
190130 |
+failures=`grep "failures:" ${volname}-rebalance.log | tail -1 | sed 's/.*failures: //; s/,.*//'`;
|
|
|
190130 |
+
|
|
|
190130 |
+TEST [ $failures == 0 ];
|
|
|
190130 |
+
|
|
|
190130 |
+cleanup;
|
|
|
190130 |
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
|
|
|
190130 |
index 23cc80c..4db89df 100644
|
|
|
190130 |
--- a/xlators/cluster/dht/src/dht-common.c
|
|
|
190130 |
+++ b/xlators/cluster/dht/src/dht-common.c
|
|
|
190130 |
@@ -10969,7 +10969,7 @@ dht_notify(xlator_t *this, int event, void *data, ...)
|
|
|
190130 |
if ((cmd == GF_DEFRAG_CMD_STATUS) ||
|
|
|
190130 |
(cmd == GF_DEFRAG_CMD_STATUS_TIER) ||
|
|
|
190130 |
(cmd == GF_DEFRAG_CMD_DETACH_STATUS))
|
|
|
190130 |
- gf_defrag_status_get(conf, output);
|
|
|
190130 |
+ gf_defrag_status_get(conf, output, _gf_false);
|
|
|
190130 |
else if (cmd == GF_DEFRAG_CMD_START_DETACH_TIER)
|
|
|
190130 |
gf_defrag_start_detach_tier(defrag);
|
|
|
190130 |
else if (cmd == GF_DEFRAG_CMD_DETACH_START)
|
|
|
190130 |
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
|
|
|
190130 |
index 9ec5b51..92f1b89 100644
|
|
|
190130 |
--- a/xlators/cluster/dht/src/dht-common.h
|
|
|
190130 |
+++ b/xlators/cluster/dht/src/dht-common.h
|
|
|
190130 |
@@ -1252,7 +1252,7 @@ dht_fxattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
|
|
|
190130 |
int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata);
|
|
|
190130 |
|
|
|
190130 |
int
|
|
|
190130 |
-gf_defrag_status_get(dht_conf_t *conf, dict_t *dict);
|
|
|
190130 |
+gf_defrag_status_get(dht_conf_t *conf, dict_t *dict, gf_boolean_t log_status);
|
|
|
190130 |
|
|
|
190130 |
void
|
|
|
190130 |
gf_defrag_set_pause_state(gf_tier_conf_t *tier_conf, tier_pause_state_t state);
|
|
|
190130 |
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
|
|
|
190130 |
index d49a719..16ac16c 100644
|
|
|
190130 |
--- a/xlators/cluster/dht/src/dht-rebalance.c
|
|
|
190130 |
+++ b/xlators/cluster/dht/src/dht-rebalance.c
|
|
|
190130 |
@@ -2720,7 +2720,6 @@ gf_defrag_migrate_single_file(void *opaque)
|
|
|
190130 |
iatt_ptr = &entry->d_stat;
|
|
|
190130 |
|
|
|
190130 |
if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
|
|
|
190130 |
- ret = -1;
|
|
|
190130 |
goto out;
|
|
|
190130 |
}
|
|
|
190130 |
|
|
|
190130 |
@@ -3833,7 +3832,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
|
|
|
190130 |
list_for_each_entry_safe(entry, tmp, &entries.list, list)
|
|
|
190130 |
{
|
|
|
190130 |
if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
|
|
|
190130 |
- ret = 1;
|
|
|
190130 |
goto out;
|
|
|
190130 |
}
|
|
|
190130 |
|
|
|
190130 |
@@ -4863,7 +4861,7 @@ out:
|
|
|
190130 |
LOCK(&defrag->lock);
|
|
|
190130 |
{
|
|
|
190130 |
status = dict_new();
|
|
|
190130 |
- gf_defrag_status_get(conf, status);
|
|
|
190130 |
+ gf_defrag_status_get(conf, status, _gf_true);
|
|
|
190130 |
if (ctx && ctx->notify)
|
|
|
190130 |
ctx->notify(GF_EN_DEFRAG_STATUS, status);
|
|
|
190130 |
if (status)
|
|
|
190130 |
@@ -4998,7 +4996,7 @@ out:
|
|
|
190130 |
}
|
|
|
190130 |
|
|
|
190130 |
int
|
|
|
190130 |
-gf_defrag_status_get(dht_conf_t *conf, dict_t *dict)
|
|
|
190130 |
+gf_defrag_status_get(dht_conf_t *conf, dict_t *dict, gf_boolean_t log_status)
|
|
|
190130 |
{
|
|
|
190130 |
int ret = 0;
|
|
|
190130 |
uint64_t files = 0;
|
|
|
190130 |
@@ -5095,34 +5093,35 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict)
|
|
|
190130 |
gf_log(THIS->name, GF_LOG_WARNING, "failed to set time-left");
|
|
|
190130 |
|
|
|
190130 |
log:
|
|
|
190130 |
- switch (defrag->defrag_status) {
|
|
|
190130 |
- case GF_DEFRAG_STATUS_NOT_STARTED:
|
|
|
190130 |
- status = "not started";
|
|
|
190130 |
- break;
|
|
|
190130 |
- case GF_DEFRAG_STATUS_STARTED:
|
|
|
190130 |
- status = "in progress";
|
|
|
190130 |
- break;
|
|
|
190130 |
- case GF_DEFRAG_STATUS_STOPPED:
|
|
|
190130 |
- status = "stopped";
|
|
|
190130 |
- break;
|
|
|
190130 |
- case GF_DEFRAG_STATUS_COMPLETE:
|
|
|
190130 |
- status = "completed";
|
|
|
190130 |
- break;
|
|
|
190130 |
- case GF_DEFRAG_STATUS_FAILED:
|
|
|
190130 |
- status = "failed";
|
|
|
190130 |
- break;
|
|
|
190130 |
- default:
|
|
|
190130 |
- break;
|
|
|
190130 |
- }
|
|
|
190130 |
+ if (log_status) {
|
|
|
190130 |
+ switch (defrag->defrag_status) {
|
|
|
190130 |
+ case GF_DEFRAG_STATUS_NOT_STARTED:
|
|
|
190130 |
+ status = "not started";
|
|
|
190130 |
+ break;
|
|
|
190130 |
+ case GF_DEFRAG_STATUS_STARTED:
|
|
|
190130 |
+ status = "in progress";
|
|
|
190130 |
+ break;
|
|
|
190130 |
+ case GF_DEFRAG_STATUS_STOPPED:
|
|
|
190130 |
+ status = "stopped";
|
|
|
190130 |
+ break;
|
|
|
190130 |
+ case GF_DEFRAG_STATUS_COMPLETE:
|
|
|
190130 |
+ status = "completed";
|
|
|
190130 |
+ break;
|
|
|
190130 |
+ case GF_DEFRAG_STATUS_FAILED:
|
|
|
190130 |
+ status = "failed";
|
|
|
190130 |
+ break;
|
|
|
190130 |
+ default:
|
|
|
190130 |
+ break;
|
|
|
190130 |
+ }
|
|
|
190130 |
|
|
|
190130 |
- gf_msg(THIS->name, GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS,
|
|
|
190130 |
- "Rebalance is %s. Time taken is %.2f secs", status, elapsed);
|
|
|
190130 |
- gf_msg(THIS->name, GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS,
|
|
|
190130 |
- "Files migrated: %" PRIu64 ", size: %" PRIu64 ", lookups: %" PRIu64
|
|
|
190130 |
- ", failures: %" PRIu64
|
|
|
190130 |
- ", skipped: "
|
|
|
190130 |
- "%" PRIu64,
|
|
|
190130 |
- files, size, lookup, failures, skipped);
|
|
|
190130 |
+ gf_msg("DHT", GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS,
|
|
|
190130 |
+ "Rebalance is %s. Time taken is %.2f secs "
|
|
|
190130 |
+ "Files migrated: %" PRIu64 ", size: %" PRIu64
|
|
|
190130 |
+ ", lookups: %" PRIu64 ", failures: %" PRIu64
|
|
|
190130 |
+ ", skipped: "
|
|
|
190130 |
+ "%" PRIu64,
|
|
|
190130 |
+ status, elapsed, files, size, lookup, failures, skipped);
|
|
|
190130 |
+ }
|
|
|
190130 |
out:
|
|
|
190130 |
return 0;
|
|
|
190130 |
}
|
|
|
190130 |
@@ -5299,7 +5298,7 @@ gf_defrag_stop(dht_conf_t *conf, gf_defrag_status_t status, dict_t *output)
|
|
|
190130 |
defrag->defrag_status = status;
|
|
|
190130 |
|
|
|
190130 |
if (output)
|
|
|
190130 |
- gf_defrag_status_get(conf, output);
|
|
|
190130 |
+ gf_defrag_status_get(conf, output, _gf_false);
|
|
|
190130 |
ret = 0;
|
|
|
190130 |
out:
|
|
|
190130 |
gf_msg_debug("", 0, "Returning %d", ret);
|
|
|
190130 |
--
|
|
|
190130 |
1.8.3.1
|
|
|
190130 |
|