|
|
d1681e |
From 8863bc3638ca8b3a2c6105d35f5a9113b5796dd5 Mon Sep 17 00:00:00 2001
|
|
|
d1681e |
From: Susant Palai <spalai@redhat.com>
|
|
|
d1681e |
Date: Mon, 21 May 2018 11:51:47 +0530
|
|
|
d1681e |
Subject: [PATCH 296/305] cluster/dht: Increase failure count for lookup
|
|
|
d1681e |
failure in remove-brick op
|
|
|
d1681e |
|
|
|
d1681e |
An entry from readdirp might get renamed just before migration leading to
|
|
|
d1681e |
lookup failures. For such lookup failures, the remove-brick process does not
|
|
|
d1681e |
see any increment in failure count. Though there is a warning message
|
|
|
d1681e |
after remove-brick commit for the user to check in the decommissioned brick
|
|
|
d1681e |
for any files that were not migrated, it's better to increase the failure count
|
|
|
d1681e |
so that user can check in the decommissioned bricks for files before commit.
|
|
|
d1681e |
|
|
|
d1681e |
Note: This can result in false positive cases for rm -rf interaction with
|
|
|
d1681e |
remove-brick op, where remove-brick shows non-zero failed count, but the
|
|
|
d1681e |
entry was actually deleted by user.
|
|
|
d1681e |
|
|
|
d1681e |
upstream patch: https://review.gluster.org/#/c/20044/
|
|
|
d1681e |
> Fixes :bz#1580269
|
|
|
d1681e |
> Change-Id: Icd1047ab9edc1d5bfc231a1f417a7801c424917c
|
|
|
d1681e |
> fixes: bz#1580269
|
|
|
d1681e |
> Signed-off-by: Susant Palai <spalai@redhat.com>
|
|
|
d1681e |
|
|
|
d1681e |
Change-Id: Icd1047ab9edc1d5bfc231a1f417a7801c424917c
|
|
|
d1681e |
BUG: 1577051
|
|
|
d1681e |
(cherry-picked from upstream: https://review.gluster.org/#/c/20044/)
|
|
|
d1681e |
Signed-off-by: Susant Palai <spalai@redhat.com>
|
|
|
d1681e |
Reviewed-on: https://code.engineering.redhat.com/gerrit/139989
|
|
|
d1681e |
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
|
d1681e |
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
|
|
|
d1681e |
---
|
|
|
d1681e |
xlators/cluster/dht/src/dht-rebalance.c | 34 ++++++++++++++++++++++++++++++---
|
|
|
d1681e |
1 file changed, 31 insertions(+), 3 deletions(-)
|
|
|
d1681e |
|
|
|
d1681e |
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
|
|
|
d1681e |
index f03931f..b9078e0 100644
|
|
|
d1681e |
--- a/xlators/cluster/dht/src/dht-rebalance.c
|
|
|
d1681e |
+++ b/xlators/cluster/dht/src/dht-rebalance.c
|
|
|
d1681e |
@@ -2713,6 +2713,19 @@ gf_defrag_migrate_single_file (void *opaque)
|
|
|
d1681e |
DHT_MSG_MIGRATE_FILE_FAILED,
|
|
|
d1681e |
"Migrate file failed: %s lookup failed",
|
|
|
d1681e |
entry_loc.path);
|
|
|
d1681e |
+
|
|
|
d1681e |
+ /* Increase failure count only for remove-brick op, so that
|
|
|
d1681e |
+ * user is warned to check the removed-brick for any files left
|
|
|
d1681e |
+ * unmigrated
|
|
|
d1681e |
+ */
|
|
|
d1681e |
+ if (conf->decommission_subvols_cnt) {
|
|
|
d1681e |
+ LOCK (&defrag->lock);
|
|
|
d1681e |
+ {
|
|
|
d1681e |
+ defrag->total_failures += 1;
|
|
|
d1681e |
+ }
|
|
|
d1681e |
+ UNLOCK (&defrag->lock);
|
|
|
d1681e |
+ }
|
|
|
d1681e |
+
|
|
|
d1681e |
ret = 0;
|
|
|
d1681e |
goto out;
|
|
|
d1681e |
}
|
|
|
d1681e |
@@ -3722,8 +3735,11 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
|
|
|
d1681e |
DHT_MSG_DIR_LOOKUP_FAILED,
|
|
|
d1681e |
"Dir:%s renamed or removed. Skipping",
|
|
|
d1681e |
loc->path);
|
|
|
d1681e |
- ret = 0;
|
|
|
d1681e |
- goto out;
|
|
|
d1681e |
+ if (conf->decommission_subvols_cnt) {
|
|
|
d1681e |
+ defrag->total_failures++;
|
|
|
d1681e |
+ }
|
|
|
d1681e |
+ ret = 0;
|
|
|
d1681e |
+ goto out;
|
|
|
d1681e |
} else {
|
|
|
d1681e |
gf_msg (this->name, GF_LOG_ERROR, -ret,
|
|
|
d1681e |
DHT_MSG_DIR_LOOKUP_FAILED,
|
|
|
d1681e |
@@ -3744,6 +3760,9 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
|
|
|
d1681e |
ret = syncop_opendir (this, loc, fd, NULL, NULL);
|
|
|
d1681e |
if (ret) {
|
|
|
d1681e |
if (-ret == ENOENT || -ret == ESTALE) {
|
|
|
d1681e |
+ if (conf->decommission_subvols_cnt) {
|
|
|
d1681e |
+ defrag->total_failures++;
|
|
|
d1681e |
+ }
|
|
|
d1681e |
ret = 0;
|
|
|
d1681e |
goto out;
|
|
|
d1681e |
}
|
|
|
d1681e |
@@ -3763,6 +3782,9 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
|
|
|
d1681e |
{
|
|
|
d1681e |
if (ret < 0) {
|
|
|
d1681e |
if (-ret == ENOENT || -ret == ESTALE) {
|
|
|
d1681e |
+ if (conf->decommission_subvols_cnt) {
|
|
|
d1681e |
+ defrag->total_failures++;
|
|
|
d1681e |
+ }
|
|
|
d1681e |
ret = 0;
|
|
|
d1681e |
goto out;
|
|
|
d1681e |
}
|
|
|
d1681e |
@@ -3868,7 +3890,10 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
|
|
|
d1681e |
DHT_MSG_DIR_LOOKUP_FAILED,
|
|
|
d1681e |
"Dir:%s renamed or removed. "
|
|
|
d1681e |
"Skipping", loc->path);
|
|
|
d1681e |
- ret = 0;
|
|
|
d1681e |
+ ret = 0;
|
|
|
d1681e |
+ if (conf->decommission_subvols_cnt) {
|
|
|
d1681e |
+ defrag->total_failures++;
|
|
|
d1681e |
+ }
|
|
|
d1681e |
continue;
|
|
|
d1681e |
} else {
|
|
|
d1681e |
gf_msg (this->name, GF_LOG_ERROR, -ret,
|
|
|
d1681e |
@@ -3931,6 +3956,9 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
|
|
|
d1681e |
"Setxattr failed. Dir %s "
|
|
|
d1681e |
"renamed or removed",
|
|
|
d1681e |
loc->path);
|
|
|
d1681e |
+ if (conf->decommission_subvols_cnt) {
|
|
|
d1681e |
+ defrag->total_failures++;
|
|
|
d1681e |
+ }
|
|
|
d1681e |
ret = 0;
|
|
|
d1681e |
} else {
|
|
|
d1681e |
gf_msg (this->name, GF_LOG_ERROR, -ret,
|
|
|
d1681e |
--
|
|
|
d1681e |
1.8.3.1
|
|
|
d1681e |
|