d1681e
From 8863bc3638ca8b3a2c6105d35f5a9113b5796dd5 Mon Sep 17 00:00:00 2001
d1681e
From: Susant Palai <spalai@redhat.com>
d1681e
Date: Mon, 21 May 2018 11:51:47 +0530
d1681e
Subject: [PATCH 296/305] cluster/dht: Increase failure count for lookup
d1681e
 failure in remove-brick op
d1681e
d1681e
An entry from readdirp might get renamed just before migration, leading to
d1681e
lookup failures. For such lookup failures, the remove-brick process does not
d1681e
see any increment in the failure count. Though there is a warning message
d1681e
after remove-brick commit for the user to check the decommissioned brick
d1681e
for any files that were not migrated, it's better to increase the failure count
d1681e
so that the user can check the decommissioned bricks for files before commit.
d1681e
d1681e
Note: This can result in false positives when rm -rf runs concurrently with a
d1681e
remove-brick op: remove-brick shows a non-zero failed count, but the
d1681e
entry was actually deleted by the user.
d1681e
d1681e
upstream patch: https://review.gluster.org/#/c/20044/
d1681e
> Fixes :bz#1580269
d1681e
> Change-Id: Icd1047ab9edc1d5bfc231a1f417a7801c424917c
d1681e
> fixes: bz#1580269
d1681e
> Signed-off-by: Susant Palai <spalai@redhat.com>
d1681e
d1681e
Change-Id: Icd1047ab9edc1d5bfc231a1f417a7801c424917c
d1681e
BUG: 1577051
d1681e
(cherry-picked from upstream: https://review.gluster.org/#/c/20044/)
d1681e
Signed-off-by: Susant Palai <spalai@redhat.com>
d1681e
Reviewed-on: https://code.engineering.redhat.com/gerrit/139989
d1681e
Tested-by: RHGS Build Bot <nigelb@redhat.com>
d1681e
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
d1681e
---
d1681e
 xlators/cluster/dht/src/dht-rebalance.c | 34 ++++++++++++++++++++++++++++++---
d1681e
 1 file changed, 31 insertions(+), 3 deletions(-)
d1681e
d1681e
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
d1681e
index f03931f..b9078e0 100644
d1681e
--- a/xlators/cluster/dht/src/dht-rebalance.c
d1681e
+++ b/xlators/cluster/dht/src/dht-rebalance.c
d1681e
@@ -2713,6 +2713,19 @@ gf_defrag_migrate_single_file (void *opaque)
d1681e
                         DHT_MSG_MIGRATE_FILE_FAILED,
d1681e
                         "Migrate file failed: %s lookup failed",
d1681e
                         entry_loc.path);
d1681e
+
d1681e
+                /* Increase failure count only for remove-brick op, so that
d1681e
+                 * user is warned to check the removed-brick for any files left
d1681e
+                 * unmigrated
d1681e
+                 */
d1681e
+                if (conf->decommission_subvols_cnt) {
d1681e
+                        LOCK (&defrag->lock);
d1681e
+                        {
d1681e
+                                defrag->total_failures += 1;
d1681e
+                        }
d1681e
+                        UNLOCK (&defrag->lock);
d1681e
+                }
d1681e
+
d1681e
                 ret = 0;
d1681e
                 goto out;
d1681e
         }
d1681e
@@ -3722,8 +3735,11 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
d1681e
                                 DHT_MSG_DIR_LOOKUP_FAILED,
d1681e
                                 "Dir:%s renamed or removed. Skipping",
d1681e
                                 loc->path);
d1681e
-                                ret = 0;
d1681e
-                                goto out;
d1681e
+                        if (conf->decommission_subvols_cnt) {
d1681e
+                                defrag->total_failures++;
d1681e
+                        }
d1681e
+                        ret = 0;
d1681e
+                        goto out;
d1681e
                 } else {
d1681e
                         gf_msg (this->name, GF_LOG_ERROR, -ret,
d1681e
                                 DHT_MSG_DIR_LOOKUP_FAILED,
d1681e
@@ -3744,6 +3760,9 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
d1681e
         ret = syncop_opendir (this, loc, fd, NULL, NULL);
d1681e
         if (ret) {
d1681e
                 if (-ret == ENOENT || -ret == ESTALE) {
d1681e
+                        if (conf->decommission_subvols_cnt) {
d1681e
+                                defrag->total_failures++;
d1681e
+                        }
d1681e
                         ret = 0;
d1681e
                         goto out;
d1681e
                 }
d1681e
@@ -3763,6 +3782,9 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
d1681e
         {
d1681e
                 if (ret < 0) {
d1681e
                         if (-ret == ENOENT || -ret == ESTALE) {
d1681e
+                                if (conf->decommission_subvols_cnt) {
d1681e
+                                        defrag->total_failures++;
d1681e
+                                }
d1681e
                                 ret = 0;
d1681e
                                 goto out;
d1681e
                         }
d1681e
@@ -3868,7 +3890,10 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
d1681e
                                                 DHT_MSG_DIR_LOOKUP_FAILED,
d1681e
                                                 "Dir:%s renamed or removed. "
d1681e
                                                 "Skipping", loc->path);
d1681e
-                                                ret = 0;
d1681e
+                                        ret = 0;
d1681e
+                                        if (conf->decommission_subvols_cnt) {
d1681e
+                                                defrag->total_failures++;
d1681e
+                                        }
d1681e
                                         continue;
d1681e
                                 } else {
d1681e
                                         gf_msg (this->name, GF_LOG_ERROR, -ret,
d1681e
@@ -3931,6 +3956,9 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
d1681e
                                 "Setxattr failed. Dir %s "
d1681e
                                 "renamed or removed",
d1681e
                                 loc->path);
d1681e
+                        if (conf->decommission_subvols_cnt) {
d1681e
+                                defrag->total_failures++;
d1681e
+                        }
d1681e
                         ret = 0;
d1681e
                 } else {
d1681e
                         gf_msg (this->name, GF_LOG_ERROR, -ret,
d1681e
-- 
d1681e
1.8.3.1
d1681e