Blob Blame History Raw
From a5da8bb830e86b6dd77a06cd59d220052e80b21c Mon Sep 17 00:00:00 2001
From: Tamar Shacked <tshacked@redhat.com>
Date: Sun, 6 Jun 2021 11:57:06 +0300
Subject: [PATCH 580/584] cluster/dht: suppress file migration error for node
 not supposed to migrate file

A rebalance process does a lookup for every file in the dir it is processing
before checking if it supposed to migrate the file.
In this issue there are two rebalance processses running on a replica subvol:
R1 is migrating the FILE.
R2 is not supposed to migrate the FILE, but it does a lookup and
   finds a stale linkfile which is mostly due to a stale layout.
   Then, it tries to unlink the stale linkfile and gets EBUSY
   as the linkfile fd is open due R1 migration.
   As a result a misleading error msg about FILE migration failure
   due EBUSY is logged in R2 logfile.

Fix:
suppress the error in case it occured in a node that
is not supposed to migrate the file.

Backport of:
> Upstream-patch-link: https://review.gluster.org/#/c/glusterfs/+/24712/
> fixes: #1371
> Change-Id: I37832b404e2b0cc40ac5caf45f14c32c891e71f3
> Signed-off-by: Tamar Shacked <tshacked@redhat.com>

BUG: 1815462
Signed-off-by: Tamar Shacked <tshacked@redhat.com>
Change-Id: I915ee8e7470d85a849b198bfa7d58d368a246aae
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/245401
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
 xlators/cluster/dht/src/dht-rebalance.c | 38 ++++++++++++++++++++++-----------
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index e07dec0..cc0f2c9 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -2604,10 +2604,10 @@ out:
  * all hardlinks.
  */
 
-int
+gf_boolean_t
 gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid)
 {
-    int ret = 0;
+    gf_boolean_t ret = _gf_false;
     int i = local_subvol_index;
     char *str = NULL;
     uint32_t hashval = 0;
@@ -2629,12 +2629,11 @@ gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid)
     }
 
     str = uuid_utoa_r(gfid, buf);
-    ret = dht_hash_compute(this, 0, str, &hashval);
-    if (ret == 0) {
+    if (dht_hash_compute(this, 0, str, &hashval) == 0) {
         index = (hashval % entry->count);
         if (entry->elements[index].info == REBAL_NODEUUID_MINE) {
             /* Index matches this node's nodeuuid.*/
-            ret = 1;
+            ret = _gf_true;
             goto out;
         }
 
@@ -2647,12 +2646,12 @@ gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid)
                 /* None of the bricks in the subvol are up.
                  * CHILD_DOWN will kill the process soon */
 
-                return 0;
+                return _gf_false;
             }
 
             if (entry->elements[index].info == REBAL_NODEUUID_MINE) {
                 /* Index matches this node's nodeuuid.*/
-                ret = 1;
+                ret = _gf_true;
                 goto out;
             }
         }
@@ -2701,6 +2700,7 @@ gf_defrag_migrate_single_file(void *opaque)
     struct iatt *iatt_ptr = NULL;
     gf_boolean_t update_skippedcount = _gf_true;
     int i = 0;
+    gf_boolean_t should_i_migrate = 0;
 
     rebal_entry = (struct dht_container *)opaque;
     if (!rebal_entry) {
@@ -2754,11 +2754,29 @@ gf_defrag_migrate_single_file(void *opaque)
         goto out;
     }
 
+    should_i_migrate = gf_defrag_should_i_migrate(
+        this, rebal_entry->local_subvol_index, entry->d_stat.ia_gfid);
+
     gf_uuid_copy(entry_loc.gfid, entry->d_stat.ia_gfid);
 
     gf_uuid_copy(entry_loc.pargfid, loc->gfid);
 
     ret = syncop_lookup(this, &entry_loc, &iatt, NULL, NULL, NULL);
+
+    if (!should_i_migrate) {
+        /* this node isn't supposed to migrate the file. suppressing any
+         * potential error from lookup as this file is under migration by
+         * another node */
+        if (ret) {
+            gf_msg_debug(this->name, -ret,
+                         "Ignoring lookup failure: node isn't migrating %s",
+                         entry_loc.path);
+            ret = 0;
+        }
+        gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path);
+        goto out;
+    }
+
     if (ret) {
         gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
                "Migrate file failed: %s lookup failed", entry_loc.path);
@@ -2779,12 +2797,6 @@ gf_defrag_migrate_single_file(void *opaque)
         goto out;
     }
 
-    if (!gf_defrag_should_i_migrate(this, rebal_entry->local_subvol_index,
-                                    entry->d_stat.ia_gfid)) {
-        gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path);
-        goto out;
-    }
-
     iatt_ptr = &iatt;
 
     hashed_subvol = dht_subvol_get_hashed(this, &entry_loc);
-- 
1.8.3.1