Blob Blame History Raw
From ab94e24af9b61c39bf38a6f6ae02313a9b602644 Mon Sep 17 00:00:00 2001
From: Joseph Fernandes <josferna@redhat.com>
Date: Tue, 10 May 2016 21:10:08 +0530
Subject: [PATCH 156/158] tier/detach: Clear tier-fix-layout-complete xattr after migration threads join

Previously, the tier-fix-layout-complete xattr was wrongly cleared
before the migration threads were joined. As a result, a failure to
clear the xattr would cause the migration threads to die
prematurely.

Now we clear the xattr only after the data movement threads join,
ensuring that all migration is done.

Backport of http://review.gluster.org/14285

> Change-Id: I829b671efa165ae13dbff7b00707434970b37a09
> BUG: 1334839
> Signed-off-by: Joseph Fernandes <josferna@redhat.com>
> Reviewed-on: http://review.gluster.org/14285
> Smoke: Gluster Build System <jenkins@build.gluster.com>
> Tested-by: Joseph Fernandes
> CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
> Reviewed-by: N Balachandran <nbalacha@redhat.com>
> Reviewed-by: Dan Lambright <dlambrig@redhat.com>
> Signed-off-by: Joseph Fernandes <josferna@redhat.com>

Change-Id: I28ad9f4889b771ecfd625d0fb5127009cf44aced
BUG: 1334234
Signed-off-by: Joseph Fernandes <josferna@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/74382
---
 xlators/cluster/dht/src/dht-rebalance.c |   75 +++++++++++++++++-------------
 1 files changed, 42 insertions(+), 33 deletions(-)

diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index abdb8eb..d763237 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -3347,39 +3347,45 @@ out:
         return ret;
 }
 
-int
+void
 gf_tier_clear_fix_layout (xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag)
 {
         int ret         = -1;
         dict_t *dict    = NULL;
 
-        /* Check if background fixlayout is completed. */
+        GF_VALIDATE_OR_GOTO ("tier", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+        GF_VALIDATE_OR_GOTO (this->name, defrag, out);
+
+        /* Check if background fixlayout is completed. This is not
+         * multi-process safe, i.e. there is a possibility that by the time
+         * we move to remove the xattr it might have been cleared by some
+         * other detach process from another node. We ignore the error if
+         * such a thing happens. */
         ret = syncop_getxattr (this, loc, &dict,
                         GF_XATTR_TIER_LAYOUT_FIXED_KEY, NULL, NULL);
         if (ret) {
                 /* Background fixlayout not complete - nothing to clear*/
-                gf_log (this->name, GF_LOG_WARNING,
+                gf_msg (this->name, GF_LOG_WARNING, -ret,
+                        DHT_MSG_LOG_TIER_STATUS,
                         "Unable to retrieve fixlayout xattr."
                         "Assume background fix layout not complete");
-                ret = 0;
                 goto out;
         }
 
         ret = syncop_removexattr (this, loc, GF_XATTR_TIER_LAYOUT_FIXED_KEY,
                                   NULL, NULL);
         if (ret) {
-                gf_log (this->name, GF_LOG_WARNING,
+                gf_msg (this->name, GF_LOG_WARNING, -ret,
+                        DHT_MSG_LOG_TIER_STATUS,
                         "Failed removing tier fix layout "
                         "xattr from %s", loc->path);
-                defrag->total_failures++;
-                ret = -1;
                 goto out;
         }
         ret = 0;
 out:
         if (dict)
                 dict_unref (dict);
-        return ret;
 }
 
 void
@@ -3396,24 +3402,25 @@ gf_tier_wait_fix_lookup (gf_defrag_info_t *defrag) {
 int
 gf_defrag_start_crawl (void *data)
 {
-        xlator_t                *this           = NULL;
-        dht_conf_t              *conf           = NULL;
-        gf_defrag_info_t        *defrag         = NULL;
-        int                      ret            = -1;
-        loc_t                    loc            = {0,};
-        struct iatt              iatt           = {0,};
-        struct iatt              parent         = {0,};
-        dict_t                  *fix_layout     = NULL;
-        dict_t                  *migrate_data   = NULL;
-        dict_t                  *status         = NULL;
-        dict_t                  *dict           = NULL;
-        glusterfs_ctx_t         *ctx            = NULL;
-        dht_methods_t           *methods        = NULL;
-        int                      i              = 0;
-        int                     thread_index    = 0;
-        int                     err             = 0;
-        int                     thread_spawn_count = 0;
+        xlator_t                *this                   = NULL;
+        dht_conf_t              *conf                   = NULL;
+        gf_defrag_info_t        *defrag                 = NULL;
+        int                      ret                    = -1;
+        loc_t                    loc                    = {0,};
+        struct iatt              iatt                   = {0,};
+        struct iatt              parent                 = {0,};
+        dict_t                  *fix_layout             = NULL;
+        dict_t                  *migrate_data           = NULL;
+        dict_t                  *status                 = NULL;
+        dict_t                  *dict                   = NULL;
+        glusterfs_ctx_t         *ctx                    = NULL;
+        dht_methods_t           *methods                = NULL;
+        int                      i                      = 0;
+        int                     thread_index            = 0;
+        int                     err                     = 0;
+        int                     thread_spawn_count      = 0;
         pthread_t tid[MAX_MIGRATOR_THREAD_COUNT];
+        gf_boolean_t            is_tier_detach          = _gf_false;
 
         this = data;
         if (!this)
@@ -3618,14 +3625,9 @@ gf_defrag_start_crawl (void *data)
                         goto out;
                 }
 
-                if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER) {
-                        /* If its was a detach remove the tier fix-layout
-                         * xattr on root */
-                         ret = gf_tier_clear_fix_layout (this, &loc, defrag);
-                         if (ret) {
-                                goto out;
-                         }
-                }
+                if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER)
+                        is_tier_detach = _gf_true;
+
         }
 
         gf_log ("DHT", GF_LOG_INFO, "crawling file-system completed");
@@ -3661,6 +3663,12 @@ out:
                  gf_tier_wait_fix_lookup (defrag);
         }
 
+        if (is_tier_detach && ret == 0) {
+                /* If it was a detach, remove the tier fix-layout
+                 * xattr on root. The failure is ignored, as nothing has to
+                 * be done; logging is done in gf_tier_clear_fix_layout */
+                gf_tier_clear_fix_layout (this, &loc, defrag);
+        }
 
         if (defrag->queue) {
                 gf_dirent_free (defrag->queue[0].df_entry);
@@ -3700,6 +3708,7 @@ exit:
 }
 
 
+
 static int
 gf_defrag_done  (int ret, call_frame_t *sync_frame, void *data)
 {
-- 
1.7.1