12a457
From ab94e24af9b61c39bf38a6f6ae02313a9b602644 Mon Sep 17 00:00:00 2001
12a457
From: Joseph Fernandes <josferna@redhat.com>
12a457
Date: Tue, 10 May 2016 21:10:08 +0530
12a457
Subject: [PATCH 156/158] tier/detach: Clear tier-fix-layout-complete xattr after migration threads join
12a457
12a457
Previously we had wrongly placed the clearing of the tier-fix-layout-complete
12a457
xattr before the joining of migration threads. This would lead to
12a457
situations where a failure to clear the xattr would cause the
12a457
premature death of migration threads.
12a457
12a457
Now we clear the xattr only after the data movement threads join,
12a457
ensuring that all migration is done.
12a457
12a457
Backport of http://review.gluster.org/14285
12a457
12a457
> Change-Id: I829b671efa165ae13dbff7b00707434970b37a09
12a457
> BUG: 1334839
12a457
> Signed-off-by: Joseph Fernandes <josferna@redhat.com>
12a457
> Reviewed-on: http://review.gluster.org/14285
12a457
> Smoke: Gluster Build System <jenkins@build.gluster.com>
12a457
> Tested-by: Joseph Fernandes
12a457
> CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
12a457
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
12a457
> Reviewed-by: N Balachandran <nbalacha@redhat.com>
12a457
> Reviewed-by: Dan Lambright <dlambrig@redhat.com>
12a457
> Signed-off-by: Joseph Fernandes <josferna@redhat.com>
12a457
12a457
Change-Id: I28ad9f4889b771ecfd625d0fb5127009cf44aced
12a457
BUG: 1334234
12a457
Signed-off-by: Joseph Fernandes <josferna@redhat.com>
12a457
Reviewed-on: https://code.engineering.redhat.com/gerrit/74382
12a457
---
12a457
 xlators/cluster/dht/src/dht-rebalance.c |   75 +++++++++++++++++-------------
12a457
 1 files changed, 42 insertions(+), 33 deletions(-)
12a457
12a457
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
12a457
index abdb8eb..d763237 100644
12a457
--- a/xlators/cluster/dht/src/dht-rebalance.c
12a457
+++ b/xlators/cluster/dht/src/dht-rebalance.c
12a457
@@ -3347,39 +3347,45 @@ out:
12a457
         return ret;
12a457
 }
12a457
 
12a457
-int
12a457
+void
12a457
 gf_tier_clear_fix_layout (xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag)
12a457
 {
12a457
         int ret         = -1;
12a457
         dict_t *dict    = NULL;
12a457
 
12a457
-        /* Check if background fixlayout is completed. */
12a457
+        GF_VALIDATE_OR_GOTO ("tier", this, out);
12a457
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
12a457
+        GF_VALIDATE_OR_GOTO (this->name, defrag, out);
12a457
+
12a457
+        /* Check if background fixlayout is completed. This is not
12a457
+         * multi-process safe, i.e. there is a possibility that by the time
12a457
+         * we move to remove the xattr it might have been cleared by some
12a457
+         * other detach process from other node. We ignore the error if such
12a457
+         * a thing happens */
12a457
         ret = syncop_getxattr (this, loc, &dict,
12a457
                         GF_XATTR_TIER_LAYOUT_FIXED_KEY, NULL, NULL);
12a457
         if (ret) {
12a457
                 /* Background fixlayout not complete - nothing to clear*/
12a457
-                gf_log (this->name, GF_LOG_WARNING,
12a457
+                gf_msg (this->name, GF_LOG_WARNING, -ret,
12a457
+                        DHT_MSG_LOG_TIER_STATUS,
12a457
                         "Unable to retrieve fixlayout xattr."
12a457
                         "Assume background fix layout not complete");
12a457
-                ret = 0;
12a457
                 goto out;
12a457
         }
12a457
 
12a457
         ret = syncop_removexattr (this, loc, GF_XATTR_TIER_LAYOUT_FIXED_KEY,
12a457
                                   NULL, NULL);
12a457
         if (ret) {
12a457
-                gf_log (this->name, GF_LOG_WARNING,
12a457
+                gf_msg (this->name, GF_LOG_WARNING, -ret,
12a457
+                        DHT_MSG_LOG_TIER_STATUS,
12a457
                         "Failed removing tier fix layout "
12a457
                         "xattr from %s", loc->path);
12a457
-                defrag->total_failures++;
12a457
-                ret = -1;
12a457
                 goto out;
12a457
         }
12a457
         ret = 0;
12a457
 out:
12a457
         if (dict)
12a457
                 dict_unref (dict);
12a457
-        return ret;
12a457
 }
12a457
 
12a457
 void
12a457
@@ -3396,24 +3402,25 @@ gf_tier_wait_fix_lookup (gf_defrag_info_t *defrag) {
12a457
 int
12a457
 gf_defrag_start_crawl (void *data)
12a457
 {
12a457
-        xlator_t                *this           = NULL;
12a457
-        dht_conf_t              *conf           = NULL;
12a457
-        gf_defrag_info_t        *defrag         = NULL;
12a457
-        int                      ret            = -1;
12a457
-        loc_t                    loc            = {0,};
12a457
-        struct iatt              iatt           = {0,};
12a457
-        struct iatt              parent         = {0,};
12a457
-        dict_t                  *fix_layout     = NULL;
12a457
-        dict_t                  *migrate_data   = NULL;
12a457
-        dict_t                  *status         = NULL;
12a457
-        dict_t                  *dict           = NULL;
12a457
-        glusterfs_ctx_t         *ctx            = NULL;
12a457
-        dht_methods_t           *methods        = NULL;
12a457
-        int                      i              = 0;
12a457
-        int                     thread_index    = 0;
12a457
-        int                     err             = 0;
12a457
-        int                     thread_spawn_count = 0;
12a457
+        xlator_t                *this                   = NULL;
12a457
+        dht_conf_t              *conf                   = NULL;
12a457
+        gf_defrag_info_t        *defrag                 = NULL;
12a457
+        int                      ret                    = -1;
12a457
+        loc_t                    loc                    = {0,};
12a457
+        struct iatt              iatt                   = {0,};
12a457
+        struct iatt              parent                 = {0,};
12a457
+        dict_t                  *fix_layout             = NULL;
12a457
+        dict_t                  *migrate_data           = NULL;
12a457
+        dict_t                  *status                 = NULL;
12a457
+        dict_t                  *dict                   = NULL;
12a457
+        glusterfs_ctx_t         *ctx                    = NULL;
12a457
+        dht_methods_t           *methods                = NULL;
12a457
+        int                      i                      = 0;
12a457
+        int                     thread_index            = 0;
12a457
+        int                     err                     = 0;
12a457
+        int                     thread_spawn_count      = 0;
12a457
         pthread_t tid[MAX_MIGRATOR_THREAD_COUNT];
12a457
+        gf_boolean_t            is_tier_detach          = _gf_false;
12a457
 
12a457
         this = data;
12a457
         if (!this)
12a457
@@ -3618,14 +3625,9 @@ gf_defrag_start_crawl (void *data)
12a457
                         goto out;
12a457
                 }
12a457
 
12a457
-                if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER) {
12a457
-                        /* If its was a detach remove the tier fix-layout
12a457
-                         * xattr on root */
12a457
-                         ret = gf_tier_clear_fix_layout (this, &loc, defrag);
12a457
-                         if (ret) {
12a457
-                                goto out;
12a457
-                         }
12a457
-                }
12a457
+                if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER)
12a457
+                        is_tier_detach = _gf_true;
12a457
+
12a457
         }
12a457
 
12a457
         gf_log ("DHT", GF_LOG_INFO, "crawling file-system completed");
12a457
@@ -3661,6 +3663,12 @@ out:
12a457
                  gf_tier_wait_fix_lookup (defrag);
12a457
         }
12a457
 
12a457
+        if (is_tier_detach && ret == 0) {
12a457
+                /* If it was a detach remove the tier fix-layout
12a457
+                * xattr on root. Ignoring the failure, as nothing has to be
12a457
+                * done, logging is done in gf_tier_clear_fix_layout */
12a457
+                gf_tier_clear_fix_layout (this, &loc, defrag);
12a457
+        }
12a457
 
12a457
         if (defrag->queue) {
12a457
                 gf_dirent_free (defrag->queue[0].df_entry);
12a457
@@ -3700,6 +3708,7 @@ exit:
12a457
 }
12a457
 
12a457
 
12a457
+
12a457
 static int
12a457
 gf_defrag_done  (int ret, call_frame_t *sync_frame, void *data)
12a457
 {
12a457
-- 
12a457
1.7.1
12a457