12a457
From 293766aa6d4f4d6a329f5ebc131fdc85f1ed4bf4 Mon Sep 17 00:00:00 2001
12a457
From: Joseph Fernandes <josferna@redhat.com>
12a457
Date: Tue, 23 Feb 2016 12:51:45 +0530
12a457
Subject: [PATCH 46/80] tier/dht : Attach tier fix layout to run in background
12a457
12a457
1. Spawn a thread for background fix-layout for tier process.
12a457
12a457
2. Once the fix-layout is completed a marker xattr is set on the root of
12a457
   volume to mark the completion of the background fixlayout, so that
12a457
   even if the tier process is spawned again, fixlayout will not be
12a457
   issued, if it was completed last time.
12a457
12a457
3. Please note that promotion of legacy files will happen eventually as
12a457
   the ctr lookup heal in the fixlayout slowly heals the ctr db for legacy
12a457
   files OR the ctr lookup heal happens due to a name lookup.
12a457
12a457
4. When a detach tier is successful in evacuating data from the hot tier,
12a457
   the marker xattr is removed, so that the next attach tier runs the background
12a457
   tier fixlayout.
12a457
12a457
What is remaining?
12a457
1. Instead of clearing the marker xattr of tiering fix layout at the end of detach start,
12a457
   clear it during detach commit. But the issue is detach commit is a glusterd operation
12a457
   and the volume is not mounted in glusterd.
12a457
   The reason we want to do it in detach commit is that if the admin wants to attach the
12a457
   same tier again, then a background fixlayout will be triggered, which would not be needed.
12a457
2. Clearing the CTR DB of the cold bricks when there is a detach commit, as it will have
12a457
   entries which will be stale when the volume is used, with ctr off (ctr is switched off only when
12a457
   we have detach commit.)
12a457
12a457
Backport of http://review.gluster.org/13491
12a457
12a457
> Change-Id: Ibe343572e95865325cd0eef4d0b976b626a3c0c5
12a457
> BUG: 1313228
12a457
> Signed-off-by: Joseph Fernandes <josferna@redhat.com>
12a457
> Reviewed-on: http://review.gluster.org/13491
12a457
> Smoke: Gluster Build System <jenkins@build.gluster.com>
12a457
> Tested-by: Joseph Fernandes
12a457
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
12a457
> CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
12a457
> Reviewed-by: Dan Lambright <dlambrig@redhat.com>
12a457
> Signed-off-by: Joseph Fernandes <josferna@redhat.com>
12a457
12a457
> Change-Id: Ic28affdf78d2ac0f394f3dd59f0126df7915d609
12a457
> BUG: 1323016
12a457
> Reviewed-on: http://review.gluster.org/13879
12a457
> Smoke: Gluster Build System <jenkins@build.gluster.com>
12a457
> Reviewed-by: Joseph Fernandes
12a457
> Tested-by: Joseph Fernandes
12a457
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
12a457
> CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
12a457
> Reviewed-by: Dan Lambright <dlambrig@redhat.com>
12a457
Signed-off-by: Joseph Fernandes <josferna@redhat.com>
12a457
12a457
Change-Id: I3bccb27a160f988f3721542e5ca07939a369e457
12a457
BUG: 1294790
12a457
Reviewed-on: https://code.engineering.redhat.com/gerrit/71484
12a457
Reviewed-by: Joseph Fernandes <josferna@redhat.com>
12a457
Tested-by: Joseph Fernandes <josferna@redhat.com>
12a457
---
12a457
 tests/basic/tier/legacy-many.t          |   13 ++
12a457
 tests/basic/tier/tier.t                 |    5 -
12a457
 tests/tier.rc                           |    6 +
12a457
 xlators/cluster/dht/src/dht-common.h    |   28 +++-
12a457
 xlators/cluster/dht/src/dht-rebalance.c |  244 +++++++++++++++++++++++++++++--
12a457
 5 files changed, 266 insertions(+), 30 deletions(-)
12a457
12a457
diff --git a/tests/basic/tier/legacy-many.t b/tests/basic/tier/legacy-many.t
12a457
index 0f32ff6..e419bec 100644
12a457
--- a/tests/basic/tier/legacy-many.t
12a457
+++ b/tests/basic/tier/legacy-many.t
12a457
@@ -58,6 +58,11 @@ TEST $CLI volume set $V0 cluster.write-freq-threshold 0
12a457
 # wait a little for lookup heal to finish
12a457
 sleep 10
12a457
 
12a457
+# make sure fix layout completed
12a457
+CPATH=$B0/${V0}0
12a457
+echo $CPATH > /tmp/out
12a457
+TEST getfattr -n "trusted.tier.fix.layout.complete" $CPATH
12a457
+
12a457
 # Read "legacy" files
12a457
 drop_cache $M0
12a457
 
12a457
@@ -69,5 +74,13 @@ TEST read_all
12a457
 sleep $PROMOTE_TIMEOUT
12a457
 EXPECT_WITHIN $PROMOTE_TIMEOUT "0" check_counters $NUM_FILES 0
12a457
 
12a457
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" detach_start $V0
12a457
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0 $H0:$B0/${V0}${CACHE_BRICK_FIRST}"
12a457
+
12a457
+TEST $CLI volume tier $V0 detach commit
12a457
+
12a457
+# fix layout flag should be cleared
12a457
+TEST ! getfattr -n "trusted.tier.fix.layout.complete" $CPATH
12a457
+
12a457
 cd;
12a457
 cleanup
12a457
diff --git a/tests/basic/tier/tier.t b/tests/basic/tier/tier.t
12a457
index cfd8e77..4b36930 100755
12a457
--- a/tests/basic/tier/tier.t
12a457
+++ b/tests/basic/tier/tier.t
12a457
@@ -13,11 +13,6 @@ MIGRATION_TIMEOUT=10
12a457
 DEMOTE_FREQ=4
12a457
 PROMOTE_FREQ=12
12a457
 
12a457
-function detach_start {
12a457
-        $CLI volume tier $1 detach start
12a457
-        echo $?;
12a457
-}
12a457
-
12a457
 function file_on_slow_tier {
12a457
     found=0
12a457
 
12a457
diff --git a/tests/tier.rc b/tests/tier.rc
12a457
index dd220fe..ee37e07 100644
12a457
--- a/tests/tier.rc
12a457
+++ b/tests/tier.rc
12a457
@@ -60,6 +60,12 @@ function check_counters {
12a457
 }
12a457
 
12a457
 
12a457
+function detach_start {
12a457
+        $CLI volume tier $1 detach start
12a457
+        echo $?;
12a457
+}
12a457
+
12a457
+
12a457
 # Grab md5sum without file path (failed attempt notifications are discarded)
12a457
 function fingerprint {
12a457
     md5sum $1 2> /dev/null | grep --only-matching -m 1 '^[0-9a-f]*'
12a457
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
12a457
index 53d1489..edfb805 100644
12a457
--- a/xlators/cluster/dht/src/dht-common.h
12a457
+++ b/xlators/cluster/dht/src/dht-common.h
12a457
@@ -26,15 +26,16 @@
12a457
 #ifndef _DHT_H
12a457
 #define _DHT_H
12a457
 
12a457
-#define GF_XATTR_FIX_LAYOUT_KEY     "distribute.fix.layout"
12a457
-#define GF_XATTR_FILE_MIGRATE_KEY   "trusted.distribute.migrate-data"
12a457
-#define GF_DHT_LOOKUP_UNHASHED_ON   1
12a457
-#define GF_DHT_LOOKUP_UNHASHED_AUTO 2
12a457
-#define DHT_PATHINFO_HEADER         "DISTRIBUTE:"
12a457
-#define DHT_FILE_MIGRATE_DOMAIN     "dht.file.migrate"
12a457
-#define DHT_LAYOUT_HEAL_DOMAIN      "dht.layout.heal"
12a457
-#define DHT_LAYOUT_HASH_INVALID     1
12a457
-#define TIERING_MIGRATION_KEY       "tiering.migration"
12a457
+#define GF_XATTR_FIX_LAYOUT_KEY         "distribute.fix.layout"
12a457
+#define GF_XATTR_TIER_LAYOUT_FIXED_KEY  "trusted.tier.fix.layout.complete"
12a457
+#define GF_XATTR_FILE_MIGRATE_KEY       "trusted.distribute.migrate-data"
12a457
+#define GF_DHT_LOOKUP_UNHASHED_ON       1
12a457
+#define GF_DHT_LOOKUP_UNHASHED_AUTO     2
12a457
+#define DHT_PATHINFO_HEADER             "DISTRIBUTE:"
12a457
+#define DHT_FILE_MIGRATE_DOMAIN         "dht.file.migrate"
12a457
+#define DHT_LAYOUT_HEAL_DOMAIN          "dht.layout.heal"
12a457
+#define TIERING_MIGRATION_KEY           "tiering.migration"
12a457
+#define DHT_LAYOUT_HASH_INVALID         1
12a457
 
12a457
 #define DHT_DIR_STAT_BLOCKS          8
12a457
 #define DHT_DIR_STAT_SIZE            4096
12a457
@@ -354,6 +355,13 @@ typedef enum tier_pause_state_ {
12a457
         TIER_PAUSED
12a457
 } tier_pause_state_t;
12a457
 
12a457
+/* This Structure is only used in tiering fixlayout */
12a457
+typedef struct gf_tier_fix_layout_arg {
12a457
+        xlator_t                *this;
12a457
+        dict_t                  *fix_layout;
12a457
+        pthread_t               thread_id;
12a457
+} gf_tier_fix_layout_arg_t;
12a457
+
12a457
 typedef struct gf_tier_conf {
12a457
         int                          is_tier;
12a457
         int                          watermark_hi;
12a457
@@ -375,6 +383,8 @@ typedef struct gf_tier_conf {
12a457
         pthread_mutex_t              pause_mutex;
12a457
         int                          promote_in_progress;
12a457
         int                          demote_in_progress;
12a457
+        /* This Structure is only used in tiering fixlayout */
12a457
+        gf_tier_fix_layout_arg_t     tier_fix_layout_arg;
12a457
 } gf_tier_conf_t;
12a457
 
12a457
 struct gf_defrag_info_ {
12a457
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
12a457
index 5942e92..de24135 100644
12a457
--- a/xlators/cluster/dht/src/dht-rebalance.c
12a457
+++ b/xlators/cluster/dht/src/dht-rebalance.c
12a457
@@ -3178,6 +3178,189 @@ out:
12a457
 
12a457
 }
12a457
 
12a457
+
12a457
+
12a457
+/******************************************************************************
12a457
+ *                      Tier background Fix layout functions
12a457
+ ******************************************************************************/
12a457
+/* This is the background tier fixlayout thread */
12a457
+void *
12a457
+gf_tier_do_fix_layout (void *args)
12a457
+{
12a457
+        gf_tier_fix_layout_arg_t *tier_fix_layout_arg   =  args;
12a457
+        int                 ret                         = -1;
12a457
+        xlator_t            *this                       = NULL;
12a457
+        dht_conf_t          *conf                       = NULL;
12a457
+        gf_defrag_info_t    *defrag                     = NULL;
12a457
+        dict_t              *dict                       = NULL;
12a457
+        loc_t               loc                         = {0,};
12a457
+        struct iatt         iatt                        = {0,};
12a457
+        struct iatt         parent                      = {0,};
12a457
+
12a457
+        GF_VALIDATE_OR_GOTO ("tier", tier_fix_layout_arg, out);
12a457
+        GF_VALIDATE_OR_GOTO ("tier", tier_fix_layout_arg->this, out);
12a457
+        this = tier_fix_layout_arg->this;
12a457
+
12a457
+        conf = this->private;
12a457
+        GF_VALIDATE_OR_GOTO (this->name, conf, out);
12a457
+
12a457
+        defrag = conf->defrag;
12a457
+        GF_VALIDATE_OR_GOTO (this->name, defrag, out);
12a457
+        GF_VALIDATE_OR_GOTO (this->name, defrag->root_inode, out);
12a457
+
12a457
+        GF_VALIDATE_OR_GOTO (this->name, tier_fix_layout_arg->fix_layout, out);
12a457
+
12a457
+
12a457
+        /* Get Root loc_t */
12a457
+        dht_build_root_loc (defrag->root_inode, &loc);
12a457
+        ret = syncop_lookup (this, &loc, &iatt, &parent, NULL, NULL);
12a457
+        if (ret) {
12a457
+                gf_msg (this->name, GF_LOG_ERROR, 0,
12a457
+                        DHT_MSG_REBALANCE_START_FAILED,
12a457
+                        "Lookup on root failed.");
12a457
+                ret = -1;
12a457
+                goto out;
12a457
+        }
12a457
+
12a457
+
12a457
+        /* Start the crawl */
12a457
+        gf_msg (this->name, GF_LOG_INFO, 0,
12a457
+                        DHT_MSG_LOG_TIER_STATUS, "Tiering Fixlayout started");
12a457
+
12a457
+        ret = gf_defrag_fix_layout (this, defrag, &loc,
12a457
+                                    tier_fix_layout_arg->fix_layout, NULL);
12a457
+        if (ret && ret != 2) {
12a457
+                gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_REBALANCE_FAILED,
12a457
+                        "Tiering fixlayout failed.");
12a457
+                ret = -1;
12a457
+                goto out;
12a457
+        }
12a457
+
12a457
+        if (ret != 2 && gf_defrag_settle_hash
12a457
+                        (this, defrag, &loc,
12a457
+                                tier_fix_layout_arg->fix_layout) != 0) {
12a457
+                defrag->total_failures++;
12a457
+                ret = -1;
12a457
+                goto out;
12a457
+        }
12a457
+
12a457
+        dict = dict_new ();
12a457
+        if (!dict) {
12a457
+                ret = -1;
12a457
+                goto out;
12a457
+        }
12a457
+
12a457
+        ret = dict_set_str (dict, GF_XATTR_TIER_LAYOUT_FIXED_KEY, "yes");
12a457
+        if (ret) {
12a457
+                gf_msg (this->name, GF_LOG_ERROR, 0,
12a457
+                        DHT_MSG_REBALANCE_FAILED,
12a457
+                        "Failed to set dictionary value: key = %s",
12a457
+                        GF_XATTR_TIER_LAYOUT_FIXED_KEY);
12a457
+                ret = -1;
12a457
+                goto out;
12a457
+        }
12a457
+
12a457
+        /* Marking the completion of tiering fix layout via a xattr on root */
12a457
+        ret = syncop_setxattr (this, &loc, dict, 0, NULL, NULL);
12a457
+        if (ret) {
12a457
+                gf_log (this->name, GF_LOG_ERROR, "Failed to set tiering fix "
12a457
+                        "layout completed xattr on %s", loc.path);
12a457
+                ret = -1;
12a457
+                goto out;
12a457
+        }
12a457
+
12a457
+        ret = 0;
12a457
+out:
12a457
+        if (ret)
12a457
+                defrag->total_failures++;
12a457
+
12a457
+        if (dict)
12a457
+                dict_unref (dict);
12a457
+
12a457
+        return NULL;
12a457
+}
12a457
+
12a457
+int
12a457
+gf_tier_start_fix_layout (xlator_t *this,
12a457
+                         loc_t *loc,
12a457
+                         gf_defrag_info_t *defrag,
12a457
+                         dict_t *fix_layout)
12a457
+{
12a457
+        int ret                                       = -1;
12a457
+        dict_t  *tier_dict                            = NULL;
12a457
+        gf_tier_fix_layout_arg_t *tier_fix_layout_arg = NULL;
12a457
+
12a457
+        tier_dict = dict_new ();
12a457
+        if (!tier_dict) {
12a457
+                gf_log ("tier", GF_LOG_ERROR, "Tier fix layout failed :"
12a457
+                        "Creation of tier_dict failed");
12a457
+                ret = -1;
12a457
+                goto out;
12a457
+        }
12a457
+
12a457
+        /* Check if layout is fixed already */
12a457
+        ret = syncop_getxattr (this, loc, &tier_dict,
12a457
+                                GF_XATTR_TIER_LAYOUT_FIXED_KEY,
12a457
+                                NULL, NULL);
12a457
+        if (ret != 0) {
12a457
+
12a457
+                tier_fix_layout_arg = &defrag->tier_conf.tier_fix_layout_arg;
12a457
+
12a457
+                /*Fill crawl arguments */
12a457
+                tier_fix_layout_arg->this = this;
12a457
+                tier_fix_layout_arg->fix_layout = fix_layout;
12a457
+
12a457
+                /* Spawn the fix layout thread so that it is done in the
12a457
+                 * background */
12a457
+                ret = pthread_create (&tier_fix_layout_arg->thread_id, NULL,
12a457
+                                gf_tier_do_fix_layout, tier_fix_layout_arg);
12a457
+                if (ret) {
12a457
+                        gf_log ("tier", GF_LOG_ERROR, "Thread creation failed. "
12a457
+                                "Background fix layout for tiering will not "
12a457
+                                "work.");
12a457
+                        defrag->total_failures++;
12a457
+                        goto out;
12a457
+                }
12a457
+        }
12a457
+        ret = 0;
12a457
+out:
12a457
+        if (tier_dict)
12a457
+                dict_unref (tier_dict);
12a457
+
12a457
+        return ret;
12a457
+}
12a457
+
12a457
+int
12a457
+gf_tier_clear_fix_layout (xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag)
12a457
+{
12a457
+        int ret = -1;
12a457
+
12a457
+        ret = syncop_removexattr (this, loc, GF_XATTR_TIER_LAYOUT_FIXED_KEY,
12a457
+                                  NULL, NULL);
12a457
+        if (ret) {
12a457
+                gf_log (this->name, GF_LOG_WARNING,
12a457
+                        "Failed removing tier fix layout "
12a457
+                        "xattr from %s", loc->path);
12a457
+                defrag->total_failures++;
12a457
+                ret = -1;
12a457
+                goto out;
12a457
+        }
12a457
+        ret = 0;
12a457
+out:
12a457
+        return ret;
12a457
+}
12a457
+
12a457
+void
12a457
+gf_tier_wait_fix_lookup (gf_defrag_info_t *defrag) {
12a457
+        if (defrag->tier_conf.tier_fix_layout_arg.thread_id) {
12a457
+                pthread_join (defrag->tier_conf.tier_fix_layout_arg.thread_id,
12a457
+                        NULL);
12a457
+        }
12a457
+}
12a457
+/******************Tier background Fix layout functions END********************/
12a457
+
12a457
+
12a457
+
12a457
 int
12a457
 gf_defrag_start_crawl (void *data)
12a457
 {
12a457
@@ -3367,24 +3550,16 @@ gf_defrag_start_crawl (void *data)
12a457
                 }
12a457
         }
12a457
 
12a457
-        ret = gf_defrag_fix_layout (this, defrag, &loc, fix_layout,
12a457
-                                    migrate_data);
12a457
-        if (ret && ret != 2) {
12a457
-                defrag->total_failures++;
12a457
-                ret = -1;
12a457
-                goto out;
12a457
-        }
12a457
-
12a457
-        if (ret != 2 &&
12a457
-            gf_defrag_settle_hash (this, defrag, &loc, fix_layout) != 0) {
12a457
-                defrag->total_failures++;
12a457
-                ret = -1;
12a457
-                goto out;
12a457
-        }
12a457
-
12a457
         if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) {
12a457
+                /* Fix layout for attach tier */
12a457
+                ret = gf_tier_start_fix_layout (this, &loc, defrag, fix_layout);
12a457
+                if (ret) {
12a457
+                        goto out;
12a457
+                }
12a457
+
12a457
                 methods = &(conf->methods);
12a457
 
12a457
+                /* Calling tier_start of tier.c */
12a457
                 methods->migration_other(this, defrag);
12a457
                 if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER) {
12a457
 
12a457
@@ -3395,9 +3570,35 @@ gf_defrag_start_crawl (void *data)
12a457
                                 goto out;
12a457
 
12a457
                 }
12a457
+        } else {
12a457
+                ret = gf_defrag_fix_layout (this, defrag, &loc, fix_layout,
12a457
+                                    migrate_data);
12a457
+                if (ret && ret != 2) {
12a457
+                        defrag->total_failures++;
12a457
+                        ret = -1;
12a457
+                        goto out;
12a457
+                }
12a457
+
12a457
+                if (ret != 2 && gf_defrag_settle_hash
12a457
+                        (this, defrag, &loc, fix_layout) != 0) {
12a457
+                        defrag->total_failures++;
12a457
+                        ret = -1;
12a457
+                        goto out;
12a457
+                }
12a457
+
12a457
+                if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER) {
12a457
+                        /* If it was a detach, remove the tier fix-layout
12a457
+                         * xattr on root */
12a457
+                         ret = gf_tier_clear_fix_layout (this, &loc, defrag);
12a457
+                         if (ret) {
12a457
+                                goto out;
12a457
+                         }
12a457
+                }
12a457
         }
12a457
+
12a457
         gf_log ("DHT", GF_LOG_INFO, "crawling file-system completed");
12a457
 out:
12a457
+
12a457
         /* We are here means crawling the entire file system is done
12a457
            or something failed. Set defrag->crawl_done flag to intimate
12a457
            the migrator threads to exhaust the defrag->queue and terminate*/
12a457
@@ -3422,6 +3623,13 @@ out:
12a457
                 pthread_join (tid[i], NULL);
12a457
         }
12a457
 
12a457
+        if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) {
12a457
+                /* Wait for the tier fixlayout to
12a457
+                 * complete if it was started.*/
12a457
+                 gf_tier_wait_fix_lookup (defrag);
12a457
+        }
12a457
+
12a457
+
12a457
         if (defrag->queue) {
12a457
                 gf_dirent_free (defrag->queue[0].df_entry);
12a457
                 INIT_LIST_HEAD (&(defrag->queue[0].list));
12a457
@@ -3450,7 +3658,11 @@ out:
12a457
         conf->defrag = NULL;
12a457
 
12a457
         if (dict)
12a457
-                dict_unref(dict);
12a457
+                dict_unref (dict);
12a457
+
12a457
+        if (migrate_data)
12a457
+                dict_unref (migrate_data);
12a457
+
12a457
 exit:
12a457
         return ret;
12a457
 }
12a457
-- 
12a457
1.7.1
12a457