From 293766aa6d4f4d6a329f5ebc131fdc85f1ed4bf4 Mon Sep 17 00:00:00 2001
From: Joseph Fernandes <josferna@redhat.com>
Date: Tue, 23 Feb 2016 12:51:45 +0530
Subject: [PATCH 46/80] tier/dht : Attach tier fix layout to run in background
1. Spawn a thread for background fix-layout for tier process.
2. Once the fix-layout is completed a marker xttr is set on the root of
volume to mark the completion of the background fixlayout, so that
even if the tier process is spawned again, fixlayout will not be
issued, if it was completed last time.
3. Please note that promotion of legacy files will happen eventually as
the ctr lookup heal in the fixlayout slowly heals the ctr db for legacy
files OR the ctr lookup heal happend due to a name lookup.
4. When a detach tier is successful in evacuation data from hot tier, we remove
the marker xattr is removed. So that next attach tier runs the background
tier fixlayout.
what is remaining ?
1. Instead of clearing the marker xattr of tiering fix layout at the end of detach start
clear it during detach commit. But the issue is detach commit is a glusterd operation
and the volume is not mounted in glusterd.
The reason we want to do it in detach commit is that if the admin wants to attach the
same tier again, then a background fixlayout will be triggered, which would not be needed.
2. Clearing the CTR DB of the cold bricks when there is a detach commit, as it will be having
entries which will be stale when the volume is used, with ctr off (ctr is switched off only when
we have detach commit.)
Backport of http://review.gluster.org/13491
> Change-Id: Ibe343572e95865325cd0eef4d0b976b626a3c0c5
> BUG: 1313228
> Signed-off-by: Joseph Fernandes <josferna@redhat.com>
> Reviewed-on: http://review.gluster.org/13491
> Smoke: Gluster Build System <jenkins@build.gluster.com>
> Tested-by: Joseph Fernandes
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
> Reviewed-by: Dan Lambright <dlambrig@redhat.com>
> Signed-off-by: Joseph Fernandes <josferna@redhat.com>
> Change-Id: Ic28affdf78d2ac0f394f3dd59f0126df7915d609
> BUG: 1323016
> Reviewed-on: http://review.gluster.org/13879
> Smoke: Gluster Build System <jenkins@build.gluster.com>
> Reviewed-by: Joseph Fernandes
> Tested-by: Joseph Fernandes
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
> Reviewed-by: Dan Lambright <dlambrig@redhat.com>
Signed-off-by: Joseph Fernandes <josferna@redhat.com>
Change-Id: I3bccb27a160f988f3721542e5ca07939a369e457
BUG: 1294790
Reviewed-on: https://code.engineering.redhat.com/gerrit/71484
Reviewed-by: Joseph Fernandes <josferna@redhat.com>
Tested-by: Joseph Fernandes <josferna@redhat.com>
---
tests/basic/tier/legacy-many.t | 13 ++
tests/basic/tier/tier.t | 5 -
tests/tier.rc | 6 +
xlators/cluster/dht/src/dht-common.h | 28 +++-
xlators/cluster/dht/src/dht-rebalance.c | 244 +++++++++++++++++++++++++++++--
5 files changed, 266 insertions(+), 30 deletions(-)
diff --git a/tests/basic/tier/legacy-many.t b/tests/basic/tier/legacy-many.t
index 0f32ff6..e419bec 100644
--- a/tests/basic/tier/legacy-many.t
+++ b/tests/basic/tier/legacy-many.t
@@ -58,6 +58,11 @@ TEST $CLI volume set $V0 cluster.write-freq-threshold 0
# wait a little for lookup heal to finish
sleep 10
+# make sure fix layout completed
+CPATH=$B0/${V0}0
+echo $CPATH > /tmp/out
+TEST getfattr -n "trusted.tier.fix.layout.complete" $CPATH
+
# Read "legacy" files
drop_cache $M0
@@ -69,5 +74,13 @@ TEST read_all
sleep $PROMOTE_TIMEOUT
EXPECT_WITHIN $PROMOTE_TIMEOUT "0" check_counters $NUM_FILES 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" detach_start $V0
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0 $H0:$B0/${V0}${CACHE_BRICK_FIRST}"
+
+TEST $CLI volume tier $V0 detach commit
+
+# fix layout flag should be cleared
+TEST ! getfattr -n "trusted.tier.fix.layout.complete" $CPATH
+
cd;
cleanup
diff --git a/tests/basic/tier/tier.t b/tests/basic/tier/tier.t
index cfd8e77..4b36930 100755
--- a/tests/basic/tier/tier.t
+++ b/tests/basic/tier/tier.t
@@ -13,11 +13,6 @@ MIGRATION_TIMEOUT=10
DEMOTE_FREQ=4
PROMOTE_FREQ=12
-function detach_start {
- $CLI volume tier $1 detach start
- echo $?;
-}
-
function file_on_slow_tier {
found=0
diff --git a/tests/tier.rc b/tests/tier.rc
index dd220fe..ee37e07 100644
--- a/tests/tier.rc
+++ b/tests/tier.rc
@@ -60,6 +60,12 @@ function check_counters {
}
+function detach_start {
+ $CLI volume tier $1 detach start
+ echo $?;
+}
+
+
# Grab md5sum without file path (failed attempt notifications are discarded)
function fingerprint {
md5sum $1 2> /dev/null | grep --only-matching -m 1 '^[0-9a-f]*'
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 53d1489..edfb805 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -26,15 +26,16 @@
#ifndef _DHT_H
#define _DHT_H
-#define GF_XATTR_FIX_LAYOUT_KEY "distribute.fix.layout"
-#define GF_XATTR_FILE_MIGRATE_KEY "trusted.distribute.migrate-data"
-#define GF_DHT_LOOKUP_UNHASHED_ON 1
-#define GF_DHT_LOOKUP_UNHASHED_AUTO 2
-#define DHT_PATHINFO_HEADER "DISTRIBUTE:"
-#define DHT_FILE_MIGRATE_DOMAIN "dht.file.migrate"
-#define DHT_LAYOUT_HEAL_DOMAIN "dht.layout.heal"
-#define DHT_LAYOUT_HASH_INVALID 1
-#define TIERING_MIGRATION_KEY "tiering.migration"
+#define GF_XATTR_FIX_LAYOUT_KEY "distribute.fix.layout"
+#define GF_XATTR_TIER_LAYOUT_FIXED_KEY "trusted.tier.fix.layout.complete"
+#define GF_XATTR_FILE_MIGRATE_KEY "trusted.distribute.migrate-data"
+#define GF_DHT_LOOKUP_UNHASHED_ON 1
+#define GF_DHT_LOOKUP_UNHASHED_AUTO 2
+#define DHT_PATHINFO_HEADER "DISTRIBUTE:"
+#define DHT_FILE_MIGRATE_DOMAIN "dht.file.migrate"
+#define DHT_LAYOUT_HEAL_DOMAIN "dht.layout.heal"
+#define TIERING_MIGRATION_KEY "tiering.migration"
+#define DHT_LAYOUT_HASH_INVALID 1
#define DHT_DIR_STAT_BLOCKS 8
#define DHT_DIR_STAT_SIZE 4096
@@ -354,6 +355,13 @@ typedef enum tier_pause_state_ {
TIER_PAUSED
} tier_pause_state_t;
+/* This Structure is only used in tiering fixlayout */
+typedef struct gf_tier_fix_layout_arg {
+ xlator_t *this;
+ dict_t *fix_layout;
+ pthread_t thread_id;
+} gf_tier_fix_layout_arg_t;
+
typedef struct gf_tier_conf {
int is_tier;
int watermark_hi;
@@ -375,6 +383,8 @@ typedef struct gf_tier_conf {
pthread_mutex_t pause_mutex;
int promote_in_progress;
int demote_in_progress;
+ /* This Structure is only used in tiering fixlayout */
+ gf_tier_fix_layout_arg_t tier_fix_layout_arg;
} gf_tier_conf_t;
struct gf_defrag_info_ {
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 5942e92..de24135 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -3178,6 +3178,189 @@ out:
}
+
+
+/******************************************************************************
+ * Tier background Fix layout functions
+ ******************************************************************************/
+/* This is the background tier fixlayout thread */
+void *
+gf_tier_do_fix_layout (void *args)
+{
+ gf_tier_fix_layout_arg_t *tier_fix_layout_arg = args;
+ int ret = -1;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ gf_defrag_info_t *defrag = NULL;
+ dict_t *dict = NULL;
+ loc_t loc = {0,};
+ struct iatt iatt = {0,};
+ struct iatt parent = {0,};
+
+ GF_VALIDATE_OR_GOTO ("tier", tier_fix_layout_arg, out);
+ GF_VALIDATE_OR_GOTO ("tier", tier_fix_layout_arg->this, out);
+ this = tier_fix_layout_arg->this;
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO (this->name, conf, out);
+
+ defrag = conf->defrag;
+ GF_VALIDATE_OR_GOTO (this->name, defrag, out);
+ GF_VALIDATE_OR_GOTO (this->name, defrag->root_inode, out);
+
+ GF_VALIDATE_OR_GOTO (this->name, tier_fix_layout_arg->fix_layout, out);
+
+
+ /* Get Root loc_t */
+ dht_build_root_loc (defrag->root_inode, &loc);
+ ret = syncop_lookup (this, &loc, &iatt, &parent, NULL, NULL);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_REBALANCE_START_FAILED,
+ "Lookup on root failed.");
+ ret = -1;
+ goto out;
+ }
+
+
+ /* Start the crawl */
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_LOG_TIER_STATUS, "Tiering Fixlayout started");
+
+ ret = gf_defrag_fix_layout (this, defrag, &loc,
+ tier_fix_layout_arg->fix_layout, NULL);
+ if (ret && ret != 2) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_REBALANCE_FAILED,
+ "Tiering fixlayout failed.");
+ ret = -1;
+ goto out;
+ }
+
+ if (ret != 2 && gf_defrag_settle_hash
+ (this, defrag, &loc,
+ tier_fix_layout_arg->fix_layout) != 0) {
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
+ }
+
+ dict = dict_new ();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_str (dict, GF_XATTR_TIER_LAYOUT_FIXED_KEY, "yes");
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_REBALANCE_FAILED,
+ "Failed to set dictionary value: key = %s",
+ GF_XATTR_TIER_LAYOUT_FIXED_KEY);
+ ret = -1;
+ goto out;
+ }
+
+ /* Marking the completion of tiering fix layout via a xattr on root */
+ ret = syncop_setxattr (this, &loc, dict, 0, NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set tiering fix "
+ "layout completed xattr on %s", loc.path);
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret)
+ defrag->total_failures++;
+
+ if (dict)
+ dict_unref (dict);
+
+ return NULL;
+}
+
+int
+gf_tier_start_fix_layout (xlator_t *this,
+ loc_t *loc,
+ gf_defrag_info_t *defrag,
+ dict_t *fix_layout)
+{
+ int ret = -1;
+ dict_t *tier_dict = NULL;
+ gf_tier_fix_layout_arg_t *tier_fix_layout_arg = NULL;
+
+ tier_dict = dict_new ();
+ if (!tier_dict) {
+ gf_log ("tier", GF_LOG_ERROR, "Tier fix layout failed :"
+ "Creation of tier_dict failed");
+ ret = -1;
+ goto out;
+ }
+
+ /* Check if layout is fixed already */
+ ret = syncop_getxattr (this, loc, &tier_dict,
+ GF_XATTR_TIER_LAYOUT_FIXED_KEY,
+ NULL, NULL);
+ if (ret != 0) {
+
+ tier_fix_layout_arg = &defrag->tier_conf.tier_fix_layout_arg;
+
+ /*Fill crawl arguments */
+ tier_fix_layout_arg->this = this;
+ tier_fix_layout_arg->fix_layout = fix_layout;
+
+ /* Spawn the fix layout thread so that its done in the
+ * background */
+ ret = pthread_create (&tier_fix_layout_arg->thread_id, NULL,
+ gf_tier_do_fix_layout, tier_fix_layout_arg);
+ if (ret) {
+ gf_log ("tier", GF_LOG_ERROR, "Thread creation failed. "
+ "Background fix layout for tiering will not "
+ "work.");
+ defrag->total_failures++;
+ goto out;
+ }
+ }
+ ret = 0;
+out:
+ if (tier_dict)
+ dict_unref (tier_dict);
+
+ return ret;
+}
+
+int
+gf_tier_clear_fix_layout (xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag)
+{
+ int ret = -1;
+
+ ret = syncop_removexattr (this, loc, GF_XATTR_TIER_LAYOUT_FIXED_KEY,
+ NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Failed removing tier fix layout "
+ "xattr from %s", loc->path);
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+void
+gf_tier_wait_fix_lookup (gf_defrag_info_t *defrag) {
+ if (defrag->tier_conf.tier_fix_layout_arg.thread_id) {
+ pthread_join (defrag->tier_conf.tier_fix_layout_arg.thread_id,
+ NULL);
+ }
+}
+/******************Tier background Fix layout functions END********************/
+
+
+
int
gf_defrag_start_crawl (void *data)
{
@@ -3367,24 +3550,16 @@ gf_defrag_start_crawl (void *data)
}
}
- ret = gf_defrag_fix_layout (this, defrag, &loc, fix_layout,
- migrate_data);
- if (ret && ret != 2) {
- defrag->total_failures++;
- ret = -1;
- goto out;
- }
-
- if (ret != 2 &&
- gf_defrag_settle_hash (this, defrag, &loc, fix_layout) != 0) {
- defrag->total_failures++;
- ret = -1;
- goto out;
- }
-
if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) {
+ /* Fix layout for attach tier */
+ ret = gf_tier_start_fix_layout (this, &loc, defrag, fix_layout);
+ if (ret) {
+ goto out;
+ }
+
methods = &(conf->methods);
+ /* Calling tier_start of tier.c */
methods->migration_other(this, defrag);
if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER) {
@@ -3395,9 +3570,35 @@ gf_defrag_start_crawl (void *data)
goto out;
}
+ } else {
+ ret = gf_defrag_fix_layout (this, defrag, &loc, fix_layout,
+ migrate_data);
+ if (ret && ret != 2) {
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
+ }
+
+ if (ret != 2 && gf_defrag_settle_hash
+ (this, defrag, &loc, fix_layout) != 0) {
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
+ }
+
+ if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER) {
+ /* If its was a detach remove the tier fix-layout
+ * xattr on root */
+ ret = gf_tier_clear_fix_layout (this, &loc, defrag);
+ if (ret) {
+ goto out;
+ }
+ }
}
+
gf_log ("DHT", GF_LOG_INFO, "crawling file-system completed");
out:
+
/* We are here means crawling the entire file system is done
or something failed. Set defrag->crawl_done flag to intimate
the migrator threads to exhaust the defrag->queue and terminate*/
@@ -3422,6 +3623,13 @@ out:
pthread_join (tid[i], NULL);
}
+ if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) {
+ /* Wait for the tier fixlayout to
+ * complete if its was started.*/
+ gf_tier_wait_fix_lookup (defrag);
+ }
+
+
if (defrag->queue) {
gf_dirent_free (defrag->queue[0].df_entry);
INIT_LIST_HEAD (&(defrag->queue[0].list));
@@ -3450,7 +3658,11 @@ out:
conf->defrag = NULL;
if (dict)
- dict_unref(dict);
+ dict_unref (dict);
+
+ if (migrate_data)
+ dict_unref (migrate_data);
+
exit:
return ret;
}
--
1.7.1