|
|
21ab4e |
From 1532a98db62428bbacae6abf514e0c33427d0e08 Mon Sep 17 00:00:00 2001
|
|
|
21ab4e |
From: N Balachandran <nbalacha@redhat.com>
|
|
|
21ab4e |
Date: Thu, 22 Jun 2017 15:56:28 +0530
|
|
|
21ab4e |
Subject: [PATCH 529/529] cluster/dht: rebalance gets file count periodically
|
|
|
21ab4e |
|
|
|
21ab4e |
The rebalance used to get the file count in the beginning
|
|
|
21ab4e |
and not update it. This caused estimates to fail
|
|
|
21ab4e |
if the number changed during the rebalance.
|
|
|
21ab4e |
|
|
|
21ab4e |
The rebalance now updates the file count periodically.
|
|
|
21ab4e |
|
|
|
21ab4e |
> BUG: 1464110
|
|
|
21ab4e |
> Signed-off-by: N Balachandran <nbalacha@redhat.com>
|
|
|
21ab4e |
> Reviewed-on: https://review.gluster.org/17607
|
|
|
21ab4e |
> Smoke: Gluster Build System <jenkins@build.gluster.org>
|
|
|
21ab4e |
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
|
|
|
21ab4e |
> Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
|
|
|
21ab4e |
Change-Id: I1667ee69e8a1d7d6bc6bc2f060fad7f989d19ed4
|
|
|
21ab4e |
BUG: 1457731
|
|
|
21ab4e |
Signed-off-by: N Balachandran <nbalacha@redhat.com>
|
|
|
21ab4e |
Reviewed-on: https://code.engineering.redhat.com/gerrit/109917
|
|
|
21ab4e |
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
|
|
|
21ab4e |
---
|
|
|
21ab4e |
xlators/cluster/dht/src/dht-common.h | 3 +
|
|
|
21ab4e |
xlators/cluster/dht/src/dht-rebalance.c | 114 ++++++++++++++++++++++++--------
|
|
|
21ab4e |
xlators/cluster/dht/src/dht-shared.c | 1 +
|
|
|
21ab4e |
xlators/cluster/dht/src/tier.c | 1 +
|
|
|
21ab4e |
4 files changed, 93 insertions(+), 26 deletions(-)
|
|
|
21ab4e |
|
|
|
21ab4e |
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
|
|
|
21ab4e |
index b35ef0c..0309fb5 100644
|
|
|
21ab4e |
--- a/xlators/cluster/dht/src/dht-common.h
|
|
|
21ab4e |
+++ b/xlators/cluster/dht/src/dht-common.h
|
|
|
21ab4e |
@@ -466,6 +466,9 @@ struct gf_defrag_info_ {
|
|
|
21ab4e |
|
|
|
21ab4e |
/* lock migration flag */
|
|
|
21ab4e |
gf_boolean_t lock_migration_enabled;
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+ /* backpointer to make it easier to write functions for rebalance */
|
|
|
21ab4e |
+ xlator_t *this;
|
|
|
21ab4e |
};
|
|
|
21ab4e |
|
|
|
21ab4e |
typedef struct gf_defrag_info_ gf_defrag_info_t;
|
|
|
21ab4e |
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
|
|
|
21ab4e |
index 87e3dc5..5fa7139 100644
|
|
|
21ab4e |
--- a/xlators/cluster/dht/src/dht-rebalance.c
|
|
|
21ab4e |
+++ b/xlators/cluster/dht/src/dht-rebalance.c
|
|
|
21ab4e |
@@ -24,6 +24,7 @@
|
|
|
21ab4e |
#define MAX_MIGRATE_QUEUE_COUNT 500
|
|
|
21ab4e |
#define MIN_MIGRATE_QUEUE_COUNT 200
|
|
|
21ab4e |
#define MAX_REBAL_TYPE_SIZE 16
|
|
|
21ab4e |
+#define FILE_CNT_INTERVAL 600 /* 10 mins */
|
|
|
21ab4e |
|
|
|
21ab4e |
#ifndef MAX
|
|
|
21ab4e |
#define MAX(a, b) (((a) > (b))?(a):(b))
|
|
|
21ab4e |
@@ -4040,6 +4041,9 @@ gf_tier_wait_fix_lookup (gf_defrag_info_t *defrag) {
|
|
|
21ab4e |
/******************Tier background Fix layout functions END********************/
|
|
|
21ab4e |
|
|
|
21ab4e |
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+
|
|
|
21ab4e |
uint64_t gf_defrag_subvol_file_cnt (xlator_t *this, loc_t *root_loc)
|
|
|
21ab4e |
{
|
|
|
21ab4e |
int ret = -1;
|
|
|
21ab4e |
@@ -4057,23 +4061,23 @@ uint64_t gf_defrag_subvol_file_cnt (xlator_t *this, loc_t *root_loc)
|
|
|
21ab4e |
}
|
|
|
21ab4e |
|
|
|
21ab4e |
|
|
|
21ab4e |
-int gf_defrag_total_file_cnt (xlator_t *this, loc_t *root_loc)
|
|
|
21ab4e |
+uint64_t
|
|
|
21ab4e |
+gf_defrag_total_file_cnt (xlator_t *this, loc_t *root_loc)
|
|
|
21ab4e |
{
|
|
|
21ab4e |
dht_conf_t *conf = NULL;
|
|
|
21ab4e |
- int ret = -1;
|
|
|
21ab4e |
int i = 0;
|
|
|
21ab4e |
uint64_t num_files = 0;
|
|
|
21ab4e |
-
|
|
|
21ab4e |
+ uint64_t total_entries = 0;
|
|
|
21ab4e |
|
|
|
21ab4e |
conf = this->private;
|
|
|
21ab4e |
if (!conf) {
|
|
|
21ab4e |
- return ret;
|
|
|
21ab4e |
+ return 0;
|
|
|
21ab4e |
}
|
|
|
21ab4e |
|
|
|
21ab4e |
for (i = 0 ; i < conf->local_subvols_cnt; i++) {
|
|
|
21ab4e |
num_files = gf_defrag_subvol_file_cnt (conf->local_subvols[i],
|
|
|
21ab4e |
root_loc);
|
|
|
21ab4e |
- g_totalfiles += num_files;
|
|
|
21ab4e |
+ total_entries += num_files;
|
|
|
21ab4e |
gf_msg (this->name, GF_LOG_INFO, 0, 0, "local subvol: %s,"
|
|
|
21ab4e |
"cnt = %"PRIu64, conf->local_subvols[i]->name,
|
|
|
21ab4e |
num_files);
|
|
|
21ab4e |
@@ -4082,14 +4086,14 @@ int gf_defrag_total_file_cnt (xlator_t *this, loc_t *root_loc)
|
|
|
21ab4e |
/* FIXFIXFIX: halve the number of files to negate .glusterfs contents
|
|
|
21ab4e |
We need a better way to figure this out */
|
|
|
21ab4e |
|
|
|
21ab4e |
- g_totalfiles = g_totalfiles/2;
|
|
|
21ab4e |
- if (g_totalfiles > 20000)
|
|
|
21ab4e |
- g_totalfiles += 10000;
|
|
|
21ab4e |
+ total_entries = total_entries/2;
|
|
|
21ab4e |
+ if (total_entries > 20000)
|
|
|
21ab4e |
+ total_entries += 10000;
|
|
|
21ab4e |
|
|
|
21ab4e |
gf_msg (this->name, GF_LOG_INFO, 0, 0,
|
|
|
21ab4e |
- "Total number of files = %"PRIu64, g_totalfiles);
|
|
|
21ab4e |
+ "Total number of files = %"PRIu64, total_entries);
|
|
|
21ab4e |
|
|
|
21ab4e |
- return 0;
|
|
|
21ab4e |
+ return total_entries;
|
|
|
21ab4e |
}
|
|
|
21ab4e |
|
|
|
21ab4e |
|
|
|
21ab4e |
@@ -4118,6 +4122,39 @@ out:
|
|
|
21ab4e |
return ret;
|
|
|
21ab4e |
}
|
|
|
21ab4e |
|
|
|
21ab4e |
+static void*
|
|
|
21ab4e |
+dht_file_counter_thread (void *args)
|
|
|
21ab4e |
+{
|
|
|
21ab4e |
+ gf_defrag_info_t *defrag = NULL;
|
|
|
21ab4e |
+ loc_t root_loc = {0,};
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+ if (!args)
|
|
|
21ab4e |
+ return NULL;
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+ defrag = (gf_defrag_info_t *) args;
|
|
|
21ab4e |
+ dht_build_root_loc (defrag->root_inode, &root_loc);
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+ while (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) {
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+ sleep (FILE_CNT_INTERVAL);
|
|
|
21ab4e |
+ g_totalfiles = gf_defrag_total_file_cnt (defrag->this,
|
|
|
21ab4e |
+ &root_loc);
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+ if (!g_totalfiles) {
|
|
|
21ab4e |
+ gf_msg ("dht", GF_LOG_ERROR, 0, 0, "Failed to get "
|
|
|
21ab4e |
+ "the total number of files. Unable to estimate "
|
|
|
21ab4e |
+ "time to complete rebalance.");
|
|
|
21ab4e |
+ } else {
|
|
|
21ab4e |
+ gf_msg_debug ("dht", 0,
|
|
|
21ab4e |
+ "total number of files =%"PRIu64,
|
|
|
21ab4e |
+ g_totalfiles);
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+ return NULL;
|
|
|
21ab4e |
+}
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+
|
|
|
21ab4e |
|
|
|
21ab4e |
int
|
|
|
21ab4e |
gf_defrag_start_crawl (void *data)
|
|
|
21ab4e |
@@ -4140,6 +4177,7 @@ gf_defrag_start_crawl (void *data)
|
|
|
21ab4e |
int err = 0;
|
|
|
21ab4e |
int thread_spawn_count = 0;
|
|
|
21ab4e |
pthread_t *tid = NULL;
|
|
|
21ab4e |
+ pthread_t filecnt_thread;
|
|
|
21ab4e |
gf_boolean_t is_tier_detach = _gf_false;
|
|
|
21ab4e |
call_frame_t *statfs_frame = NULL;
|
|
|
21ab4e |
xlator_t *old_THIS = NULL;
|
|
|
21ab4e |
@@ -4289,13 +4327,23 @@ gf_defrag_start_crawl (void *data)
|
|
|
21ab4e |
}
|
|
|
21ab4e |
}
|
|
|
21ab4e |
|
|
|
21ab4e |
- ret = gf_defrag_total_file_cnt (this, &loc);
|
|
|
21ab4e |
- if (ret) {
|
|
|
21ab4e |
+ g_totalfiles = gf_defrag_total_file_cnt (this, &loc);
|
|
|
21ab4e |
+ if (!g_totalfiles) {
|
|
|
21ab4e |
gf_msg (this->name, GF_LOG_ERROR, 0, 0, "Failed to get "
|
|
|
21ab4e |
"the total number of files. Unable to estimate "
|
|
|
21ab4e |
"time to complete rebalance.");
|
|
|
21ab4e |
}
|
|
|
21ab4e |
|
|
|
21ab4e |
+ ret = gf_thread_create_detached (&filecnt_thread,
|
|
|
21ab4e |
+ &dht_file_counter_thread,
|
|
|
21ab4e |
+ (void *)defrag);
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+ if (ret) {
|
|
|
21ab4e |
+ gf_msg (this->name, GF_LOG_ERROR, ret, 0, "Failed to "
|
|
|
21ab4e |
+ "create the file counter thread ");
|
|
|
21ab4e |
+ ret = 0;
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
+
|
|
|
21ab4e |
/* Initialize global entry queue */
|
|
|
21ab4e |
defrag->queue = GF_CALLOC (1, sizeof (struct dht_container),
|
|
|
21ab4e |
gf_dht_mt_container_t);
|
|
|
21ab4e |
@@ -4412,6 +4460,8 @@ out:
|
|
|
21ab4e |
pthread_join (tid[i], NULL);
|
|
|
21ab4e |
}
|
|
|
21ab4e |
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+
|
|
|
21ab4e |
GF_FREE (tid);
|
|
|
21ab4e |
|
|
|
21ab4e |
if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) {
|
|
|
21ab4e |
@@ -4529,13 +4579,16 @@ uint64_t
|
|
|
21ab4e |
gf_defrag_get_estimates (dht_conf_t *conf)
|
|
|
21ab4e |
{
|
|
|
21ab4e |
gf_defrag_info_t *defrag = NULL;
|
|
|
21ab4e |
- double rate_lookedup = 0;
|
|
|
21ab4e |
- uint64_t dirs_processed = 0;
|
|
|
21ab4e |
- uint64_t total_processed = 0;
|
|
|
21ab4e |
- uint64_t tmp_count = 0;
|
|
|
21ab4e |
- uint64_t time_to_complete = 0;
|
|
|
21ab4e |
- struct timeval end = {0,};
|
|
|
21ab4e |
- double elapsed = 0;
|
|
|
21ab4e |
+ loc_t loc = {0,};
|
|
|
21ab4e |
+ double rate_lookedup = 0;
|
|
|
21ab4e |
+ uint64_t dirs_processed = 0;
|
|
|
21ab4e |
+ uint64_t files_processed = 0;
|
|
|
21ab4e |
+ uint64_t total_processed = 0;
|
|
|
21ab4e |
+ uint64_t tmp_count = 0;
|
|
|
21ab4e |
+ uint64_t time_to_complete = 0;
|
|
|
21ab4e |
+ struct timeval end = {0,};
|
|
|
21ab4e |
+ double elapsed = 0;
|
|
|
21ab4e |
+
|
|
|
21ab4e |
|
|
|
21ab4e |
defrag = conf->defrag;
|
|
|
21ab4e |
|
|
|
21ab4e |
@@ -4552,26 +4605,34 @@ gf_defrag_get_estimates (dht_conf_t *conf)
|
|
|
21ab4e |
*/
|
|
|
21ab4e |
|
|
|
21ab4e |
dirs_processed = defrag->num_dirs_processed;
|
|
|
21ab4e |
+ files_processed = defrag->num_files_lookedup;
|
|
|
21ab4e |
|
|
|
21ab4e |
- total_processed = defrag->num_files_lookedup
|
|
|
21ab4e |
- + dirs_processed;
|
|
|
21ab4e |
+ total_processed = files_processed + dirs_processed;
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+ if (total_processed > g_totalfiles) {
|
|
|
21ab4e |
+ /* lookup the number of files again
|
|
|
21ab4e |
+ * The problem here is that not all the newly added files
|
|
|
21ab4e |
+ * might need to be processed. So this need not work
|
|
|
21ab4e |
+ * in some cases
|
|
|
21ab4e |
+ */
|
|
|
21ab4e |
+ dht_build_root_loc (defrag->root_inode, &loc);
|
|
|
21ab4e |
+ g_totalfiles = gf_defrag_total_file_cnt (defrag->this, &loc);
|
|
|
21ab4e |
+ if (!g_totalfiles)
|
|
|
21ab4e |
+ goto out;
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
|
|
|
21ab4e |
/* rate at which files looked up */
|
|
|
21ab4e |
rate_lookedup = (total_processed)/elapsed;
|
|
|
21ab4e |
|
|
|
21ab4e |
-
|
|
|
21ab4e |
/* We initially sum up dirs across all local subvols because we get the
|
|
|
21ab4e |
* file count from the inodes on each subvol.
|
|
|
21ab4e |
* The same directories will be counted for each subvol but
|
|
|
21ab4e |
- * we want that they are only counted once.
|
|
|
21ab4e |
+ * we want them to be counted once.
|
|
|
21ab4e |
*/
|
|
|
21ab4e |
|
|
|
21ab4e |
tmp_count = g_totalfiles
|
|
|
21ab4e |
- (dirs_processed * (conf->local_subvols_cnt - 1));
|
|
|
21ab4e |
|
|
|
21ab4e |
- if (total_processed > g_totalfiles)
|
|
|
21ab4e |
- g_totalfiles = total_processed + 10000;
|
|
|
21ab4e |
-
|
|
|
21ab4e |
if (rate_lookedup) {
|
|
|
21ab4e |
time_to_complete = (tmp_count)/rate_lookedup;
|
|
|
21ab4e |
|
|
|
21ab4e |
@@ -4586,6 +4647,7 @@ gf_defrag_get_estimates (dht_conf_t *conf)
|
|
|
21ab4e |
"rate_lookedup=%f", total_processed, tmp_count,
|
|
|
21ab4e |
rate_lookedup);
|
|
|
21ab4e |
|
|
|
21ab4e |
+out:
|
|
|
21ab4e |
return time_to_complete;
|
|
|
21ab4e |
}
|
|
|
21ab4e |
|
|
|
21ab4e |
diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
|
|
|
21ab4e |
index 1128cfe..70ae7da 100644
|
|
|
21ab4e |
--- a/xlators/cluster/dht/src/dht-shared.c
|
|
|
21ab4e |
+++ b/xlators/cluster/dht/src/dht-shared.c
|
|
|
21ab4e |
@@ -705,6 +705,7 @@ dht_init (xlator_t *this)
|
|
|
21ab4e |
defrag->is_exiting = 0;
|
|
|
21ab4e |
|
|
|
21ab4e |
conf->defrag = defrag;
|
|
|
21ab4e |
+ defrag->this = this;
|
|
|
21ab4e |
|
|
|
21ab4e |
ret = dict_get_str (this->options, "node-uuid", &node_uuid);
|
|
|
21ab4e |
if (ret) {
|
|
|
21ab4e |
diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c
|
|
|
21ab4e |
index 1fba88a..0f6651d 100644
|
|
|
21ab4e |
--- a/xlators/cluster/dht/src/tier.c
|
|
|
21ab4e |
+++ b/xlators/cluster/dht/src/tier.c
|
|
|
21ab4e |
@@ -2423,6 +2423,7 @@ tier_init (xlator_t *this)
|
|
|
21ab4e |
defrag->tier_conf.last_promote_qfile_index = 0;
|
|
|
21ab4e |
|
|
|
21ab4e |
defrag->tier_conf.is_tier = 1;
|
|
|
21ab4e |
+ defrag->this = this;
|
|
|
21ab4e |
|
|
|
21ab4e |
ret = dict_get_int32 (this->options,
|
|
|
21ab4e |
"tier-max-promote-file-size", &maxsize);
|
|
|
21ab4e |
--
|
|
|
21ab4e |
1.8.3.1
|
|
|
21ab4e |
|