From 1532a98db62428bbacae6abf514e0c33427d0e08 Mon Sep 17 00:00:00 2001
From: N Balachandran <nbalacha@redhat.com>
Date: Thu, 22 Jun 2017 15:56:28 +0530
Subject: [PATCH 529/529] cluster/dht: rebalance gets file count periodically
The rebalance used to get the file count only once, at the beginning,
and never updated it. This caused the time estimates to fail
if the number of files changed during the rebalance.
The rebalance now updates the file count periodically.
> BUG: 1464110
> Signed-off-by: N Balachandran <nbalacha@redhat.com>
> Reviewed-on: https://review.gluster.org/17607
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
> Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
Change-Id: I1667ee69e8a1d7d6bc6bc2f060fad7f989d19ed4
BUG: 1457731
Signed-off-by: N Balachandran <nbalacha@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/109917
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
xlators/cluster/dht/src/dht-common.h | 3 +
xlators/cluster/dht/src/dht-rebalance.c | 114 ++++++++++++++++++++++++--------
xlators/cluster/dht/src/dht-shared.c | 1 +
xlators/cluster/dht/src/tier.c | 1 +
4 files changed, 93 insertions(+), 26 deletions(-)
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index b35ef0c..0309fb5 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -466,6 +466,9 @@ struct gf_defrag_info_ {
/* lock migration flag */
gf_boolean_t lock_migration_enabled;
+
+ /* backpointer to make it easier to write functions for rebalance */
+ xlator_t *this;
};
typedef struct gf_defrag_info_ gf_defrag_info_t;
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 87e3dc5..5fa7139 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -24,6 +24,7 @@
#define MAX_MIGRATE_QUEUE_COUNT 500
#define MIN_MIGRATE_QUEUE_COUNT 200
#define MAX_REBAL_TYPE_SIZE 16
+#define FILE_CNT_INTERVAL 600 /* 10 mins */
#ifndef MAX
#define MAX(a, b) (((a) > (b))?(a):(b))
@@ -4040,6 +4041,9 @@ gf_tier_wait_fix_lookup (gf_defrag_info_t *defrag) {
/******************Tier background Fix layout functions END********************/
+
+
+
uint64_t gf_defrag_subvol_file_cnt (xlator_t *this, loc_t *root_loc)
{
int ret = -1;
@@ -4057,23 +4061,23 @@ uint64_t gf_defrag_subvol_file_cnt (xlator_t *this, loc_t *root_loc)
}
-int gf_defrag_total_file_cnt (xlator_t *this, loc_t *root_loc)
+uint64_t
+gf_defrag_total_file_cnt (xlator_t *this, loc_t *root_loc)
{
dht_conf_t *conf = NULL;
- int ret = -1;
int i = 0;
uint64_t num_files = 0;
-
+ uint64_t total_entries = 0;
conf = this->private;
if (!conf) {
- return ret;
+ return 0;
}
for (i = 0 ; i < conf->local_subvols_cnt; i++) {
num_files = gf_defrag_subvol_file_cnt (conf->local_subvols[i],
root_loc);
- g_totalfiles += num_files;
+ total_entries += num_files;
gf_msg (this->name, GF_LOG_INFO, 0, 0, "local subvol: %s,"
"cnt = %"PRIu64, conf->local_subvols[i]->name,
num_files);
@@ -4082,14 +4086,14 @@ int gf_defrag_total_file_cnt (xlator_t *this, loc_t *root_loc)
/* FIXFIXFIX: halve the number of files to negate .glusterfs contents
We need a better way to figure this out */
- g_totalfiles = g_totalfiles/2;
- if (g_totalfiles > 20000)
- g_totalfiles += 10000;
+ total_entries = total_entries/2;
+ if (total_entries > 20000)
+ total_entries += 10000;
gf_msg (this->name, GF_LOG_INFO, 0, 0,
- "Total number of files = %"PRIu64, g_totalfiles);
+ "Total number of files = %"PRIu64, total_entries);
- return 0;
+ return total_entries;
}
@@ -4118,6 +4122,39 @@ out:
return ret;
}
+static void*
+dht_file_counter_thread (void *args)
+{
+ gf_defrag_info_t *defrag = NULL;
+ loc_t root_loc = {0,};
+
+ if (!args)
+ return NULL;
+
+ defrag = (gf_defrag_info_t *) args;
+ dht_build_root_loc (defrag->root_inode, &root_loc);
+
+ while (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) {
+
+ sleep (FILE_CNT_INTERVAL);
+ g_totalfiles = gf_defrag_total_file_cnt (defrag->this,
+ &root_loc);
+
+ if (!g_totalfiles) {
+ gf_msg ("dht", GF_LOG_ERROR, 0, 0, "Failed to get "
+ "the total number of files. Unable to estimate "
+ "time to complete rebalance.");
+ } else {
+ gf_msg_debug ("dht", 0,
+ "total number of files =%"PRIu64,
+ g_totalfiles);
+ }
+ }
+
+ return NULL;
+}
+
+
int
gf_defrag_start_crawl (void *data)
@@ -4140,6 +4177,7 @@ gf_defrag_start_crawl (void *data)
int err = 0;
int thread_spawn_count = 0;
pthread_t *tid = NULL;
+ pthread_t filecnt_thread;
gf_boolean_t is_tier_detach = _gf_false;
call_frame_t *statfs_frame = NULL;
xlator_t *old_THIS = NULL;
@@ -4289,13 +4327,23 @@ gf_defrag_start_crawl (void *data)
}
}
- ret = gf_defrag_total_file_cnt (this, &loc);
- if (ret) {
+ g_totalfiles = gf_defrag_total_file_cnt (this, &loc);
+ if (!g_totalfiles) {
gf_msg (this->name, GF_LOG_ERROR, 0, 0, "Failed to get "
"the total number of files. Unable to estimate "
"time to complete rebalance.");
}
+ ret = gf_thread_create_detached (&filecnt_thread,
+ &dht_file_counter_thread,
+ (void *)defrag);
+
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, ret, 0, "Failed to "
+ "create the file counter thread ");
+ ret = 0;
+ }
+
/* Initialize global entry queue */
defrag->queue = GF_CALLOC (1, sizeof (struct dht_container),
gf_dht_mt_container_t);
@@ -4412,6 +4460,8 @@ out:
pthread_join (tid[i], NULL);
}
+
+
GF_FREE (tid);
if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) {
@@ -4529,13 +4579,16 @@ uint64_t
gf_defrag_get_estimates (dht_conf_t *conf)
{
gf_defrag_info_t *defrag = NULL;
- double rate_lookedup = 0;
- uint64_t dirs_processed = 0;
- uint64_t total_processed = 0;
- uint64_t tmp_count = 0;
- uint64_t time_to_complete = 0;
- struct timeval end = {0,};
- double elapsed = 0;
+ loc_t loc = {0,};
+ double rate_lookedup = 0;
+ uint64_t dirs_processed = 0;
+ uint64_t files_processed = 0;
+ uint64_t total_processed = 0;
+ uint64_t tmp_count = 0;
+ uint64_t time_to_complete = 0;
+ struct timeval end = {0,};
+ double elapsed = 0;
+
defrag = conf->defrag;
@@ -4552,26 +4605,34 @@ gf_defrag_get_estimates (dht_conf_t *conf)
*/
dirs_processed = defrag->num_dirs_processed;
+ files_processed = defrag->num_files_lookedup;
- total_processed = defrag->num_files_lookedup
- + dirs_processed;
+ total_processed = files_processed + dirs_processed;
+
+ if (total_processed > g_totalfiles) {
+ /* Look up the number of files again.
+ * The problem here is that not all the newly added files
+ * might need to be processed, so this need not work
+ * in some cases.
+ */
+ dht_build_root_loc (defrag->root_inode, &loc);
+ g_totalfiles = gf_defrag_total_file_cnt (defrag->this, &loc);
+ if (!g_totalfiles)
+ goto out;
+ }
/* rate at which files looked up */
rate_lookedup = (total_processed)/elapsed;
-
/* We initially sum up dirs across all local subvols because we get the
* file count from the inodes on each subvol.
* The same directories will be counted for each subvol but
- * we want that they are only counted once.
+ * we want them to be counted once.
*/
tmp_count = g_totalfiles
- (dirs_processed * (conf->local_subvols_cnt - 1));
- if (total_processed > g_totalfiles)
- g_totalfiles = total_processed + 10000;
-
if (rate_lookedup) {
time_to_complete = (tmp_count)/rate_lookedup;
@@ -4586,6 +4647,7 @@ gf_defrag_get_estimates (dht_conf_t *conf)
"rate_lookedup=%f", total_processed, tmp_count,
rate_lookedup);
+out:
return time_to_complete;
}
diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
index 1128cfe..70ae7da 100644
--- a/xlators/cluster/dht/src/dht-shared.c
+++ b/xlators/cluster/dht/src/dht-shared.c
@@ -705,6 +705,7 @@ dht_init (xlator_t *this)
defrag->is_exiting = 0;
conf->defrag = defrag;
+ defrag->this = this;
ret = dict_get_str (this->options, "node-uuid", &node_uuid);
if (ret) {
diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c
index 1fba88a..0f6651d 100644
--- a/xlators/cluster/dht/src/tier.c
+++ b/xlators/cluster/dht/src/tier.c
@@ -2423,6 +2423,7 @@ tier_init (xlator_t *this)
defrag->tier_conf.last_promote_qfile_index = 0;
defrag->tier_conf.is_tier = 1;
+ defrag->this = this;
ret = dict_get_int32 (this->options,
"tier-max-promote-file-size", &maxsize);
--
1.8.3.1