From d3a75f1a21120dd411af3a2f1b33417b6913c29c Mon Sep 17 00:00:00 2001
From: N Balachandran <nbalacha@redhat.com>
Date: Mon, 3 Jul 2017 13:13:35 +0530
Subject: [PATCH 558/566] cluster/dht: Use size to calculate estimates
The earlier approach of using the number of files
to determine when the rebalance would complete did
not work well when file sizes differed widely.
The new approach now gets the total data size and
uses that information to determine how long
the rebalance is expected to take.
> BUG: 1467209
> Signed-off-by: N Balachandran <nbalacha@redhat.com>
> Reviewed-on: https://review.gluster.org/17668
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
> Reviewed-by: MOHIT AGRAWAL <moagrawa@redhat.com>
> Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
Change-Id: I84e80a0893efab72ff06130e4596fa71c9c8c868
BUG: 1460936
Signed-off-by: N Balachandran <nbalacha@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/111921
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
xlators/cluster/dht/src/dht-common.h | 5 +
xlators/cluster/dht/src/dht-rebalance.c | 202 ++++++++++++++++++++++++++++----
xlators/cluster/dht/src/dht-shared.c | 3 +
3 files changed, 187 insertions(+), 23 deletions(-)
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 0309fb5..28e9bbf 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -425,6 +425,7 @@ struct gf_defrag_info_ {
uint64_t total_failures;
uint64_t skipped;
uint64_t num_dirs_processed;
+ uint64_t size_processed;
gf_lock_t lock;
int cmd;
pthread_t th;
@@ -469,6 +470,10 @@ struct gf_defrag_info_ {
/* backpointer to make it easier to write functions for rebalance */
xlator_t *this;
+
+ pthread_cond_t fc_wakeup_cond;
+ pthread_mutex_t fc_mutex;
+
};
typedef struct gf_defrag_info_ gf_defrag_info_t;
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 3777527..8f081c3 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -48,6 +48,7 @@
} \
uint64_t g_totalfiles = 0;
+uint64_t g_totalsize = 0;
void
@@ -2652,6 +2653,7 @@ gf_defrag_migrate_single_file (void *opaque)
LOCK (&defrag->lock);
{
defrag->skipped += 1;
+ defrag->size_processed += iatt.ia_size;
}
UNLOCK (&defrag->lock);
} else if (fop_errno == ENOTSUP) {
@@ -2660,6 +2662,7 @@ gf_defrag_migrate_single_file (void *opaque)
LOCK (&defrag->lock);
{
defrag->skipped += 1;
+ defrag->size_processed += iatt.ia_size;
}
UNLOCK (&defrag->lock);
} else if (fop_errno != EEXIST) {
@@ -2670,6 +2673,7 @@ gf_defrag_migrate_single_file (void *opaque)
LOCK (&defrag->lock);
{
defrag->total_failures += 1;
+ defrag->size_processed += iatt.ia_size;
}
UNLOCK (&defrag->lock);
@@ -2694,6 +2698,7 @@ gf_defrag_migrate_single_file (void *opaque)
{
defrag->total_files += 1;
defrag->total_data += iatt.ia_size;
+ defrag->size_processed += iatt.ia_size;
}
UNLOCK (&defrag->lock);
@@ -2963,8 +2968,11 @@ gf_defrag_get_entry (xlator_t *this, int i, struct dht_container **container,
!strcmp (df_entry->d_name, ".."))
continue;
- if (IA_ISDIR (df_entry->d_stat.ia_type))
+
+ if (IA_ISDIR (df_entry->d_stat.ia_type)) {
+ defrag->size_processed += df_entry->d_stat.ia_size;
continue;
+ }
defrag->num_files_lookedup++;
@@ -2972,6 +2980,7 @@ gf_defrag_get_entry (xlator_t *this, int i, struct dht_container **container,
(gf_defrag_pattern_match (defrag, df_entry->d_name,
df_entry->d_stat.ia_size)
== _gf_false)) {
+ defrag->size_processed += df_entry->d_stat.ia_size;
continue;
}
@@ -4041,10 +4050,25 @@ gf_tier_wait_fix_lookup (gf_defrag_info_t *defrag) {
/******************Tier background Fix layout functions END********************/
+uint64_t
+gf_defrag_subvol_file_size (xlator_t *this, loc_t *root_loc)
+{
+ int ret = -1;
+ struct statvfs buf = {0,};
+ if (!this)
+ return 0;
+ ret = syncop_statfs (this, root_loc, &buf, NULL, NULL);
+ if (ret) {
+ /* Aargh! */
+ return 0;
+ }
+ return ((buf.f_blocks - buf.f_bfree) * buf.f_frsize);
+}
-uint64_t gf_defrag_subvol_file_cnt (xlator_t *this, loc_t *root_loc)
+uint64_t
+gf_defrag_subvol_file_cnt (xlator_t *this, loc_t *root_loc)
{
int ret = -1;
struct statvfs buf = {0,};
@@ -4062,6 +4086,35 @@ uint64_t gf_defrag_subvol_file_cnt (xlator_t *this, loc_t *root_loc)
uint64_t
+gf_defrag_total_file_size (xlator_t *this, loc_t *root_loc)
+{
+ dht_conf_t *conf = NULL;
+ int i = 0;
+ uint64_t size_files = 0;
+ uint64_t total_size = 0;
+
+ conf = this->private;
+ if (!conf) {
+ return 0;
+ }
+
+ for (i = 0 ; i < conf->local_subvols_cnt; i++) {
+ size_files = gf_defrag_subvol_file_size (conf->local_subvols[i],
+ root_loc);
+ total_size += size_files;
+ gf_msg (this->name, GF_LOG_INFO, 0, 0, "local subvol: %s,"
+ "cnt = %"PRIu64, conf->local_subvols[i]->name,
+ size_files);
+ }
+
+ gf_msg (this->name, GF_LOG_INFO, 0, 0,
+ "Total size files = %"PRIu64, total_size);
+
+ return total_size;
+}
+
+
+uint64_t
gf_defrag_total_file_cnt (xlator_t *this, loc_t *root_loc)
{
dht_conf_t *conf = NULL;
@@ -4144,8 +4197,12 @@ out:
static void*
dht_file_counter_thread (void *args)
{
- gf_defrag_info_t *defrag = NULL;
- loc_t root_loc = {0,};
+ gf_defrag_info_t *defrag = NULL;
+ loc_t root_loc = {0,};
+ struct timespec time_to_wait = {0,};
+ struct timeval now = {0,};
+ uint64_t tmp_size = 0;
+
if (!args)
return NULL;
@@ -4155,18 +4212,38 @@ dht_file_counter_thread (void *args)
while (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) {
- sleep (FILE_CNT_INTERVAL);
- g_totalfiles = gf_defrag_total_file_cnt (defrag->this,
+ gettimeofday (&now, NULL);
+ time_to_wait.tv_sec = now.tv_sec + 600;
+ time_to_wait.tv_nsec = 0;
+
+
+ pthread_mutex_lock (&defrag->fc_mutex);
+ pthread_cond_timedwait (&defrag->fc_wakeup_cond,
+ &defrag->fc_mutex,
+ &time_to_wait);
+
+ pthread_mutex_unlock (&defrag->fc_mutex);
+
+
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED)
+ break;
+
+ tmp_size = gf_defrag_total_file_size (defrag->this,
&root_loc);
- if (!g_totalfiles) {
+ gf_log ("dht", GF_LOG_INFO,
+ "tmp data size =%"PRIu64,
+ tmp_size);
+
+ if (!tmp_size) {
gf_msg ("dht", GF_LOG_ERROR, 0, 0, "Failed to get "
- "the total number of files. Unable to estimate "
+ "the total data size. Unable to estimate "
"time to complete rebalance.");
} else {
+ g_totalsize = tmp_size;
gf_msg_debug ("dht", 0,
- "total number of files =%"PRIu64,
- g_totalfiles);
+ "total data size =%"PRIu64,
+ g_totalsize);
}
}
@@ -4201,6 +4278,8 @@ gf_defrag_start_crawl (void *data)
call_frame_t *statfs_frame = NULL;
xlator_t *old_THIS = NULL;
int j = 0;
+ gf_boolean_t fc_thread_started = _gf_false;
+
this = data;
if (!this)
@@ -4346,6 +4425,13 @@ gf_defrag_start_crawl (void *data)
}
}
+ g_totalsize = gf_defrag_total_file_size (this, &loc);
+ if (!g_totalsize) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, 0, "Failed to get "
+ "the total data size. Unable to estimate "
+ "time to complete rebalance.");
+ }
+
g_totalfiles = gf_defrag_total_file_cnt (this, &loc);
if (!g_totalfiles) {
gf_msg (this->name, GF_LOG_ERROR, 0, 0, "Failed to get "
@@ -4353,16 +4439,19 @@ gf_defrag_start_crawl (void *data)
"time to complete rebalance.");
}
- ret = gf_thread_create_detached (&filecnt_thread,
- &dht_file_counter_thread,
- (void *)defrag);
+ ret = pthread_create (&filecnt_thread, NULL,
+ &dht_file_counter_thread,
+ (void *)defrag);
if (ret) {
gf_msg (this->name, GF_LOG_ERROR, ret, 0, "Failed to "
"create the file counter thread ");
ret = 0;
+ } else {
+ fc_thread_started = _gf_true;
}
+
/* Initialize global entry queue */
defrag->queue = GF_CALLOC (1, sizeof (struct dht_container),
gf_dht_mt_container_t);
@@ -4479,8 +4568,6 @@ out:
pthread_join (tid[i], NULL);
}
-
-
GF_FREE (tid);
if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) {
@@ -4506,6 +4593,16 @@ out:
defrag->defrag_status = GF_DEFRAG_STATUS_COMPLETE;
}
+ if (fc_thread_started) {
+ pthread_mutex_lock (&defrag->fc_mutex);
+ {
+ pthread_cond_broadcast (&defrag->fc_wakeup_cond);
+ }
+ pthread_mutex_unlock (&defrag->fc_mutex);
+
+ pthread_join (filecnt_thread, NULL);
+ }
+
dht_send_rebalance_event (this, defrag->cmd, defrag->defrag_status);
LOCK (&defrag->lock);
@@ -4595,6 +4692,52 @@ out:
uint64_t
+gf_defrag_get_estimates_based_on_size (dht_conf_t *conf)
+{
+ gf_defrag_info_t *defrag = NULL;
+ double rate_processed = 0;
+ uint64_t total_processed = 0;
+ uint64_t tmp_count = 0;
+ uint64_t time_to_complete = 0;
+ struct timeval now = {0,};
+ double elapsed = 0;
+
+ defrag = conf->defrag;
+
+ if (!g_totalsize)
+ goto out;
+
+ gettimeofday (&now, NULL);
+ elapsed = now.tv_sec - defrag->start_time.tv_sec;
+
+ total_processed = defrag->size_processed;
+
+ /* rate at which files processed */
+ rate_processed = (total_processed)/elapsed;
+
+ tmp_count = g_totalsize;
+
+ if (rate_processed) {
+ time_to_complete = (tmp_count)/rate_processed;
+
+ } else {
+
+ gf_msg (THIS->name, GF_LOG_ERROR, 0, 0,
+ "Unable to calculate estimated time for rebalance");
+ }
+
+ gf_log (THIS->name, GF_LOG_INFO,
+ "TIME: (size) total_processed=%"PRIu64" tmp_cnt = %"PRIu64","
+ "rate_processed=%f, elapsed = %f", total_processed, tmp_count,
+ rate_processed, elapsed);
+
+out:
+ return time_to_complete;
+}
+
+
+
+uint64_t
gf_defrag_get_estimates (dht_conf_t *conf)
{
gf_defrag_info_t *defrag = NULL;
@@ -4605,17 +4748,17 @@ gf_defrag_get_estimates (dht_conf_t *conf)
uint64_t total_processed = 0;
uint64_t tmp_count = 0;
uint64_t time_to_complete = 0;
- struct timeval end = {0,};
+ struct timeval now = {0,};
double elapsed = 0;
defrag = conf->defrag;
if (!g_totalfiles)
- return 0;
+ goto out;
- gettimeofday (&end, NULL);
- elapsed = end.tv_sec - defrag->start_time.tv_sec;
+ gettimeofday (&now, NULL);
+ elapsed = now.tv_sec - defrag->start_time.tv_sec;
/* I tried locking before accessing num_files_lookedup and
* num_dirs_processed but the status function
@@ -4662,7 +4805,7 @@ gf_defrag_get_estimates (dht_conf_t *conf)
}
gf_log (THIS->name, GF_LOG_INFO,
- "TIME: total_processed=%"PRIu64" tmp_cnt = %"PRIu64","
+ "TIME: (count) total_processed=%"PRIu64" tmp_cnt = %"PRIu64","
"rate_lookedup=%f", total_processed, tmp_count,
rate_lookedup);
@@ -4709,19 +4852,32 @@ gf_defrag_status_get (dht_conf_t *conf, dict_t *dict)
elapsed = end.tv_sec - defrag->start_time.tv_sec;
+ /* The rebalance is still in progress */
+
if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER)
- && (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED)) {
+ && (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED)) {
+/*
time_to_complete = gf_defrag_get_estimates (conf);
if (time_to_complete && (time_to_complete > elapsed))
time_left = time_to_complete - elapsed;
gf_log (THIS->name, GF_LOG_INFO,
- "TIME: Estimated total time to complete = %"PRIu64
- " seconds, seconds left = %"PRIu64"",
+ "TIME: Estimated total time to complete based on"
+ " count = %"PRIu64 " seconds, seconds left = %"PRIu64"",
time_to_complete, time_left);
+*/
+ time_to_complete = gf_defrag_get_estimates_based_on_size (conf);
+
+ if (time_to_complete && (time_to_complete > elapsed))
+ time_left = time_to_complete - elapsed;
+
+ gf_log (THIS->name, GF_LOG_INFO,
+ "TIME: Estimated total time to complete (size)= %"PRIu64
+ " seconds, seconds left = %"PRIu64"",
+ time_to_complete, time_left);
}
if (!dict)
diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
index 70ae7da..031cfbe 100644
--- a/xlators/cluster/dht/src/dht-shared.c
+++ b/xlators/cluster/dht/src/dht-shared.c
@@ -742,6 +742,9 @@ dht_init (xlator_t *this)
pthread_cond_init (&defrag->rebalance_crawler_alarm, 0);
pthread_cond_init (&defrag->df_wakeup_thread, 0);
+ pthread_mutex_init (&defrag->fc_mutex, 0);
+ pthread_cond_init (&defrag->fc_wakeup_cond, 0);
+
defrag->global_error = 0;
}
--
1.8.3.1