21ab4e
From d3a75f1a21120dd411af3a2f1b33417b6913c29c Mon Sep 17 00:00:00 2001
21ab4e
From: N Balachandran <nbalacha@redhat.com>
21ab4e
Date: Mon, 3 Jul 2017 13:13:35 +0530
21ab4e
Subject: [PATCH 558/566] cluster/dht: Use size to calculate estimates
21ab4e
21ab4e
The earlier approach of using the number of files
21ab4e
to determine when the rebalance would complete did
21ab4e
not work well when file sizes differed widely.
21ab4e
21ab4e
The new approach now gets the total data size and
21ab4e
uses that information to determine how long
21ab4e
the rebalance is expected to take.
21ab4e
21ab4e
> BUG: 1467209
21ab4e
> Signed-off-by: N Balachandran <nbalacha@redhat.com>
21ab4e
> Reviewed-on: https://review.gluster.org/17668
21ab4e
> Smoke: Gluster Build System <jenkins@build.gluster.org>
21ab4e
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
21ab4e
> Reviewed-by: MOHIT AGRAWAL <moagrawa@redhat.com>
21ab4e
> Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
21ab4e
21ab4e
Change-Id: I84e80a0893efab72ff06130e4596fa71c9c8c868
21ab4e
BUG: 1460936
21ab4e
Signed-off-by: N Balachandran <nbalacha@redhat.com>
21ab4e
Reviewed-on: https://code.engineering.redhat.com/gerrit/111921
21ab4e
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
21ab4e
---
21ab4e
 xlators/cluster/dht/src/dht-common.h    |   5 +
21ab4e
 xlators/cluster/dht/src/dht-rebalance.c | 202 ++++++++++++++++++++++++++++----
21ab4e
 xlators/cluster/dht/src/dht-shared.c    |   3 +
21ab4e
 3 files changed, 187 insertions(+), 23 deletions(-)
21ab4e
21ab4e
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
21ab4e
index 0309fb5..28e9bbf 100644
21ab4e
--- a/xlators/cluster/dht/src/dht-common.h
21ab4e
+++ b/xlators/cluster/dht/src/dht-common.h
21ab4e
@@ -425,6 +425,7 @@ struct gf_defrag_info_ {
21ab4e
         uint64_t                     total_failures;
21ab4e
         uint64_t                     skipped;
21ab4e
         uint64_t                     num_dirs_processed;
21ab4e
+        uint64_t                     size_processed;
21ab4e
         gf_lock_t                    lock;
21ab4e
         int                          cmd;
21ab4e
         pthread_t                    th;
21ab4e
@@ -469,6 +470,10 @@ struct gf_defrag_info_ {
21ab4e
 
21ab4e
         /* backpointer to make it easier to write functions for rebalance */
21ab4e
         xlator_t                     *this;
21ab4e
+
21ab4e
+        pthread_cond_t               fc_wakeup_cond;
21ab4e
+        pthread_mutex_t              fc_mutex;
21ab4e
+
21ab4e
 };
21ab4e
 
21ab4e
 typedef struct gf_defrag_info_ gf_defrag_info_t;
21ab4e
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
21ab4e
index 3777527..8f081c3 100644
21ab4e
--- a/xlators/cluster/dht/src/dht-rebalance.c
21ab4e
+++ b/xlators/cluster/dht/src/dht-rebalance.c
21ab4e
@@ -48,6 +48,7 @@
21ab4e
         }                                                       \
21ab4e
 
21ab4e
 uint64_t g_totalfiles = 0;
21ab4e
+uint64_t g_totalsize = 0;
21ab4e
 
21ab4e
 
21ab4e
 void
21ab4e
@@ -2652,6 +2653,7 @@ gf_defrag_migrate_single_file (void *opaque)
21ab4e
                         LOCK (&defrag->lock);
21ab4e
                         {
21ab4e
                                 defrag->skipped += 1;
21ab4e
+                                defrag->size_processed += iatt.ia_size;
21ab4e
                         }
21ab4e
                         UNLOCK (&defrag->lock);
21ab4e
                 } else if (fop_errno == ENOTSUP) {
21ab4e
@@ -2660,6 +2662,7 @@ gf_defrag_migrate_single_file (void *opaque)
21ab4e
                         LOCK (&defrag->lock);
21ab4e
                         {
21ab4e
                                 defrag->skipped += 1;
21ab4e
+                                defrag->size_processed += iatt.ia_size;
21ab4e
                         }
21ab4e
                         UNLOCK (&defrag->lock);
21ab4e
                 } else if (fop_errno != EEXIST) {
21ab4e
@@ -2670,6 +2673,7 @@ gf_defrag_migrate_single_file (void *opaque)
21ab4e
                         LOCK (&defrag->lock);
21ab4e
                         {
21ab4e
                                 defrag->total_failures += 1;
21ab4e
+                                defrag->size_processed += iatt.ia_size;
21ab4e
                         }
21ab4e
                         UNLOCK (&defrag->lock);
21ab4e
 
21ab4e
@@ -2694,6 +2698,7 @@ gf_defrag_migrate_single_file (void *opaque)
21ab4e
         {
21ab4e
                 defrag->total_files += 1;
21ab4e
                 defrag->total_data += iatt.ia_size;
21ab4e
+                defrag->size_processed += iatt.ia_size;
21ab4e
         }
21ab4e
         UNLOCK (&defrag->lock);
21ab4e
 
21ab4e
@@ -2963,8 +2968,11 @@ gf_defrag_get_entry (xlator_t *this, int i, struct dht_container **container,
21ab4e
                     !strcmp (df_entry->d_name, ".."))
21ab4e
                         continue;
21ab4e
 
21ab4e
-                if (IA_ISDIR (df_entry->d_stat.ia_type))
21ab4e
+
21ab4e
+                if (IA_ISDIR (df_entry->d_stat.ia_type)) {
21ab4e
+                        defrag->size_processed += df_entry->d_stat.ia_size;
21ab4e
                         continue;
21ab4e
+                }
21ab4e
 
21ab4e
                 defrag->num_files_lookedup++;
21ab4e
 
21ab4e
@@ -2972,6 +2980,7 @@ gf_defrag_get_entry (xlator_t *this, int i, struct dht_container **container,
21ab4e
                     (gf_defrag_pattern_match (defrag, df_entry->d_name,
21ab4e
                                               df_entry->d_stat.ia_size)
21ab4e
                      == _gf_false)) {
21ab4e
+                        defrag->size_processed += df_entry->d_stat.ia_size;
21ab4e
                         continue;
21ab4e
                 }
21ab4e
 
21ab4e
@@ -4041,10 +4050,25 @@ gf_tier_wait_fix_lookup (gf_defrag_info_t *defrag) {
21ab4e
 /******************Tier background Fix layout functions END********************/
21ab4e
 
21ab4e
 
21ab4e
+uint64_t
21ab4e
+gf_defrag_subvol_file_size (xlator_t *this, loc_t *root_loc)
21ab4e
+{
21ab4e
+        int ret = -1;
21ab4e
+        struct statvfs buf = {0,};
21ab4e
 
21ab4e
+        if (!this)
21ab4e
+                return 0;
21ab4e
 
21ab4e
+        ret = syncop_statfs (this, root_loc, &buf, NULL, NULL);
21ab4e
+        if (ret) {
21ab4e
+                /* Aargh! */
21ab4e
+                return 0;
21ab4e
+        }
21ab4e
+        return ((buf.f_blocks - buf.f_bfree) * buf.f_frsize);
21ab4e
+}
21ab4e
 
21ab4e
-uint64_t gf_defrag_subvol_file_cnt (xlator_t *this, loc_t *root_loc)
21ab4e
+uint64_t
21ab4e
+gf_defrag_subvol_file_cnt (xlator_t *this, loc_t *root_loc)
21ab4e
 {
21ab4e
         int ret = -1;
21ab4e
         struct statvfs buf = {0,};
21ab4e
@@ -4062,6 +4086,35 @@ uint64_t gf_defrag_subvol_file_cnt (xlator_t *this, loc_t *root_loc)
21ab4e
 
21ab4e
 
21ab4e
 uint64_t
21ab4e
+gf_defrag_total_file_size (xlator_t *this, loc_t *root_loc)
21ab4e
+{
21ab4e
+        dht_conf_t    *conf  = NULL;
21ab4e
+        int            i     = 0;
21ab4e
+        uint64_t       size_files = 0;
21ab4e
+        uint64_t       total_size = 0;
21ab4e
+
21ab4e
+        conf = this->private;
21ab4e
+        if (!conf) {
21ab4e
+                return 0;
21ab4e
+        }
21ab4e
+
21ab4e
+        for (i = 0 ; i < conf->local_subvols_cnt; i++) {
21ab4e
+                size_files = gf_defrag_subvol_file_size (conf->local_subvols[i],
21ab4e
+                                                       root_loc);
21ab4e
+                total_size += size_files;
21ab4e
+                gf_msg (this->name, GF_LOG_INFO, 0, 0, "local subvol: %s,"
21ab4e
+                        "cnt = %"PRIu64, conf->local_subvols[i]->name,
21ab4e
+                        size_files);
21ab4e
+        }
21ab4e
+
21ab4e
+        gf_msg (this->name, GF_LOG_INFO, 0, 0,
21ab4e
+                "Total size files = %"PRIu64, total_size);
21ab4e
+
21ab4e
+        return total_size;
21ab4e
+}
21ab4e
+
21ab4e
+
21ab4e
+uint64_t
21ab4e
 gf_defrag_total_file_cnt (xlator_t *this, loc_t *root_loc)
21ab4e
 {
21ab4e
         dht_conf_t    *conf  = NULL;
21ab4e
@@ -4144,8 +4197,12 @@ out:
21ab4e
 static void*
21ab4e
 dht_file_counter_thread (void *args)
21ab4e
 {
21ab4e
-        gf_defrag_info_t *defrag = NULL;
21ab4e
-        loc_t root_loc = {0,};
21ab4e
+        gf_defrag_info_t *defrag      = NULL;
21ab4e
+        loc_t root_loc                = {0,};
21ab4e
+        struct timespec time_to_wait  = {0,};
21ab4e
+        struct timeval now            = {0,};
21ab4e
+        uint64_t tmp_size             = 0;
21ab4e
+
21ab4e
 
21ab4e
         if (!args)
21ab4e
                 return NULL;
21ab4e
@@ -4155,18 +4212,38 @@ dht_file_counter_thread (void *args)
21ab4e
 
21ab4e
         while (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) {
21ab4e
 
21ab4e
-                sleep (FILE_CNT_INTERVAL);
21ab4e
-                g_totalfiles = gf_defrag_total_file_cnt (defrag->this,
21ab4e
+                gettimeofday (&now, NULL);
21ab4e
+                time_to_wait.tv_sec = now.tv_sec + 600;
21ab4e
+                time_to_wait.tv_nsec = 0;
21ab4e
+
21ab4e
+
21ab4e
+                pthread_mutex_lock (&defrag->fc_mutex);
21ab4e
+                pthread_cond_timedwait (&defrag->fc_wakeup_cond,
21ab4e
+                                        &defrag->fc_mutex,
21ab4e
+                                        &time_to_wait);
21ab4e
+
21ab4e
+                pthread_mutex_unlock (&defrag->fc_mutex);
21ab4e
+
21ab4e
+
21ab4e
+                if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED)
21ab4e
+                        break;
21ab4e
+
21ab4e
+                tmp_size = gf_defrag_total_file_size (defrag->this,
21ab4e
                                                          &root_loc);
21ab4e
 
21ab4e
-                if (!g_totalfiles) {
21ab4e
+                gf_log ("dht", GF_LOG_INFO,
21ab4e
+                        "tmp data size =%"PRIu64,
21ab4e
+                        tmp_size);
21ab4e
+
21ab4e
+                if (!tmp_size) {
21ab4e
                         gf_msg ("dht", GF_LOG_ERROR, 0, 0, "Failed to get "
21ab4e
-                                "the total number of files. Unable to estimate "
21ab4e
+                                "the total data size. Unable to estimate "
21ab4e
                                 "time to complete rebalance.");
21ab4e
                 } else {
21ab4e
+                        g_totalsize = tmp_size;
21ab4e
                         gf_msg_debug ("dht", 0,
21ab4e
-                                      "total number of files =%"PRIu64,
21ab4e
-                                      g_totalfiles);
21ab4e
+                                      "total data size =%"PRIu64,
21ab4e
+                                      g_totalsize);
21ab4e
                 }
21ab4e
         }
21ab4e
 
21ab4e
@@ -4201,6 +4278,8 @@ gf_defrag_start_crawl (void *data)
21ab4e
         call_frame_t            *statfs_frame           = NULL;
21ab4e
         xlator_t                *old_THIS               = NULL;
21ab4e
         int                      j                      = 0;
21ab4e
+        gf_boolean_t             fc_thread_started      = _gf_false;
21ab4e
+
21ab4e
 
21ab4e
         this = data;
21ab4e
         if (!this)
21ab4e
@@ -4346,6 +4425,13 @@ gf_defrag_start_crawl (void *data)
21ab4e
                         }
21ab4e
                 }
21ab4e
 
21ab4e
+                g_totalsize = gf_defrag_total_file_size (this, &loc;;
21ab4e
+                if (!g_totalsize) {
21ab4e
+                        gf_msg (this->name, GF_LOG_ERROR, 0, 0, "Failed to get "
21ab4e
+                                "the total data size. Unable to estimate "
21ab4e
+                                "time to complete rebalance.");
21ab4e
+                }
21ab4e
+
21ab4e
                 g_totalfiles = gf_defrag_total_file_cnt (this, &loc;;
21ab4e
                 if (!g_totalfiles) {
21ab4e
                         gf_msg (this->name, GF_LOG_ERROR, 0, 0, "Failed to get "
21ab4e
@@ -4353,16 +4439,19 @@ gf_defrag_start_crawl (void *data)
21ab4e
                                 "time to complete rebalance.");
21ab4e
                 }
21ab4e
 
21ab4e
-                ret = gf_thread_create_detached (&filecnt_thread,
21ab4e
-                                                 &dht_file_counter_thread,
21ab4e
-                                                 (void *)defrag);
21ab4e
+                ret = pthread_create (&filecnt_thread, NULL,
21ab4e
+                                      &dht_file_counter_thread,
21ab4e
+                                      (void *)defrag);
21ab4e
 
21ab4e
                 if (ret) {
21ab4e
                         gf_msg (this->name, GF_LOG_ERROR, ret, 0, "Failed to "
21ab4e
                                 "create the file counter thread ");
21ab4e
                         ret = 0;
21ab4e
+                } else {
21ab4e
+                        fc_thread_started = _gf_true;
21ab4e
                 }
21ab4e
 
21ab4e
+
21ab4e
                 /* Initialize global entry queue */
21ab4e
                 defrag->queue = GF_CALLOC (1, sizeof (struct dht_container),
21ab4e
                                            gf_dht_mt_container_t);
21ab4e
@@ -4479,8 +4568,6 @@ out:
21ab4e
                 pthread_join (tid[i], NULL);
21ab4e
         }
21ab4e
 
21ab4e
-
21ab4e
-
21ab4e
         GF_FREE (tid);
21ab4e
 
21ab4e
         if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) {
21ab4e
@@ -4506,6 +4593,16 @@ out:
21ab4e
                 defrag->defrag_status = GF_DEFRAG_STATUS_COMPLETE;
21ab4e
         }
21ab4e
 
21ab4e
+        if (fc_thread_started) {
21ab4e
+                pthread_mutex_lock (&defrag->fc_mutex);
21ab4e
+                {
21ab4e
+                        pthread_cond_broadcast (&defrag->fc_wakeup_cond);
21ab4e
+                }
21ab4e
+                pthread_mutex_unlock (&defrag->fc_mutex);
21ab4e
+
21ab4e
+                pthread_join (filecnt_thread, NULL);
21ab4e
+        }
21ab4e
+
21ab4e
         dht_send_rebalance_event (this, defrag->cmd, defrag->defrag_status);
21ab4e
 
21ab4e
         LOCK (&defrag->lock);
21ab4e
@@ -4595,6 +4692,52 @@ out:
21ab4e
 
21ab4e
 
21ab4e
 uint64_t
21ab4e
+gf_defrag_get_estimates_based_on_size (dht_conf_t *conf)
21ab4e
+{
21ab4e
+        gf_defrag_info_t *defrag = NULL;
21ab4e
+        double            rate_processed = 0;
21ab4e
+        uint64_t          total_processed = 0;
21ab4e
+        uint64_t          tmp_count = 0;
21ab4e
+        uint64_t          time_to_complete = 0;
21ab4e
+        struct            timeval now = {0,};
21ab4e
+        double            elapsed = 0;
21ab4e
+
21ab4e
+        defrag = conf->defrag;
21ab4e
+
21ab4e
+        if (!g_totalsize)
21ab4e
+                goto out;
21ab4e
+
21ab4e
+        gettimeofday (&now, NULL);
21ab4e
+        elapsed = now.tv_sec - defrag->start_time.tv_sec;
21ab4e
+
21ab4e
+        total_processed = defrag->size_processed;
21ab4e
+
21ab4e
+        /* rate at which files processed */
21ab4e
+        rate_processed = (total_processed)/elapsed;
21ab4e
+
21ab4e
+        tmp_count = g_totalsize;
21ab4e
+
21ab4e
+        if (rate_processed) {
21ab4e
+                time_to_complete = (tmp_count)/rate_processed;
21ab4e
+
21ab4e
+        } else {
21ab4e
+
21ab4e
+                gf_msg (THIS->name, GF_LOG_ERROR, 0, 0,
21ab4e
+                        "Unable to calculate estimated time for rebalance");
21ab4e
+        }
21ab4e
+
21ab4e
+        gf_log (THIS->name, GF_LOG_INFO,
21ab4e
+                "TIME: (size) total_processed=%"PRIu64" tmp_cnt = %"PRIu64","
21ab4e
+                "rate_processed=%f, elapsed = %f", total_processed, tmp_count,
21ab4e
+                rate_processed, elapsed);
21ab4e
+
21ab4e
+out:
21ab4e
+        return time_to_complete;
21ab4e
+}
21ab4e
+
21ab4e
+
21ab4e
+
21ab4e
+uint64_t
21ab4e
 gf_defrag_get_estimates (dht_conf_t *conf)
21ab4e
 {
21ab4e
         gf_defrag_info_t *defrag = NULL;
21ab4e
@@ -4605,17 +4748,17 @@ gf_defrag_get_estimates (dht_conf_t *conf)
21ab4e
         uint64_t          total_processed = 0;
21ab4e
         uint64_t          tmp_count = 0;
21ab4e
         uint64_t          time_to_complete = 0;
21ab4e
-        struct            timeval end = {0,};
21ab4e
+        struct            timeval now = {0,};
21ab4e
         double            elapsed = 0;
21ab4e
 
21ab4e
 
21ab4e
         defrag = conf->defrag;
21ab4e
 
21ab4e
         if (!g_totalfiles)
21ab4e
-                return 0;
21ab4e
+                goto out;
21ab4e
 
21ab4e
-        gettimeofday (&end, NULL);
21ab4e
-        elapsed = end.tv_sec - defrag->start_time.tv_sec;
21ab4e
+        gettimeofday (&now, NULL);
21ab4e
+        elapsed = now.tv_sec - defrag->start_time.tv_sec;
21ab4e
 
21ab4e
         /* I tried locking before accessing num_files_lookedup and
21ab4e
          * num_dirs_processed but the status function
21ab4e
@@ -4662,7 +4805,7 @@ gf_defrag_get_estimates (dht_conf_t *conf)
21ab4e
         }
21ab4e
 
21ab4e
         gf_log (THIS->name, GF_LOG_INFO,
21ab4e
-                "TIME: total_processed=%"PRIu64" tmp_cnt = %"PRIu64","
21ab4e
+                "TIME: (count) total_processed=%"PRIu64" tmp_cnt = %"PRIu64","
21ab4e
                 "rate_lookedup=%f", total_processed, tmp_count,
21ab4e
                 rate_lookedup);
21ab4e
 
21ab4e
@@ -4709,19 +4852,32 @@ gf_defrag_status_get (dht_conf_t *conf, dict_t *dict)
21ab4e
         elapsed = end.tv_sec - defrag->start_time.tv_sec;
21ab4e
 
21ab4e
 
21ab4e
+        /* The rebalance is still in progress */
21ab4e
+
21ab4e
         if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER)
21ab4e
-                && (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED)) {
21ab4e
+            && (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED)) {
21ab4e
 
21ab4e
+/*
21ab4e
                 time_to_complete = gf_defrag_get_estimates (conf);
21ab4e
 
21ab4e
                 if (time_to_complete && (time_to_complete > elapsed))
21ab4e
                         time_left = time_to_complete - elapsed;
21ab4e
 
21ab4e
                 gf_log (THIS->name, GF_LOG_INFO,
21ab4e
-                        "TIME: Estimated total time to complete = %"PRIu64
21ab4e
-                        " seconds, seconds left = %"PRIu64"",
21ab4e
+                        "TIME: Estimated total time to complete based on"
21ab4e
+                        " count = %"PRIu64 " seconds, seconds left = %"PRIu64"",
21ab4e
                         time_to_complete, time_left);
21ab4e
 
21ab4e
+*/
21ab4e
+                time_to_complete = gf_defrag_get_estimates_based_on_size (conf);
21ab4e
+
21ab4e
+                if (time_to_complete && (time_to_complete > elapsed))
21ab4e
+                        time_left = time_to_complete - elapsed;
21ab4e
+
21ab4e
+                gf_log (THIS->name, GF_LOG_INFO,
21ab4e
+                        "TIME: Estimated total time to complete (size)= %"PRIu64
21ab4e
+                        " seconds, seconds left = %"PRIu64"",
21ab4e
+                        time_to_complete, time_left);
21ab4e
         }
21ab4e
 
21ab4e
         if (!dict)
21ab4e
diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
21ab4e
index 70ae7da..031cfbe 100644
21ab4e
--- a/xlators/cluster/dht/src/dht-shared.c
21ab4e
+++ b/xlators/cluster/dht/src/dht-shared.c
21ab4e
@@ -742,6 +742,9 @@ dht_init (xlator_t *this)
21ab4e
                 pthread_cond_init  (&defrag->rebalance_crawler_alarm, 0);
21ab4e
                 pthread_cond_init  (&defrag->df_wakeup_thread, 0);
21ab4e
 
21ab4e
+                pthread_mutex_init (&defrag->fc_mutex, 0);
21ab4e
+                pthread_cond_init  (&defrag->fc_wakeup_cond, 0);
21ab4e
+
21ab4e
                 defrag->global_error = 0;
21ab4e
 
21ab4e
         }
21ab4e
-- 
21ab4e
1.8.3.1
21ab4e