Blob Blame History Raw
From 1532a98db62428bbacae6abf514e0c33427d0e08 Mon Sep 17 00:00:00 2001
From: N Balachandran <nbalacha@redhat.com>
Date: Thu, 22 Jun 2017 15:56:28 +0530
Subject: [PATCH 529/529] cluster/dht: rebalance gets file count periodically

The rebalance process used to get the file count once at the
beginning and never update it. This caused the time estimates
to fail if the number of files changed during the rebalance.

The rebalance now updates the file count periodically.

> BUG: 1464110
> Signed-off-by: N Balachandran <nbalacha@redhat.com>
> Reviewed-on: https://review.gluster.org/17607
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
> Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
Change-Id: I1667ee69e8a1d7d6bc6bc2f060fad7f989d19ed4
BUG: 1457731
Signed-off-by: N Balachandran <nbalacha@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/109917
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
 xlators/cluster/dht/src/dht-common.h    |   3 +
 xlators/cluster/dht/src/dht-rebalance.c | 114 ++++++++++++++++++++++++--------
 xlators/cluster/dht/src/dht-shared.c    |   1 +
 xlators/cluster/dht/src/tier.c          |   1 +
 4 files changed, 93 insertions(+), 26 deletions(-)

diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index b35ef0c..0309fb5 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -466,6 +466,9 @@ struct gf_defrag_info_ {
 
         /* lock migration flag */
         gf_boolean_t                 lock_migration_enabled;
+
+        /* backpointer to make it easier to write functions for rebalance */
+        xlator_t                     *this;
 };
 
 typedef struct gf_defrag_info_ gf_defrag_info_t;
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 87e3dc5..5fa7139 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -24,6 +24,7 @@
 #define MAX_MIGRATE_QUEUE_COUNT         500
 #define MIN_MIGRATE_QUEUE_COUNT         200
 #define MAX_REBAL_TYPE_SIZE             16
+#define FILE_CNT_INTERVAL               600 /* 10 mins */
 
 #ifndef MAX
 #define MAX(a, b) (((a) > (b))?(a):(b))
@@ -4040,6 +4041,9 @@ gf_tier_wait_fix_lookup (gf_defrag_info_t *defrag) {
 /******************Tier background Fix layout functions END********************/
 
 
+
+
+
 uint64_t gf_defrag_subvol_file_cnt (xlator_t *this, loc_t *root_loc)
 {
         int ret = -1;
@@ -4057,23 +4061,23 @@ uint64_t gf_defrag_subvol_file_cnt (xlator_t *this, loc_t *root_loc)
 }
 
 
-int gf_defrag_total_file_cnt (xlator_t *this, loc_t *root_loc)
+uint64_t
+gf_defrag_total_file_cnt (xlator_t *this, loc_t *root_loc)
 {
         dht_conf_t    *conf  = NULL;
-        int            ret   = -1;
         int            i     = 0;
         uint64_t       num_files = 0;
-
+        uint64_t       total_entries = 0;
 
         conf = this->private;
         if (!conf) {
-                return ret;
+                return 0;
         }
 
         for (i = 0 ; i < conf->local_subvols_cnt; i++) {
                 num_files = gf_defrag_subvol_file_cnt (conf->local_subvols[i],
                                                        root_loc);
-                g_totalfiles += num_files;
+                total_entries += num_files;
                 gf_msg (this->name, GF_LOG_INFO, 0, 0, "local subvol: %s,"
                         "cnt = %"PRIu64, conf->local_subvols[i]->name,
                         num_files);
@@ -4082,14 +4086,14 @@ int gf_defrag_total_file_cnt (xlator_t *this, loc_t *root_loc)
         /* FIXFIXFIX: halve the number of files to negate .glusterfs contents
            We need a better way to figure this out */
 
-        g_totalfiles = g_totalfiles/2;
-        if (g_totalfiles > 20000)
-                g_totalfiles += 10000;
+        total_entries = total_entries/2;
+        if (total_entries > 20000)
+                total_entries += 10000;
 
         gf_msg (this->name, GF_LOG_INFO, 0, 0,
-                "Total number of files = %"PRIu64, g_totalfiles);
+                "Total number of files = %"PRIu64, total_entries);
 
-        return 0;
+        return total_entries;
 }
 
 
@@ -4118,6 +4122,39 @@ out:
         return ret;
 }
 
+static void*
+dht_file_counter_thread (void *args)
+{
+        gf_defrag_info_t *defrag = NULL;
+        loc_t root_loc = {0,};
+
+        if (!args)
+                return NULL;
+
+        defrag = (gf_defrag_info_t *) args;
+        dht_build_root_loc (defrag->root_inode, &root_loc);
+
+        while (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) {
+
+                sleep (FILE_CNT_INTERVAL);
+                g_totalfiles = gf_defrag_total_file_cnt (defrag->this,
+                                                         &root_loc);
+
+                if (!g_totalfiles) {
+                        gf_msg ("dht", GF_LOG_ERROR, 0, 0, "Failed to get "
+                                "the total number of files. Unable to estimate "
+                                "time to complete rebalance.");
+                } else {
+                        gf_msg_debug ("dht", 0,
+                                      "total number of files =%"PRIu64,
+                                      g_totalfiles);
+                }
+        }
+
+        return NULL;
+}
+
+
 
 int
 gf_defrag_start_crawl (void *data)
@@ -4140,6 +4177,7 @@ gf_defrag_start_crawl (void *data)
         int                      err                    = 0;
         int                      thread_spawn_count     = 0;
         pthread_t               *tid                    = NULL;
+        pthread_t                filecnt_thread;
         gf_boolean_t             is_tier_detach         = _gf_false;
         call_frame_t            *statfs_frame           = NULL;
         xlator_t                *old_THIS               = NULL;
@@ -4289,13 +4327,23 @@ gf_defrag_start_crawl (void *data)
                         }
                 }
 
-                ret = gf_defrag_total_file_cnt (this, &loc);
-                if (ret) {
+                g_totalfiles = gf_defrag_total_file_cnt (this, &loc);
+                if (!g_totalfiles) {
                         gf_msg (this->name, GF_LOG_ERROR, 0, 0, "Failed to get "
                                 "the total number of files. Unable to estimate "
                                 "time to complete rebalance.");
                 }
 
+                ret = gf_thread_create_detached (&filecnt_thread,
+                                                 &dht_file_counter_thread,
+                                                 (void *)defrag);
+
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, ret, 0, "Failed to "
+                                "create the file counter thread ");
+                        ret = 0;
+                }
+
                 /* Initialize global entry queue */
                 defrag->queue = GF_CALLOC (1, sizeof (struct dht_container),
                                            gf_dht_mt_container_t);
@@ -4412,6 +4460,8 @@ out:
                 pthread_join (tid[i], NULL);
         }
 
+
+
         GF_FREE (tid);
 
         if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) {
@@ -4529,13 +4579,16 @@ uint64_t
 gf_defrag_get_estimates (dht_conf_t *conf)
 {
         gf_defrag_info_t *defrag = NULL;
-        double rate_lookedup = 0;
-        uint64_t dirs_processed = 0;
-        uint64_t total_processed = 0;
-        uint64_t tmp_count = 0;
-        uint64_t time_to_complete = 0;
-        struct timeval end = {0,};
-        double   elapsed = 0;
+        loc_t             loc = {0,};
+        double            rate_lookedup = 0;
+        uint64_t          dirs_processed = 0;
+        uint64_t          files_processed = 0;
+        uint64_t          total_processed = 0;
+        uint64_t          tmp_count = 0;
+        uint64_t          time_to_complete = 0;
+        struct            timeval end = {0,};
+        double            elapsed = 0;
+
 
         defrag = conf->defrag;
 
@@ -4552,26 +4605,34 @@ gf_defrag_get_estimates (dht_conf_t *conf)
          */
 
         dirs_processed = defrag->num_dirs_processed;
+        files_processed = defrag->num_files_lookedup;
 
-        total_processed = defrag->num_files_lookedup
-                           + dirs_processed;
+        total_processed = files_processed + dirs_processed;
+
+        if (total_processed > g_totalfiles) {
+                /* Look up the number of files again.
+                 * Not all of the newly added files necessarily need to
+                 * be processed, so the refreshed count may still give
+                 * an inaccurate estimate in some cases.
+                 */
+                dht_build_root_loc (defrag->root_inode, &loc);
+                g_totalfiles = gf_defrag_total_file_cnt (defrag->this, &loc);
+                if (!g_totalfiles)
+                        goto out;
+        }
 
         /* rate at which files looked up */
         rate_lookedup = (total_processed)/elapsed;
 
-
         /* We initially sum up dirs across all local subvols because we get the
          * file count from the inodes on each subvol.
          * The same directories will be counted for each subvol but
-         * we want that they are only counted once.
+         * we want them to be counted once.
          */
 
         tmp_count = g_totalfiles
                      - (dirs_processed * (conf->local_subvols_cnt - 1));
 
-        if (total_processed > g_totalfiles)
-                g_totalfiles = total_processed + 10000;
-
         if (rate_lookedup) {
                 time_to_complete = (tmp_count)/rate_lookedup;
 
@@ -4586,6 +4647,7 @@ gf_defrag_get_estimates (dht_conf_t *conf)
                 "rate_lookedup=%f", total_processed, tmp_count,
                 rate_lookedup);
 
+out:
         return time_to_complete;
 }
 
diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
index 1128cfe..70ae7da 100644
--- a/xlators/cluster/dht/src/dht-shared.c
+++ b/xlators/cluster/dht/src/dht-shared.c
@@ -705,6 +705,7 @@ dht_init (xlator_t *this)
                 defrag->is_exiting = 0;
 
                 conf->defrag = defrag;
+                defrag->this = this;
 
                 ret = dict_get_str (this->options, "node-uuid", &node_uuid);
                 if (ret) {
diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c
index 1fba88a..0f6651d 100644
--- a/xlators/cluster/dht/src/tier.c
+++ b/xlators/cluster/dht/src/tier.c
@@ -2423,6 +2423,7 @@ tier_init (xlator_t *this)
         defrag->tier_conf.last_promote_qfile_index = 0;
 
         defrag->tier_conf.is_tier = 1;
+        defrag->this = this;
 
         ret = dict_get_int32 (this->options,
                               "tier-max-promote-file-size", &maxsize);
-- 
1.8.3.1