21ab4e
From 1532a98db62428bbacae6abf514e0c33427d0e08 Mon Sep 17 00:00:00 2001
21ab4e
From: N Balachandran <nbalacha@redhat.com>
21ab4e
Date: Thu, 22 Jun 2017 15:56:28 +0530
21ab4e
Subject: [PATCH 529/529] cluster/dht: rebalance gets file count periodically
21ab4e
21ab4e
The rebalance used to get the file count in the beginning
21ab4e
and not update it. This caused estimates to fail
21ab4e
if the number changed during the rebalance.
21ab4e
21ab4e
The rebalance now updates the file count periodically.
21ab4e
21ab4e
> BUG: 1464110
21ab4e
> Signed-off-by: N Balachandran <nbalacha@redhat.com>
21ab4e
> Reviewed-on: https://review.gluster.org/17607
21ab4e
> Smoke: Gluster Build System <jenkins@build.gluster.org>
21ab4e
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
21ab4e
> Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
21ab4e
Change-Id: I1667ee69e8a1d7d6bc6bc2f060fad7f989d19ed4
21ab4e
BUG: 1457731
21ab4e
Signed-off-by: N Balachandran <nbalacha@redhat.com>
21ab4e
Reviewed-on: https://code.engineering.redhat.com/gerrit/109917
21ab4e
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
21ab4e
---
21ab4e
 xlators/cluster/dht/src/dht-common.h    |   3 +
21ab4e
 xlators/cluster/dht/src/dht-rebalance.c | 114 ++++++++++++++++++++++++--------
21ab4e
 xlators/cluster/dht/src/dht-shared.c    |   1 +
21ab4e
 xlators/cluster/dht/src/tier.c          |   1 +
21ab4e
 4 files changed, 93 insertions(+), 26 deletions(-)
21ab4e
21ab4e
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
21ab4e
index b35ef0c..0309fb5 100644
21ab4e
--- a/xlators/cluster/dht/src/dht-common.h
21ab4e
+++ b/xlators/cluster/dht/src/dht-common.h
21ab4e
@@ -466,6 +466,9 @@ struct gf_defrag_info_ {
21ab4e
 
21ab4e
         /* lock migration flag */
21ab4e
         gf_boolean_t                 lock_migration_enabled;
21ab4e
+
21ab4e
+        /* backpointer to make it easier to write functions for rebalance */
21ab4e
+        xlator_t                     *this;
21ab4e
 };
21ab4e
 
21ab4e
 typedef struct gf_defrag_info_ gf_defrag_info_t;
21ab4e
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
21ab4e
index 87e3dc5..5fa7139 100644
21ab4e
--- a/xlators/cluster/dht/src/dht-rebalance.c
21ab4e
+++ b/xlators/cluster/dht/src/dht-rebalance.c
21ab4e
@@ -24,6 +24,7 @@
21ab4e
 #define MAX_MIGRATE_QUEUE_COUNT         500
21ab4e
 #define MIN_MIGRATE_QUEUE_COUNT         200
21ab4e
 #define MAX_REBAL_TYPE_SIZE             16
21ab4e
+#define FILE_CNT_INTERVAL               600 /* 10 mins */
21ab4e
 
21ab4e
 #ifndef MAX
21ab4e
 #define MAX(a, b) (((a) > (b))?(a):(b))
21ab4e
@@ -4040,6 +4041,9 @@ gf_tier_wait_fix_lookup (gf_defrag_info_t *defrag) {
21ab4e
 /******************Tier background Fix layout functions END********************/
21ab4e
 
21ab4e
 
21ab4e
+
21ab4e
+
21ab4e
+
21ab4e
 uint64_t gf_defrag_subvol_file_cnt (xlator_t *this, loc_t *root_loc)
21ab4e
 {
21ab4e
         int ret = -1;
21ab4e
@@ -4057,23 +4061,23 @@ uint64_t gf_defrag_subvol_file_cnt (xlator_t *this, loc_t *root_loc)
21ab4e
 }
21ab4e
 
21ab4e
 
21ab4e
-int gf_defrag_total_file_cnt (xlator_t *this, loc_t *root_loc)
21ab4e
+uint64_t
21ab4e
+gf_defrag_total_file_cnt (xlator_t *this, loc_t *root_loc)
21ab4e
 {
21ab4e
         dht_conf_t    *conf  = NULL;
21ab4e
-        int            ret   = -1;
21ab4e
         int            i     = 0;
21ab4e
         uint64_t       num_files = 0;
21ab4e
-
21ab4e
+        uint64_t       total_entries = 0;
21ab4e
 
21ab4e
         conf = this->private;
21ab4e
         if (!conf) {
21ab4e
-                return ret;
21ab4e
+                return 0;
21ab4e
         }
21ab4e
 
21ab4e
         for (i = 0 ; i < conf->local_subvols_cnt; i++) {
21ab4e
                 num_files = gf_defrag_subvol_file_cnt (conf->local_subvols[i],
21ab4e
                                                        root_loc);
21ab4e
-                g_totalfiles += num_files;
21ab4e
+                total_entries += num_files;
21ab4e
                 gf_msg (this->name, GF_LOG_INFO, 0, 0, "local subvol: %s,"
21ab4e
                         "cnt = %"PRIu64, conf->local_subvols[i]->name,
21ab4e
                         num_files);
21ab4e
@@ -4082,14 +4086,14 @@ int gf_defrag_total_file_cnt (xlator_t *this, loc_t *root_loc)
21ab4e
         /* FIXFIXFIX: halve the number of files to negate .glusterfs contents
21ab4e
            We need a better way to figure this out */
21ab4e
 
21ab4e
-        g_totalfiles = g_totalfiles/2;
21ab4e
-        if (g_totalfiles > 20000)
21ab4e
-                g_totalfiles += 10000;
21ab4e
+        total_entries = total_entries/2;
21ab4e
+        if (total_entries > 20000)
21ab4e
+                total_entries += 10000;
21ab4e
 
21ab4e
         gf_msg (this->name, GF_LOG_INFO, 0, 0,
21ab4e
-                "Total number of files = %"PRIu64, g_totalfiles);
21ab4e
+                "Total number of files = %"PRIu64, total_entries);
21ab4e
 
21ab4e
-        return 0;
21ab4e
+        return total_entries;
21ab4e
 }
21ab4e
 
21ab4e
 
21ab4e
@@ -4118,6 +4122,39 @@ out:
21ab4e
         return ret;
21ab4e
 }
21ab4e
 
21ab4e
+static void*
21ab4e
+dht_file_counter_thread (void *args)
21ab4e
+{
21ab4e
+        gf_defrag_info_t *defrag = NULL;
21ab4e
+        loc_t root_loc = {0,};
21ab4e
+
21ab4e
+        if (!args)
21ab4e
+                return NULL;
21ab4e
+
21ab4e
+        defrag = (gf_defrag_info_t *) args;
21ab4e
+        dht_build_root_loc (defrag->root_inode, &root_loc);
21ab4e
+
21ab4e
+        while (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) {
21ab4e
+
21ab4e
+                sleep (FILE_CNT_INTERVAL);
21ab4e
+                g_totalfiles = gf_defrag_total_file_cnt (defrag->this,
21ab4e
+                                                         &root_loc);
21ab4e
+
21ab4e
+                if (!g_totalfiles) {
21ab4e
+                        gf_msg ("dht", GF_LOG_ERROR, 0, 0, "Failed to get "
21ab4e
+                                "the total number of files. Unable to estimate "
21ab4e
+                                "time to complete rebalance.");
21ab4e
+                } else {
21ab4e
+                        gf_msg_debug ("dht", 0,
21ab4e
+                                      "total number of files =%"PRIu64,
21ab4e
+                                      g_totalfiles);
21ab4e
+                }
21ab4e
+        }
21ab4e
+
21ab4e
+        return NULL;
21ab4e
+}
21ab4e
+
21ab4e
+
21ab4e
 
21ab4e
 int
21ab4e
 gf_defrag_start_crawl (void *data)
21ab4e
@@ -4140,6 +4177,7 @@ gf_defrag_start_crawl (void *data)
21ab4e
         int                      err                    = 0;
21ab4e
         int                      thread_spawn_count     = 0;
21ab4e
         pthread_t               *tid                    = NULL;
21ab4e
+        pthread_t                filecnt_thread;
21ab4e
         gf_boolean_t             is_tier_detach         = _gf_false;
21ab4e
         call_frame_t            *statfs_frame           = NULL;
21ab4e
         xlator_t                *old_THIS               = NULL;
21ab4e
@@ -4289,13 +4327,23 @@ gf_defrag_start_crawl (void *data)
21ab4e
                         }
21ab4e
                 }
21ab4e
 
21ab4e
-                ret = gf_defrag_total_file_cnt (this, &loc);
21ab4e
-                if (ret) {
21ab4e
+                g_totalfiles = gf_defrag_total_file_cnt (this, &loc);
21ab4e
+                if (!g_totalfiles) {
21ab4e
                         gf_msg (this->name, GF_LOG_ERROR, 0, 0, "Failed to get "
21ab4e
                                 "the total number of files. Unable to estimate "
21ab4e
                                 "time to complete rebalance.");
21ab4e
                 }
21ab4e
 
21ab4e
+                ret = gf_thread_create_detached (&filecnt_thread,
21ab4e
+                                                 &dht_file_counter_thread,
21ab4e
+                                                 (void *)defrag);
21ab4e
+
21ab4e
+                if (ret) {
21ab4e
+                        gf_msg (this->name, GF_LOG_ERROR, ret, 0, "Failed to "
21ab4e
+                                "create the file counter thread ");
21ab4e
+                        ret = 0;
21ab4e
+                }
21ab4e
+
21ab4e
                 /* Initialize global entry queue */
21ab4e
                 defrag->queue = GF_CALLOC (1, sizeof (struct dht_container),
21ab4e
                                            gf_dht_mt_container_t);
21ab4e
@@ -4412,6 +4460,8 @@ out:
21ab4e
                 pthread_join (tid[i], NULL);
21ab4e
         }
21ab4e
 
21ab4e
+
21ab4e
+
21ab4e
         GF_FREE (tid);
21ab4e
 
21ab4e
         if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) {
21ab4e
@@ -4529,13 +4579,16 @@ uint64_t
21ab4e
 gf_defrag_get_estimates (dht_conf_t *conf)
21ab4e
 {
21ab4e
         gf_defrag_info_t *defrag = NULL;
21ab4e
-        double rate_lookedup = 0;
21ab4e
-        uint64_t dirs_processed = 0;
21ab4e
-        uint64_t total_processed = 0;
21ab4e
-        uint64_t tmp_count = 0;
21ab4e
-        uint64_t time_to_complete = 0;
21ab4e
-        struct timeval end = {0,};
21ab4e
-        double   elapsed = 0;
21ab4e
+        loc_t             loc = {0,};
21ab4e
+        double            rate_lookedup = 0;
21ab4e
+        uint64_t          dirs_processed = 0;
21ab4e
+        uint64_t          files_processed = 0;
21ab4e
+        uint64_t          total_processed = 0;
21ab4e
+        uint64_t          tmp_count = 0;
21ab4e
+        uint64_t          time_to_complete = 0;
21ab4e
+        struct            timeval end = {0,};
21ab4e
+        double            elapsed = 0;
21ab4e
+
21ab4e
 
21ab4e
         defrag = conf->defrag;
21ab4e
 
21ab4e
@@ -4552,26 +4605,34 @@ gf_defrag_get_estimates (dht_conf_t *conf)
21ab4e
          */
21ab4e
 
21ab4e
         dirs_processed = defrag->num_dirs_processed;
21ab4e
+        files_processed = defrag->num_files_lookedup;
21ab4e
 
21ab4e
-        total_processed = defrag->num_files_lookedup
21ab4e
-                           + dirs_processed;
21ab4e
+        total_processed = files_processed + dirs_processed;
21ab4e
+
21ab4e
+        if (total_processed > g_totalfiles) {
21ab4e
+                /* lookup the number of files again
21ab4e
+                 * The problem here is that not all the newly added files
21ab4e
+                 * might need to be processed. So this need not work
21ab4e
+                 * in some cases
21ab4e
+                 */
21ab4e
+                dht_build_root_loc (defrag->root_inode, &loc);
21ab4e
+                g_totalfiles = gf_defrag_total_file_cnt (defrag->this, &loc);
21ab4e
+                if (!g_totalfiles)
21ab4e
+                        goto out;
21ab4e
+        }
21ab4e
 
21ab4e
         /* rate at which files looked up */
21ab4e
         rate_lookedup = (total_processed)/elapsed;
21ab4e
 
21ab4e
-
21ab4e
         /* We initially sum up dirs across all local subvols because we get the
21ab4e
          * file count from the inodes on each subvol.
21ab4e
          * The same directories will be counted for each subvol but
21ab4e
-         * we want that they are only counted once.
21ab4e
+         * we want them to be counted once.
21ab4e
          */
21ab4e
 
21ab4e
         tmp_count = g_totalfiles
21ab4e
                      - (dirs_processed * (conf->local_subvols_cnt - 1));
21ab4e
 
21ab4e
-        if (total_processed > g_totalfiles)
21ab4e
-                g_totalfiles = total_processed + 10000;
21ab4e
-
21ab4e
         if (rate_lookedup) {
21ab4e
                 time_to_complete = (tmp_count)/rate_lookedup;
21ab4e
 
21ab4e
@@ -4586,6 +4647,7 @@ gf_defrag_get_estimates (dht_conf_t *conf)
21ab4e
                 "rate_lookedup=%f", total_processed, tmp_count,
21ab4e
                 rate_lookedup);
21ab4e
 
21ab4e
+out:
21ab4e
         return time_to_complete;
21ab4e
 }
21ab4e
 
21ab4e
diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
21ab4e
index 1128cfe..70ae7da 100644
21ab4e
--- a/xlators/cluster/dht/src/dht-shared.c
21ab4e
+++ b/xlators/cluster/dht/src/dht-shared.c
21ab4e
@@ -705,6 +705,7 @@ dht_init (xlator_t *this)
21ab4e
                 defrag->is_exiting = 0;
21ab4e
 
21ab4e
                 conf->defrag = defrag;
21ab4e
+                defrag->this = this;
21ab4e
 
21ab4e
                 ret = dict_get_str (this->options, "node-uuid", &node_uuid);
21ab4e
                 if (ret) {
21ab4e
diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c
21ab4e
index 1fba88a..0f6651d 100644
21ab4e
--- a/xlators/cluster/dht/src/tier.c
21ab4e
+++ b/xlators/cluster/dht/src/tier.c
21ab4e
@@ -2423,6 +2423,7 @@ tier_init (xlator_t *this)
21ab4e
         defrag->tier_conf.last_promote_qfile_index = 0;
21ab4e
 
21ab4e
         defrag->tier_conf.is_tier = 1;
21ab4e
+        defrag->this = this;
21ab4e
 
21ab4e
         ret = dict_get_int32 (this->options,
21ab4e
                               "tier-max-promote-file-size", &maxsize);
21ab4e
-- 
21ab4e
1.8.3.1
21ab4e