21ab4e
From 89d1add3c4f68afae9816c33e26a7bfe41269297 Mon Sep 17 00:00:00 2001
21ab4e
From: N Balachandran <nbalacha@redhat.com>
21ab4e
Date: Mon, 24 Jul 2017 18:27:39 +0530
21ab4e
Subject: [PATCH 574/576] cluster/dht: Fix negative rebalance estimates
21ab4e
21ab4e
The calculation of the rebalance estimates will start
21ab4e
after the rebalance operation has been running for 10
21ab4e
minutes. This patch also changes the cli rebalance status
21ab4e
code to use unsigned variables for the time calculations.
21ab4e
21ab4e
> BUG: 1457985
21ab4e
> Signed-off-by: N Balachandran <nbalacha@redhat.com>
21ab4e
> Reviewed-on: https://review.gluster.org/17863
21ab4e
> Reviewed-by: Amar Tumballi <amarts@redhat.com>
21ab4e
> Smoke: Gluster Build System <jenkins@build.gluster.org>
21ab4e
> Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
21ab4e
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
21ab4e
Change-Id: Ic76f517c59ad938a407f1cf5e3b9add571690a6c
21ab4e
BUG: 1454602
21ab4e
Signed-off-by: N Balachandran <nbalacha@redhat.com>
21ab4e
Reviewed-on: https://code.engineering.redhat.com/gerrit/113576
21ab4e
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
21ab4e
---
21ab4e
 cli/src/cli-rpc-ops.c                   | 86 ++++++++++++++++++++++-----------
21ab4e
 xlators/cluster/dht/src/dht-rebalance.c | 28 ++++++++---
21ab4e
 2 files changed, 79 insertions(+), 35 deletions(-)
21ab4e
21ab4e
diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
21ab4e
index 4c5c3bb..c46f9d4 100644
21ab4e
--- a/cli/src/cli-rpc-ops.c
21ab4e
+++ b/cli/src/cli-rpc-ops.c
21ab4e
@@ -19,6 +19,10 @@
21ab4e
 
21ab4e
 #define INDENT_MAIN_HEAD "%-25s %s "
21ab4e
 
21ab4e
+/* Do not show estimates larger than this many seconds (60 days) */
21ab4e
+#define REBAL_ESTIMATE_SEC_UPPER_LIMIT    (60*24*3600)
21ab4e
+#define REBAL_ESTIMATE_START_TIME         600
21ab4e
+
21ab4e
 #include "cli.h"
21ab4e
 #include "compat-errno.h"
21ab4e
 #include "cli-cmd.h"
21ab4e
@@ -1592,27 +1596,28 @@ int
21ab4e
 gf_cli_print_rebalance_status (dict_t *dict, enum gf_task_types task_type,
21ab4e
                                gf_boolean_t is_tier)
21ab4e
 {
21ab4e
-        int                ret          = -1;
21ab4e
-        int                count        = 0;
21ab4e
-        int                i            = 1;
21ab4e
-        char               key[256]     = {0,};
21ab4e
-        gf_defrag_status_t status_rcd   = GF_DEFRAG_STATUS_NOT_STARTED;
21ab4e
-        uint64_t           files        = 0;
21ab4e
-        uint64_t           size         = 0;
21ab4e
-        uint64_t           lookup       = 0;
21ab4e
-        char               *node_name   = NULL;
21ab4e
-        uint64_t           failures     = 0;
21ab4e
-        uint64_t           skipped      = 0;
21ab4e
-        double             elapsed      = 0;
21ab4e
-        char               *status_str  = NULL;
21ab4e
-        char               *size_str    = NULL;
21ab4e
-        int                hrs          = 0;
21ab4e
-        int                min          = 0;
21ab4e
-        int                sec          = 0;
21ab4e
-        gf_boolean_t       down         = _gf_false;
21ab4e
-	gf_boolean_t       fix_layout   = _gf_false;
21ab4e
-        uint64_t           max_time     = 0;
21ab4e
-        uint64_t           time_left    = 0;
21ab4e
+        int                ret            = -1;
21ab4e
+        int                count          = 0;
21ab4e
+        int                i              = 1;
21ab4e
+        char               key[256]       = {0,};
21ab4e
+        gf_defrag_status_t status_rcd     = GF_DEFRAG_STATUS_NOT_STARTED;
21ab4e
+        uint64_t           files          = 0;
21ab4e
+        uint64_t           size           = 0;
21ab4e
+        uint64_t           lookup         = 0;
21ab4e
+        char               *node_name     = NULL;
21ab4e
+        uint64_t           failures       = 0;
21ab4e
+        uint64_t           skipped        = 0;
21ab4e
+        double             elapsed        = 0;
21ab4e
+        char               *status_str    = NULL;
21ab4e
+        char               *size_str      = NULL;
21ab4e
+        int32_t            hrs            = 0;
21ab4e
+        uint32_t           min            = 0;
21ab4e
+        uint32_t           sec            = 0;
21ab4e
+        gf_boolean_t       down           = _gf_false;
21ab4e
+        gf_boolean_t       fix_layout     = _gf_false;
21ab4e
+        uint64_t           max_time       = 0;
21ab4e
+        uint64_t           time_left      = 0;
21ab4e
+        gf_boolean_t       show_estimates = _gf_false;
21ab4e
 
21ab4e
 
21ab4e
         ret = dict_get_int32 (dict, "count", &count);
21ab4e
@@ -1691,6 +1696,8 @@ gf_cli_print_rebalance_status (dict_t *dict, enum gf_task_types task_type,
21ab4e
                 if (GF_DEFRAG_STATUS_NOT_STARTED == status_rcd)
21ab4e
                         continue;
21ab4e
 
21ab4e
+                if (GF_DEFRAG_STATUS_STARTED == status_rcd)
21ab4e
+                        show_estimates = _gf_true;
21ab4e
 
21ab4e
                 snprintf (key, 256, "node-name-%d", i);
21ab4e
                 ret = dict_get_str (dict, key, &node_name);
21ab4e
@@ -1750,6 +1757,7 @@ gf_cli_print_rebalance_status (dict_t *dict, enum gf_task_types task_type,
21ab4e
                 if (ret)
21ab4e
                         gf_log ("cli", GF_LOG_TRACE,
21ab4e
                                 "failed to get time left");
21ab4e
+
21ab4e
                 if (time_left > max_time)
21ab4e
                         max_time = time_left;
21ab4e
 
21ab4e
@@ -1760,8 +1768,8 @@ gf_cli_print_rebalance_status (dict_t *dict, enum gf_task_types task_type,
21ab4e
                 status_str = cli_vol_task_status_str[status_rcd];
21ab4e
                 size_str = gf_uint64_2human_readable(size);
21ab4e
                 hrs = elapsed / 3600;
21ab4e
-                min = ((int) elapsed % 3600) / 60;
21ab4e
-                sec = ((int) elapsed % 3600) % 60;
21ab4e
+                min = ((uint64_t) elapsed % 3600) / 60;
21ab4e
+                sec = ((uint64_t) elapsed % 3600) % 60;
21ab4e
 
21ab4e
                 if (fix_layout) {
21ab4e
                         cli_out ("%20s %40s %8d:%d:%d", node_name, status_str,
21ab4e
@@ -1788,12 +1796,36 @@ gf_cli_print_rebalance_status (dict_t *dict, enum gf_task_types task_type,
21ab4e
                          " Please check the nodes that are down using \'gluster"
21ab4e
                          " peer status\' and start the glusterd on those nodes,"
21ab4e
                          " else tier detach commit might fail!");
21ab4e
+
21ab4e
+        /* max_time is non-zero only if a node reported an estimate */
21ab4e
         if (max_time) {
21ab4e
                 hrs = max_time / 3600;
21ab4e
-                min = ((int) max_time % 3600) / 60;
21ab4e
-                sec = ((int) max_time % 3600) % 60;
21ab4e
-                cli_out ("Estimated time left for rebalance to complete :"
21ab4e
-                         " %8d:%02d:%02d", hrs, min, sec);
21ab4e
+                min = (max_time % 3600) / 60;
21ab4e
+                sec = (max_time % 3600) % 60;
21ab4e
+
21ab4e
+                if (hrs < REBAL_ESTIMATE_SEC_UPPER_LIMIT) {
21ab4e
+                        cli_out ("Estimated time left for rebalance to "
21ab4e
+                                 "complete : %8d:%02d:%02d", hrs, min, sec);
21ab4e
+                } else {
21ab4e
+                        cli_out ("Estimated time left for rebalance to "
21ab4e
+                                 "complete : > 2 months. Please try again "
21ab4e
+                                 "later.");
21ab4e
+                }
21ab4e
+        } else {
21ab4e
+                /* Rebalance will return 0 if it could not calculate the
21ab4e
+                 * estimates or if it is complete.
21ab4e
+                 */
21ab4e
+                if (!show_estimates) {
21ab4e
+                        goto out;
21ab4e
+                }
21ab4e
+                if (elapsed <= REBAL_ESTIMATE_START_TIME) {
21ab4e
+                        cli_out ("The estimated time for rebalance to complete "
21ab4e
+                                 "will be unavailable for the first 10 "
21ab4e
+                                 "minutes.");
21ab4e
+                } else {
21ab4e
+                        cli_out ("Rebalance estimated time unavailable. Please "
21ab4e
+                                 "try again later.");
21ab4e
+                }
21ab4e
         }
21ab4e
 out:
21ab4e
         return ret;
21ab4e
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
21ab4e
index 39005f0..d3fae0e 100644
21ab4e
--- a/xlators/cluster/dht/src/dht-rebalance.c
21ab4e
+++ b/xlators/cluster/dht/src/dht-rebalance.c
21ab4e
@@ -18,13 +18,14 @@
21ab4e
 #include <signal.h>
21ab4e
 #include "events.h"
21ab4e
 
21ab4e
-#define GF_DISK_SECTOR_SIZE             512
21ab4e
+#define GF_DISK_SECTOR_SIZE              512
21ab4e
 #define DHT_REBALANCE_PID               4242 /* Change it if required */
21ab4e
 #define DHT_REBALANCE_BLKSIZE           (1024 * 1024)  /* 1 MB */
21ab4e
-#define MAX_MIGRATE_QUEUE_COUNT         500
21ab4e
-#define MIN_MIGRATE_QUEUE_COUNT         200
21ab4e
-#define MAX_REBAL_TYPE_SIZE             16
21ab4e
-#define FILE_CNT_INTERVAL               600 /* 10 mins */
21ab4e
+#define MAX_MIGRATE_QUEUE_COUNT          500
21ab4e
+#define MIN_MIGRATE_QUEUE_COUNT          200
21ab4e
+#define MAX_REBAL_TYPE_SIZE               16
21ab4e
+#define FILE_CNT_INTERVAL                600 /* 10 mins */
21ab4e
+#define ESTIMATE_START_INTERVAL          600 /* 10 mins */
21ab4e
 
21ab4e
 #ifndef MAX
21ab4e
 #define MAX(a, b) (((a) > (b))?(a):(b))
21ab4e
@@ -3048,7 +3049,6 @@ gf_defrag_get_entry (xlator_t *this, int i, struct dht_container **container,
21ab4e
                     !strcmp (df_entry->d_name, ".."))
21ab4e
                         continue;
21ab4e
 
21ab4e
-
21ab4e
                 if (IA_ISDIR (df_entry->d_stat.ia_type)) {
21ab4e
                         defrag->size_processed += df_entry->d_stat.ia_size;
21ab4e
                         continue;
21ab4e
@@ -4790,6 +4790,19 @@ gf_defrag_get_estimates_based_on_size (dht_conf_t *conf)
21ab4e
         gettimeofday (&now, NULL);
21ab4e
         elapsed = now.tv_sec - defrag->start_time.tv_sec;
21ab4e
 
21ab4e
+        /* Don't calculate the estimates for the first 10 minutes.
21ab4e
+         * It is unlikely to be accurate and estimates are not required
21ab4e
+         * if the process finishes in less than 10 mins.
21ab4e
+         */
21ab4e
+
21ab4e
+        if (elapsed < ESTIMATE_START_INTERVAL) {
21ab4e
+                gf_msg (THIS->name, GF_LOG_INFO, 0, 0,
21ab4e
+                        "Rebalance estimates will not be available for the "
21ab4e
+                        "first %d seconds.", ESTIMATE_START_INTERVAL);
21ab4e
+
21ab4e
+                goto out;
21ab4e
+        }
21ab4e
+
21ab4e
         total_processed = defrag->size_processed;
21ab4e
 
21ab4e
         /* rate at which files processed */
21ab4e
@@ -4801,7 +4814,6 @@ gf_defrag_get_estimates_based_on_size (dht_conf_t *conf)
21ab4e
                 time_to_complete = (tmp_count)/rate_processed;
21ab4e
 
21ab4e
         } else {
21ab4e
-
21ab4e
                 gf_msg (THIS->name, GF_LOG_ERROR, 0, 0,
21ab4e
                         "Unable to calculate estimated time for rebalance");
21ab4e
         }
21ab4e
@@ -4947,8 +4959,8 @@ gf_defrag_status_get (dht_conf_t *conf, dict_t *dict)
21ab4e
                         "TIME: Estimated total time to complete based on"
21ab4e
                         " count = %"PRIu64 " seconds, seconds left = %"PRIu64"",
21ab4e
                         time_to_complete, time_left);
21ab4e
-
21ab4e
 */
21ab4e
+
21ab4e
                 time_to_complete = gf_defrag_get_estimates_based_on_size (conf);
21ab4e
 
21ab4e
                 if (time_to_complete && (time_to_complete > elapsed))
21ab4e
-- 
21ab4e
1.8.3.1
21ab4e