21ab4e
From 883681bc9742b8b95680efa08878925c66dc034a Mon Sep 17 00:00:00 2001
21ab4e
From: Susant Palai <spalai@redhat.com>
21ab4e
Date: Wed, 22 Mar 2017 17:14:25 +0530
21ab4e
Subject: [PATCH 432/473] cluster/dht: Make rebalance throttle option tuned by
21ab4e
 number
21ab4e
21ab4e
Current rebalance throttle options: lazy/normal/aggressive may not always be
21ab4e
sufficient for the purpose of throttling.  In our recent test, we observed for
21ab4e
certain setups, normal and aggressive modes behaved similarly consuming full
21ab4e
disk bandwidth. So in cases like this the admin should be able to tune it
21ab4e
down (or vice versa) depending on the need.
21ab4e
21ab4e
In addition to the old throttle settings, the thread count can be set by number.
21ab4e
e.g. gluster v set vol-name cluster.rebal-throttle 5
21ab4e
21ab4e
Admin can tune up/down between 1 and the number of cores available.
21ab4e
21ab4e
Note: For heterogeneous servers, validation will fail on the old server if a number
21ab4e
is given for throttle configuration.
21ab4e
The message looks something like this:
21ab4e
"volume set: failed: Staging failed on vm2. Error: cluster.rebal-throttle should be {lazy|normal|aggressive}"
21ab4e
21ab4e
Test: Manual test by logging active thread number after reconfiguring throttle option.
21ab4e
testcase: tests/basic/distribute/throttle-rebal.t
21ab4e
21ab4e
> Change-Id: I46e3cde546900307831028b344ecf601fd9b02c3
21ab4e
> BUG: 1438370
21ab4e
> Signed-off-by: Susant Palai <spalai@redhat.com>
21ab4e
> Reviewed-on: https://review.gluster.org/16980
21ab4e
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
21ab4e
> Smoke: Gluster Build System <jenkins@build.gluster.org>
21ab4e
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
21ab4e
> Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
21ab4e
> Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
21ab4e
> Signed-off-by: Susant Palai <spalai@redhat.com>
21ab4e
21ab4e
Change-Id: I46e3cde546900307831028b344ecf601fd9b02c3
21ab4e
BUG: 1381142
21ab4e
Signed-off-by: Susant Palai <spalai@redhat.com>
21ab4e
Reviewed-on: https://code.engineering.redhat.com/gerrit/104899
21ab4e
Reviewed-by: Nithya Balachandran <nbalacha@redhat.com>
21ab4e
---
21ab4e
 tests/basic/distribute/throttle-rebal.t         |  14 +++
21ab4e
 xlators/cluster/dht/src/dht-common.h            |   3 +-
21ab4e
 xlators/cluster/dht/src/dht-rebalance.c         |  25 +++++-
21ab4e
 xlators/cluster/dht/src/dht-shared.c            | 108 +++++++++++++++++++-----
21ab4e
 xlators/mgmt/glusterd/src/glusterd-volume-set.c |  26 +++++-
21ab4e
 5 files changed, 149 insertions(+), 27 deletions(-)
21ab4e
21ab4e
diff --git a/tests/basic/distribute/throttle-rebal.t b/tests/basic/distribute/throttle-rebal.t
21ab4e
index 89495ae..f4823cf 100644
21ab4e
--- a/tests/basic/distribute/throttle-rebal.t
21ab4e
+++ b/tests/basic/distribute/throttle-rebal.t
21ab4e
@@ -16,6 +16,11 @@ function set_throttle {
21ab4e
         $CLI volume set $V0 cluster.rebal-throttle $level 2>&1 |grep -oE 'success|failed'
21ab4e
 }
21ab4e
 
21ab4e
+#Determine number of cores
21ab4e
+cores=$(cat /proc/cpuinfo | grep processor | wc -l)
21ab4e
+if [ "$cores" == "" ]; then
21ab4e
+        echo "Could not get number of cores available"
21ab4e
+fi
21ab4e
 
21ab4e
 THROTTLE_LEVEL="lazy"
21ab4e
 EXPECT "success" set_throttle $THROTTLE_LEVEL
21ab4e
@@ -36,6 +41,15 @@ EXPECT "failed" set_throttle $THROTTLE_LEVEL
21ab4e
 #check if throttle-level is still aggressive
21ab4e
 EXPECT "aggressive" echo `$CLI volume info | grep rebal-throttle | awk '{print $2}'`
21ab4e
 
21ab4e
+EXPECT "success" set_throttle $cores
21ab4e
+
21ab4e
+#Setting throttle number to be more than the number of cores should fail
21ab4e
+THROTTLE_LEVEL=$((cores+1))
21ab4e
+TEST echo $THROTTLE_LEVEL
21ab4e
+EXPECT "failed" set_throttle $THROTTLE_LEVEL
21ab4e
+EXPECT "$cores" echo `$CLI volume info | grep rebal-throttle | awk '{print $2}'`
21ab4e
+
21ab4e
+
21ab4e
 TEST $CLI volume stop $V0;
21ab4e
 TEST $CLI volume delete $V0;
21ab4e
 
21ab4e
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
21ab4e
index 16cc056..b4d9e84 100644
21ab4e
--- a/xlators/cluster/dht/src/dht-common.h
21ab4e
+++ b/xlators/cluster/dht/src/dht-common.h
21ab4e
@@ -33,6 +33,7 @@
21ab4e
 #define DHT_LAYOUT_HEAL_DOMAIN          "dht.layout.heal"
21ab4e
 #define TIERING_MIGRATION_KEY           "tiering.migration"
21ab4e
 #define DHT_LAYOUT_HASH_INVALID         1
21ab4e
+#define MAX_REBAL_THREADS               sysconf(_SC_NPROCESSORS_ONLN)
21ab4e
 
21ab4e
 #define DHT_DIR_STAT_BLOCKS          8
21ab4e
 #define DHT_DIR_STAT_SIZE            4096
21ab4e
@@ -534,7 +535,7 @@ struct dht_conf {
21ab4e
         /* Support size-weighted rebalancing (heterogeneous bricks). */
21ab4e
         gf_boolean_t    do_weighting;
21ab4e
         gf_boolean_t    randomize_by_gfid;
21ab4e
-        char           *dthrottle;
21ab4e
+        int             dthrottle;
21ab4e
 
21ab4e
         dht_methods_t   methods;
21ab4e
 
21ab4e
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
21ab4e
index 9a7d9ab..570843a 100644
21ab4e
--- a/xlators/cluster/dht/src/dht-rebalance.c
21ab4e
+++ b/xlators/cluster/dht/src/dht-rebalance.c
21ab4e
@@ -2722,7 +2722,7 @@ gf_defrag_task (void *opaque)
21ab4e
                                 defrag->current_thread_count--;
21ab4e
                                 gf_log ("DHT", GF_LOG_INFO,
21ab4e
                                         "Thread sleeping. "
21ab4e
-                                        "defrag->current_thread_count: %d",
21ab4e
+                                        "current thread count: %d",
21ab4e
                                          defrag->current_thread_count);
21ab4e
 
21ab4e
                                 pthread_cond_wait (
21ab4e
@@ -2730,11 +2730,11 @@ gf_defrag_task (void *opaque)
21ab4e
                                            &defrag->dfq_mutex);
21ab4e
 
21ab4e
                                 defrag->current_thread_count++;
21ab4e
-
21ab4e
                                 gf_log ("DHT", GF_LOG_INFO,
21ab4e
                                         "Thread wokeup. "
21ab4e
-                                        "defrag->current_thread_count: %d",
21ab4e
+                                        "current thread count: %d",
21ab4e
                                          defrag->current_thread_count);
21ab4e
+
21ab4e
                         }
21ab4e
 
21ab4e
                         if (defrag->q_entry_count) {
21ab4e
@@ -2787,6 +2787,14 @@ gf_defrag_task (void *opaque)
21ab4e
                          finished */
21ab4e
 
21ab4e
                                 if (!defrag->crawl_done) {
21ab4e
+
21ab4e
+                                        defrag->current_thread_count--;
21ab4e
+                                        gf_log ("DHT", GF_LOG_INFO, "Thread "
21ab4e
+                                                " sleeping while  waiting for "
21ab4e
+                                                "migration entries. current "
21ab4e
+                                                "thread  count :%d",
21ab4e
+                                                defrag->current_thread_count);
21ab4e
+
21ab4e
                                         pthread_cond_wait (
21ab4e
                                            &defrag->parallel_migration_cond,
21ab4e
                                            &defrag->dfq_mutex);
21ab4e
@@ -2794,10 +2802,19 @@ gf_defrag_task (void *opaque)
21ab4e
 
21ab4e
                                 if (defrag->crawl_done &&
21ab4e
                                                  !defrag->q_entry_count) {
21ab4e
+                                        defrag->current_thread_count++;
21ab4e
+                                        gf_msg_debug ("DHT", 0, "Exiting thread");
21ab4e
+
21ab4e
                                         pthread_cond_broadcast (
21ab4e
                                              &defrag->parallel_migration_cond);
21ab4e
                                         goto unlock;
21ab4e
                                 } else {
21ab4e
+                                        defrag->current_thread_count++;
21ab4e
+                                        gf_msg_debug ("DHT", 0, "Thread woke up"
21ab4e
+                                                      " as found migrating entries. "
21ab4e
+                                                      "current thread count:%d",
21ab4e
+                                                      defrag->current_thread_count);
21ab4e
+
21ab4e
                                         pthread_mutex_unlock
21ab4e
                                                  (&defrag->dfq_mutex);
21ab4e
                                         continue;
21ab4e
@@ -4325,7 +4342,7 @@ gf_defrag_start_crawl (void *data)
21ab4e
 
21ab4e
                 INIT_LIST_HEAD (&(defrag->queue[0].list));
21ab4e
 
21ab4e
-                thread_spawn_count = MAX ((sysconf(_SC_NPROCESSORS_ONLN) - 4), 4);
21ab4e
+                thread_spawn_count = MAX (MAX_REBAL_THREADS, 4);
21ab4e
 
21ab4e
                 gf_msg_debug (this->name, 0, "thread_spawn_count: %d",
21ab4e
                               thread_spawn_count);
21ab4e
diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
21ab4e
index 40f8832..318159c 100644
21ab4e
--- a/xlators/cluster/dht/src/dht-shared.c
21ab4e
+++ b/xlators/cluster/dht/src/dht-shared.c
21ab4e
@@ -400,7 +400,7 @@ dht_reconfigure (xlator_t *this, dict_t *options)
21ab4e
         char            *temp_str = NULL;
21ab4e
         gf_boolean_t     search_unhashed;
21ab4e
         int              ret = -1;
21ab4e
-        int              throttle_count = 0;
21ab4e
+        int              rebal_thread_count = 0;
21ab4e
 
21ab4e
         GF_VALIDATE_OR_GOTO ("dht", this, out);
21ab4e
         GF_VALIDATE_OR_GOTO ("dht", options, out);
21ab4e
@@ -456,22 +456,54 @@ dht_reconfigure (xlator_t *this, dict_t *options)
21ab4e
                           conf->randomize_by_gfid,
21ab4e
                           options, bool, out);
21ab4e
 
21ab4e
-        GF_OPTION_RECONF ("rebal-throttle", conf->dthrottle, options,
21ab4e
-                          str, out);
21ab4e
-
21ab4e
         GF_OPTION_RECONF ("lock-migration", conf->lock_migration_enabled,
21ab4e
                           options, bool, out);
21ab4e
 
21ab4e
         if (conf->defrag) {
21ab4e
+                pthread_mutex_lock (&conf->defrag->dfq_mutex);
21ab4e
+                {
21ab4e
+                if (dict_get_str (options, "rebal-throttle", &temp_str) == 0) {
21ab4e
+                        if (!strcasecmp (temp_str, "lazy")) {
21ab4e
+                                conf->defrag->recon_thread_count = 1;
21ab4e
+                        } else if (!strcasecmp (temp_str, "normal")) {
21ab4e
+                                conf->defrag->recon_thread_count = 3;
21ab4e
+                        } else if (!strcasecmp (temp_str, "aggressive")) {
21ab4e
+                                conf->defrag->recon_thread_count = MAX ((MAX_REBAL_THREADS - 4), 4);
21ab4e
+                        } else if ((gf_string2int (temp_str, &rebal_thread_count) == 0)) {
21ab4e
+                                if ((rebal_thread_count > 0) && (rebal_thread_count <= MAX_REBAL_THREADS)) {
21ab4e
+                                        gf_msg_debug (this->name, 0, "rebal throttle count reconfigured to %d", rebal_thread_count);
21ab4e
+                                        conf->defrag->recon_thread_count = rebal_thread_count;
21ab4e
+                                } else {
21ab4e
+                                        gf_msg(this->name, GF_LOG_ERROR, 0,
21ab4e
+                                               DHT_MSG_INVALID_OPTION,
21ab4e
+                                               "Invalid option: Reconfigure: "
21ab4e
+                                               "rebal-throttle should be "
21ab4e
+                                               "within range of 0 and maximum number of"
21ab4e
+                                               " cores available");
21ab4e
+                                        ret = -1;
21ab4e
+                                        pthread_mutex_unlock (&conf->defrag->dfq_mutex);
21ab4e
+                                        goto out;
21ab4e
+                                }
21ab4e
+                        } else {
21ab4e
+                                gf_msg(this->name, GF_LOG_ERROR, 0,
21ab4e
+                                       DHT_MSG_INVALID_OPTION,
21ab4e
+                                       "Invalid option: Reconfigure: "
21ab4e
+                                       "rebal-throttle should be {lazy|normal|aggressive}"
21ab4e
+                                       " or a number upto number of cores available,"
21ab4e
+                                       " not (%s), defaulting to (%d)",
21ab4e
+                                       temp_str, conf->dthrottle);
21ab4e
+                                ret = -1;
21ab4e
+                                pthread_mutex_unlock (&conf->defrag->dfq_mutex);
21ab4e
+                                goto out;
21ab4e
+                        }
21ab4e
+                }
21ab4e
+                }
21ab4e
+                pthread_mutex_unlock (&conf->defrag->dfq_mutex);
21ab4e
+        }
21ab4e
+
21ab4e
+        if (conf->defrag) {
21ab4e
                 conf->defrag->lock_migration_enabled =
21ab4e
                                         conf->lock_migration_enabled;
21ab4e
-
21ab4e
-                GF_DECIDE_DEFRAG_THROTTLE_COUNT (throttle_count, conf);
21ab4e
-                gf_msg ("DHT", GF_LOG_INFO, 0,
21ab4e
-                        DHT_MSG_REBAL_THROTTLE_INFO,
21ab4e
-                        "conf->dthrottle: %s, "
21ab4e
-                        "conf->defrag->recon_thread_count: %d",
21ab4e
-                         conf->dthrottle, conf->defrag->recon_thread_count);
21ab4e
         }
21ab4e
 
21ab4e
         if (conf->defrag) {
21ab4e
@@ -608,8 +640,8 @@ dht_init (xlator_t *this)
21ab4e
         gf_defrag_info_t                *defrag         = NULL;
21ab4e
         int                              cmd            = 0;
21ab4e
         char                            *node_uuid      = NULL;
21ab4e
-        int                              throttle_count = 0;
21ab4e
         uint32_t                         commit_hash    = 0;
21ab4e
+        int                              rebal_thread_count = 0;
21ab4e
 
21ab4e
         GF_VALIDATE_OR_GOTO ("dht", this, err);
21ab4e
 
21ab4e
@@ -809,15 +841,49 @@ dht_init (xlator_t *this)
21ab4e
                         conf->randomize_by_gfid, bool, err);
21ab4e
 
21ab4e
         if (defrag) {
21ab4e
-                GF_OPTION_INIT ("rebal-throttle",
21ab4e
-                                 conf->dthrottle, str, err);
21ab4e
-
21ab4e
-                GF_DECIDE_DEFRAG_THROTTLE_COUNT(throttle_count, conf);
21ab4e
-
21ab4e
-                gf_msg_debug ("DHT", 0, "conf->dthrottle: %s, "
21ab4e
-                              "conf->defrag->recon_thread_count: %d",
21ab4e
-                              conf->dthrottle,
21ab4e
-                              conf->defrag->recon_thread_count);
21ab4e
+                GF_OPTION_INIT ("rebal-throttle", temp_str, str, err);
21ab4e
+                if (temp_str) {
21ab4e
+
21ab4e
+                        pthread_mutex_lock (&conf->defrag->dfq_mutex);
21ab4e
+                        {
21ab4e
+                        if (!strcasecmp (temp_str, "lazy")) {
21ab4e
+                                conf->defrag->recon_thread_count = 1;
21ab4e
+                        } else if (!strcasecmp (temp_str, "normal")) {
21ab4e
+                                conf->defrag->recon_thread_count = 2;
21ab4e
+                        } else if (!strcasecmp (temp_str, "aggressive")) {
21ab4e
+                                conf->defrag->recon_thread_count = MAX (MAX_REBAL_THREADS - 4, 4);
21ab4e
+                        } else if ((gf_string2int (temp_str, &rebal_thread_count) == 0)) {
21ab4e
+                                if ((rebal_thread_count > 0) && (rebal_thread_count <= MAX_REBAL_THREADS)) {
21ab4e
+                                        gf_msg (this->name, GF_LOG_INFO, 0, 0,
21ab4e
+                                                "rebal thread count configured to %d",
21ab4e
+                                                 rebal_thread_count);
21ab4e
+                                        conf->defrag->recon_thread_count = rebal_thread_count;
21ab4e
+                                } else {
21ab4e
+                                        gf_msg(this->name, GF_LOG_ERROR, 0,
21ab4e
+                                               DHT_MSG_INVALID_OPTION,
21ab4e
+                                               "Invalid option: Reconfigure: "
21ab4e
+                                               "rebal-throttle should be "
21ab4e
+                                               "within range of 0 and maximum number of"
21ab4e
+                                               " cores available");
21ab4e
+                                        ret = -1;
21ab4e
+                                        pthread_mutex_unlock (&conf->defrag->dfq_mutex);
21ab4e
+                                        goto err;
21ab4e
+                                }
21ab4e
+                        } else {
21ab4e
+                                gf_msg(this->name, GF_LOG_ERROR, 0,
21ab4e
+                                       DHT_MSG_INVALID_OPTION,
21ab4e
+                                       "Invalid option: Reconfigure: "
21ab4e
+                                       "rebal-throttle should be {lazy|normal|aggressive}"
21ab4e
+                                       " or a number upto number of cores available,"
21ab4e
+                                       " not (%s), defaulting to (%d)",
21ab4e
+                                       temp_str, conf->dthrottle);
21ab4e
+                                ret = -1;
21ab4e
+                                pthread_mutex_unlock (&conf->defrag->dfq_mutex);
21ab4e
+                                goto err;
21ab4e
+                        }
21ab4e
+                        }
21ab4e
+                        pthread_mutex_unlock (&conf->defrag->dfq_mutex);
21ab4e
+                }
21ab4e
         }
21ab4e
 
21ab4e
         GF_OPTION_INIT ("xattr-name", conf->xattr_name, str, err);
21ab4e
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
21ab4e
index ce827e5..8d464d0 100644
21ab4e
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
21ab4e
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
21ab4e
@@ -542,18 +542,42 @@ validate_defrag_throttle_option (glusterd_volinfo_t *volinfo, dict_t *dict,
21ab4e
         glusterd_conf_t     *priv         = NULL;
21ab4e
         int                  ret          = 0;
21ab4e
         xlator_t            *this         = NULL;
21ab4e
+        int                  thread_count       = 0;
21ab4e
+        long int             cores_available    = 0;
21ab4e
 
21ab4e
         this = THIS;
21ab4e
         GF_ASSERT (this);
21ab4e
 
21ab4e
+        cores_available = sysconf(_SC_NPROCESSORS_ONLN);
21ab4e
+
21ab4e
+        /* Throttle option should be one of lazy|normal|aggressive or a number
21ab4e
+         * configured by user max up to the number of cores in the machine */
21ab4e
+
21ab4e
         if (!strcasecmp (value, "lazy") ||
21ab4e
             !strcasecmp (value, "normal") ||
21ab4e
             !strcasecmp (value, "aggressive")) {
21ab4e
                 ret = 0;
21ab4e
+        } else if ((gf_string2int (value, &thread_count) == 0)) {
21ab4e
+                if ((thread_count > 0) && (thread_count <= cores_available)) {
21ab4e
+                        ret = 0;
21ab4e
+                } else {
21ab4e
+                        ret = -1;
21ab4e
+                        snprintf (errstr, sizeof (errstr), "%s should be within"
21ab4e
+                                  " range of 0 and maximum number of cores "
21ab4e
+                                  "available (cores available - %ld)", key,
21ab4e
+                                  cores_available);
21ab4e
+
21ab4e
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
21ab4e
+                                GD_MSG_INVALID_ENTRY, "%s", errstr);
21ab4e
+
21ab4e
+                        *op_errstr = gf_strdup (errstr);
21ab4e
+                }
21ab4e
         } else {
21ab4e
                 ret = -1;
21ab4e
                 snprintf (errstr, sizeof (errstr), "%s should be "
21ab4e
-                          "{lazy|normal|aggressive}", key);
21ab4e
+                          "{lazy|normal|aggressive} or a number upto number of"
21ab4e
+                          " cores available (cores availble - %ld)", key,
21ab4e
+                          cores_available);
21ab4e
                 gf_msg (this->name, GF_LOG_ERROR, EINVAL,
21ab4e
                         GD_MSG_INVALID_ENTRY, "%s", errstr);
21ab4e
                 *op_errstr = gf_strdup (errstr);
21ab4e
-- 
21ab4e
1.8.3.1
21ab4e