Blob Blame History Raw
From 883681bc9742b8b95680efa08878925c66dc034a Mon Sep 17 00:00:00 2001
From: Susant Palai <spalai@redhat.com>
Date: Wed, 22 Mar 2017 17:14:25 +0530
Subject: [PATCH 432/473] cluster/dht: Make rebalance throttle option tuned by
 number

Current rebalance throttle options: lazy/normal/aggressive may not always be
sufficient for the purpose of throttling.  In our recent test, we observed for
certain setups, normal and aggressive modes behaved similarly consuming full
disk bandwidth. So in cases like this admin should be able to  tune it
down(or vice versa) depending on the need.

Along with old throttle configurations, thread counts are tuned based on number.
e.g. gluster v set vol-name cluster-rebal.throttle  5.

Admin can tune up/down between 0 and the number of cores available.

Note: For heterogenous servers, validation will fail on the old server if "number"
is given for throttle configuration.
The message looks something like this:
"volume set: failed: Staging failed on vm2. Error: cluster.rebal-throttle should be {lazy|normal|aggressive}"

Test: Manual test by logging active thread number after reconfiguring throttle option.
testcase: tests/basic/distribute/throttle-rebal.t

> Change-Id: I46e3cde546900307831028b344ecf601fd9b02c3
> BUG: 1438370
> Signed-off-by: Susant Palai <spalai@redhat.com>
> Reviewed-on: https://review.gluster.org/16980
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
> Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
> Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
> Signed-off-by: Susant Palai <spalai@redhat.com>

Change-Id: I46e3cde546900307831028b344ecf601fd9b02c3
BUG: 1381142
Signed-off-by: Susant Palai <spalai@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/104899
Reviewed-by: Nithya Balachandran <nbalacha@redhat.com>
---
 tests/basic/distribute/throttle-rebal.t         |  14 +++
 xlators/cluster/dht/src/dht-common.h            |   3 +-
 xlators/cluster/dht/src/dht-rebalance.c         |  25 +++++-
 xlators/cluster/dht/src/dht-shared.c            | 108 +++++++++++++++++++-----
 xlators/mgmt/glusterd/src/glusterd-volume-set.c |  26 +++++-
 5 files changed, 149 insertions(+), 27 deletions(-)

diff --git a/tests/basic/distribute/throttle-rebal.t b/tests/basic/distribute/throttle-rebal.t
index 89495ae..f4823cf 100644
--- a/tests/basic/distribute/throttle-rebal.t
+++ b/tests/basic/distribute/throttle-rebal.t
@@ -16,6 +16,11 @@ function set_throttle {
         $CLI volume set $V0 cluster.rebal-throttle $level 2>&1 |grep -oE 'success|failed'
 }
 
+#Determine number of cores
+cores=$(cat /proc/cpuinfo | grep processor | wc -l)
+if [ "$cores" == "" ]; then
+        echo "Could not get number of cores available"
+fi
 
 THROTTLE_LEVEL="lazy"
 EXPECT "success" set_throttle $THROTTLE_LEVEL
@@ -36,6 +41,15 @@ EXPECT "failed" set_throttle $THROTTLE_LEVEL
 #check if throttle-level is still aggressive
 EXPECT "aggressive" echo `$CLI volume info | grep rebal-throttle | awk '{print $2}'`
 
+EXPECT "success" set_throttle $cores
+
+#Setting thorttle number to be more than the number of cores should fail
+THORTTLE_LEVEL=$((cores+1))
+TEST echo $THORTTLE_LEVEL
+EXPECT "failed" set_throttle $THROTTLE_LEVEL
+EXPECT "$cores" echo `$CLI volume info | grep rebal-throttle | awk '{print $2}'`
+
+
 TEST $CLI volume stop $V0;
 TEST $CLI volume delete $V0;
 
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 16cc056..b4d9e84 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -33,6 +33,7 @@
 #define DHT_LAYOUT_HEAL_DOMAIN          "dht.layout.heal"
 #define TIERING_MIGRATION_KEY           "tiering.migration"
 #define DHT_LAYOUT_HASH_INVALID         1
+#define MAX_REBAL_THREADS               sysconf(_SC_NPROCESSORS_ONLN)
 
 #define DHT_DIR_STAT_BLOCKS          8
 #define DHT_DIR_STAT_SIZE            4096
@@ -534,7 +535,7 @@ struct dht_conf {
         /* Support size-weighted rebalancing (heterogeneous bricks). */
         gf_boolean_t    do_weighting;
         gf_boolean_t    randomize_by_gfid;
-        char           *dthrottle;
+        int             dthrottle;
 
         dht_methods_t   methods;
 
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 9a7d9ab..570843a 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -2722,7 +2722,7 @@ gf_defrag_task (void *opaque)
                                 defrag->current_thread_count--;
                                 gf_log ("DHT", GF_LOG_INFO,
                                         "Thread sleeping. "
-                                        "defrag->current_thread_count: %d",
+                                        "current thread count: %d",
                                          defrag->current_thread_count);
 
                                 pthread_cond_wait (
@@ -2730,11 +2730,11 @@ gf_defrag_task (void *opaque)
                                            &defrag->dfq_mutex);
 
                                 defrag->current_thread_count++;
-
                                 gf_log ("DHT", GF_LOG_INFO,
                                         "Thread wokeup. "
-                                        "defrag->current_thread_count: %d",
+                                        "current thread count: %d",
                                          defrag->current_thread_count);
+
                         }
 
                         if (defrag->q_entry_count) {
@@ -2787,6 +2787,14 @@ gf_defrag_task (void *opaque)
                          finished */
 
                                 if (!defrag->crawl_done) {
+
+                                        defrag->current_thread_count--;
+                                        gf_log ("DHT", GF_LOG_INFO, "Thread "
+                                                " sleeping while  waiting for "
+                                                "migration entries. current "
+                                                "thread  count :%d",
+                                                defrag->current_thread_count);
+
                                         pthread_cond_wait (
                                            &defrag->parallel_migration_cond,
                                            &defrag->dfq_mutex);
@@ -2794,10 +2802,19 @@ gf_defrag_task (void *opaque)
 
                                 if (defrag->crawl_done &&
                                                  !defrag->q_entry_count) {
+                                        defrag->current_thread_count++;
+                                        gf_msg_debug ("DHT", 0, "Exiting thread");
+
                                         pthread_cond_broadcast (
                                              &defrag->parallel_migration_cond);
                                         goto unlock;
                                 } else {
+                                        defrag->current_thread_count++;
+                                        gf_msg_debug ("DHT", 0, "Thread woke up"
+                                                      " as found migrating entries. "
+                                                      "current thread count:%d",
+                                                      defrag->current_thread_count);
+
                                         pthread_mutex_unlock
                                                  (&defrag->dfq_mutex);
                                         continue;
@@ -4325,7 +4342,7 @@ gf_defrag_start_crawl (void *data)
 
                 INIT_LIST_HEAD (&(defrag->queue[0].list));
 
-                thread_spawn_count = MAX ((sysconf(_SC_NPROCESSORS_ONLN) - 4), 4);
+                thread_spawn_count = MAX (MAX_REBAL_THREADS, 4);
 
                 gf_msg_debug (this->name, 0, "thread_spawn_count: %d",
                               thread_spawn_count);
diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
index 40f8832..318159c 100644
--- a/xlators/cluster/dht/src/dht-shared.c
+++ b/xlators/cluster/dht/src/dht-shared.c
@@ -400,7 +400,7 @@ dht_reconfigure (xlator_t *this, dict_t *options)
         char            *temp_str = NULL;
         gf_boolean_t     search_unhashed;
         int              ret = -1;
-        int              throttle_count = 0;
+        int              rebal_thread_count = 0;
 
         GF_VALIDATE_OR_GOTO ("dht", this, out);
         GF_VALIDATE_OR_GOTO ("dht", options, out);
@@ -456,22 +456,54 @@ dht_reconfigure (xlator_t *this, dict_t *options)
                           conf->randomize_by_gfid,
                           options, bool, out);
 
-        GF_OPTION_RECONF ("rebal-throttle", conf->dthrottle, options,
-                          str, out);
-
         GF_OPTION_RECONF ("lock-migration", conf->lock_migration_enabled,
                           options, bool, out);
 
         if (conf->defrag) {
+                pthread_mutex_lock (&conf->defrag->dfq_mutex);
+                {
+                if (dict_get_str (options, "rebal-throttle", &temp_str) == 0) {
+                        if (!strcasecmp (temp_str, "lazy")) {
+                                conf->defrag->recon_thread_count = 1;
+                        } else if (!strcasecmp (temp_str, "normal")) {
+                                conf->defrag->recon_thread_count = 3;
+                        } else if (!strcasecmp (temp_str, "aggressive")) {
+                                conf->defrag->recon_thread_count = MAX ((MAX_REBAL_THREADS - 4), 4);
+                        } else if ((gf_string2int (temp_str, &rebal_thread_count) == 0)) {
+                                if ((rebal_thread_count > 0) && (rebal_thread_count <= MAX_REBAL_THREADS)) {
+                                        gf_msg_debug (this->name, 0, "rebal throttle count reconfigured to %d", rebal_thread_count);
+                                        conf->defrag->recon_thread_count = rebal_thread_count;
+                                } else {
+                                        gf_msg(this->name, GF_LOG_ERROR, 0,
+                                               DHT_MSG_INVALID_OPTION,
+                                               "Invalid option: Reconfigure: "
+                                               "rebal-throttle should be "
+                                               "within range of 0 and maximum number of"
+                                               " cores available");
+                                        ret = -1;
+                                        pthread_mutex_unlock (&conf->defrag->dfq_mutex);
+                                        goto out;
+                                }
+                        } else {
+                                gf_msg(this->name, GF_LOG_ERROR, 0,
+                                       DHT_MSG_INVALID_OPTION,
+                                       "Invalid option: Reconfigure: "
+                                       "rebal-throttle should be {lazy|normal|aggressive}"
+                                       " or a number upto number of cores available,"
+                                       " not (%s), defaulting to (%d)",
+                                       temp_str, conf->dthrottle);
+                                ret = -1;
+                                pthread_mutex_unlock (&conf->defrag->dfq_mutex);
+                                goto out;
+                        }
+                }
+                }
+                pthread_mutex_unlock (&conf->defrag->dfq_mutex);
+        }
+
+        if (conf->defrag) {
                 conf->defrag->lock_migration_enabled =
                                         conf->lock_migration_enabled;
-
-                GF_DECIDE_DEFRAG_THROTTLE_COUNT (throttle_count, conf);
-                gf_msg ("DHT", GF_LOG_INFO, 0,
-                        DHT_MSG_REBAL_THROTTLE_INFO,
-                        "conf->dthrottle: %s, "
-                        "conf->defrag->recon_thread_count: %d",
-                         conf->dthrottle, conf->defrag->recon_thread_count);
         }
 
         if (conf->defrag) {
@@ -608,8 +640,8 @@ dht_init (xlator_t *this)
         gf_defrag_info_t                *defrag         = NULL;
         int                              cmd            = 0;
         char                            *node_uuid      = NULL;
-        int                              throttle_count = 0;
         uint32_t                         commit_hash    = 0;
+        int                              rebal_thread_count = 0;
 
         GF_VALIDATE_OR_GOTO ("dht", this, err);
 
@@ -809,15 +841,49 @@ dht_init (xlator_t *this)
                         conf->randomize_by_gfid, bool, err);
 
         if (defrag) {
-                GF_OPTION_INIT ("rebal-throttle",
-                                 conf->dthrottle, str, err);
-
-                GF_DECIDE_DEFRAG_THROTTLE_COUNT(throttle_count, conf);
-
-                gf_msg_debug ("DHT", 0, "conf->dthrottle: %s, "
-                              "conf->defrag->recon_thread_count: %d",
-                              conf->dthrottle,
-                              conf->defrag->recon_thread_count);
+                GF_OPTION_INIT ("rebal-throttle", temp_str, str, err);
+                if (temp_str) {
+
+                        pthread_mutex_lock (&conf->defrag->dfq_mutex);
+                        {
+                        if (!strcasecmp (temp_str, "lazy")) {
+                                conf->defrag->recon_thread_count = 1;
+                        } else if (!strcasecmp (temp_str, "normal")) {
+                                conf->defrag->recon_thread_count = 2;
+                        } else if (!strcasecmp (temp_str, "aggressive")) {
+                                conf->defrag->recon_thread_count = MAX (MAX_REBAL_THREADS - 4, 4);
+                        } else if ((gf_string2int (temp_str, &rebal_thread_count) == 0)) {
+                                if ((rebal_thread_count > 0) && (rebal_thread_count <= MAX_REBAL_THREADS)) {
+                                        gf_msg (this->name, GF_LOG_INFO, 0, 0,
+                                                "rebal thread count configured to %d",
+                                                 rebal_thread_count);
+                                        conf->defrag->recon_thread_count = rebal_thread_count;
+                                } else {
+                                        gf_msg(this->name, GF_LOG_ERROR, 0,
+                                               DHT_MSG_INVALID_OPTION,
+                                               "Invalid option: Reconfigure: "
+                                               "rebal-throttle should be "
+                                               "within range of 0 and maximum number of"
+                                               " cores available");
+                                        ret = -1;
+                                        pthread_mutex_unlock (&conf->defrag->dfq_mutex);
+                                        goto err;
+                                }
+                        } else {
+                                gf_msg(this->name, GF_LOG_ERROR, 0,
+                                       DHT_MSG_INVALID_OPTION,
+                                       "Invalid option: Reconfigure: "
+                                       "rebal-throttle should be {lazy|normal|aggressive}"
+                                       " or a number upto number of cores available,"
+                                       " not (%s), defaulting to (%d)",
+                                       temp_str, conf->dthrottle);
+                                ret = -1;
+                                pthread_mutex_unlock (&conf->defrag->dfq_mutex);
+                                goto err;
+                        }
+                        }
+                        pthread_mutex_unlock (&conf->defrag->dfq_mutex);
+                }
         }
 
         GF_OPTION_INIT ("xattr-name", conf->xattr_name, str, err);
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index ce827e5..8d464d0 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -542,18 +542,42 @@ validate_defrag_throttle_option (glusterd_volinfo_t *volinfo, dict_t *dict,
         glusterd_conf_t     *priv         = NULL;
         int                  ret          = 0;
         xlator_t            *this         = NULL;
+        int                  thread_count       = 0;
+        long int             cores_available    = 0;
 
         this = THIS;
         GF_ASSERT (this);
 
+        cores_available = sysconf(_SC_NPROCESSORS_ONLN);
+
+        /* Throttle option should be one of lazy|normal|aggressive or a number
+         * configured by user max up to the number of cores in the machine */
+
         if (!strcasecmp (value, "lazy") ||
             !strcasecmp (value, "normal") ||
             !strcasecmp (value, "aggressive")) {
                 ret = 0;
+        } else if ((gf_string2int (value, &thread_count) == 0)) {
+                if ((thread_count > 0) && (thread_count <= cores_available)) {
+                        ret = 0;
+                } else {
+                        ret = -1;
+                        snprintf (errstr, sizeof (errstr), "%s should be within"
+                                  " range of 0 and maximum number of cores "
+                                  "available (cores available - %ld)", key,
+                                  cores_available);
+
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                GD_MSG_INVALID_ENTRY, "%s", errstr);
+
+                        *op_errstr = gf_strdup (errstr);
+                }
         } else {
                 ret = -1;
                 snprintf (errstr, sizeof (errstr), "%s should be "
-                          "{lazy|normal|aggressive}", key);
+                          "{lazy|normal|aggressive} or a number upto number of"
+                          " cores available (cores availble - %ld)", key,
+                          cores_available);
                 gf_msg (this->name, GF_LOG_ERROR, EINVAL,
                         GD_MSG_INVALID_ENTRY, "%s", errstr);
                 *op_errstr = gf_strdup (errstr);
-- 
1.8.3.1