7f4c2a
From 5d67d0a79a0986b3b547a80d6ebc3db5d70a7b77 Mon Sep 17 00:00:00 2001
7f4c2a
From: Dan Lambright <dlambrig@redhat.com>
7f4c2a
Date: Thu, 11 Jun 2015 14:45:52 -0400
7f4c2a
Subject: [PATCH 56/57] tier/dht: Fixing non atomic promotion/demotion w.r.t frequency period
7f4c2a
7f4c2a
This is a backport of 11110
7f4c2a
7f4c2a
> This fixes the ping-pong issue i.e files getting demoted immediately
7f4c2a
> after promotion, caused by off-sync promotion/demotion processes.
7f4c2a
> The solution is to do promotion/demotion referring to the system time.
7f4c2a
> For the fix to work, all the file serving nodes should have
7f4c2a
> their system time synchronized with each other either manually or
7f4c2a
> using an NTP server.
7f4c2a
7f4c2a
> NOTE: The ping-pong issue can re-appear even with this fix, if the admin
7f4c2a
> has different promotion freq period and demotion freq period, but this
7f4c2a
> would be under the control of the admin.
7f4c2a
7f4c2a
> Change-Id: I1b33a5881d0cac143662ddb48e5b7b653aeb1271
7f4c2a
> BUG: 1218717
7f4c2a
> Signed-off-by: Joseph Fernandes <josferna@redhat.com>
7f4c2a
> Reviewed-on: http://review.gluster.org/11110
7f4c2a
> Reviewed-by: Dan Lambright <dlambrig@redhat.com>
7f4c2a
7f4c2a
Change-Id: I27aa58017b75e5ba8977967176802bfb52ead656
7f4c2a
BUG: 1229268
7f4c2a
Signed-off-by: Dan Lambright <dlambrig@redhat.com>
7f4c2a
Reviewed-on: https://code.engineering.redhat.com/gerrit/50581
7f4c2a
Reviewed-by: Joseph Fernandes <josferna@redhat.com>
7f4c2a
Tested-by: Joseph Fernandes <josferna@redhat.com>
7f4c2a
Reviewed-by: Shyam Ranganathan <srangana@redhat.com>
7f4c2a
---
7f4c2a
 xlators/cluster/dht/src/tier.c |  100 +++++++++++++++++++++++++--------------
7f4c2a
 1 files changed, 64 insertions(+), 36 deletions(-)
7f4c2a
7f4c2a
diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c
7f4c2a
index cef4f5c..7b4890c 100644
7f4c2a
--- a/xlators/cluster/dht/src/tier.c
7f4c2a
+++ b/xlators/cluster/dht/src/tier.c
7f4c2a
@@ -734,7 +734,7 @@ tier_get_bricklist (xlator_t *xl, dict_t *bricklist)
7f4c2a
                         if (!db_path) {
7f4c2a
                                 gf_msg ("tier", GF_LOG_ERROR, 0,
7f4c2a
                                         DHT_MSG_LOG_TIER_STATUS,
7f4c2a
-                                        "Failed to allocate memory for bricklist");
7f4c2a
+                                        "Failed to allocate memory for bricklist");
7f4c2a
                                 goto out;
7f4c2a
                         }
7f4c2a
 
7f4c2a
@@ -763,9 +763,7 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)
7f4c2a
         dict_t       *bricklist_cold = NULL;
7f4c2a
         dict_t       *bricklist_hot = NULL;
7f4c2a
         dht_conf_t   *conf     = NULL;
7f4c2a
-        int tick = 0;
7f4c2a
-        int next_demote = 0;
7f4c2a
-        int next_promote = 0;
7f4c2a
+        gfdb_time_t  current_time;
7f4c2a
         int freq_promote = 0;
7f4c2a
         int freq_demote = 0;
7f4c2a
         promotion_args_t promotion_args = { 0 };
7f4c2a
@@ -775,6 +773,8 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)
7f4c2a
         int ret = 0;
7f4c2a
         pthread_t promote_thread;
7f4c2a
         pthread_t demote_thread;
7f4c2a
+        gf_boolean_t  is_promotion_triggered = _gf_false;
7f4c2a
+        gf_boolean_t  is_demotion_triggered = _gf_false;
7f4c2a
 
7f4c2a
         conf   = this->private;
7f4c2a
 
7f4c2a
@@ -789,16 +789,9 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)
7f4c2a
         tier_get_bricklist (conf->subvolumes[0], bricklist_cold);
7f4c2a
         tier_get_bricklist (conf->subvolumes[1], bricklist_hot);
7f4c2a
 
7f4c2a
-        freq_promote = defrag->tier_promote_frequency;
7f4c2a
-        freq_demote  = defrag->tier_demote_frequency;
7f4c2a
-
7f4c2a
-        next_promote = defrag->tier_promote_frequency % TIMER_SECS;
7f4c2a
-        next_demote  = defrag->tier_demote_frequency % TIMER_SECS;
7f4c2a
-
7f4c2a
-
7f4c2a
         gf_msg (this->name, GF_LOG_INFO, 0,
7f4c2a
-                DHT_MSG_LOG_TIER_STATUS, "Begin run tier promote %d demote %d",
7f4c2a
-                next_promote, next_demote);
7f4c2a
+                DHT_MSG_LOG_TIER_STATUS, "Begin run tier promote %d"
7f4c2a
+                        " demote %d", freq_promote, freq_demote);
7f4c2a
 
7f4c2a
         defrag->defrag_status = GF_DEFRAG_STATUS_STARTED;
7f4c2a
 
7f4c2a
@@ -806,9 +799,6 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)
7f4c2a
 
7f4c2a
                 sleep(1);
7f4c2a
 
7f4c2a
-                ret_promotion = -1;
7f4c2a
-                ret_demotion = -1;
7f4c2a
-
7f4c2a
                 if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
7f4c2a
                         ret = 1;
7f4c2a
                         gf_msg (this->name, GF_LOG_ERROR, 0,
7f4c2a
@@ -820,7 +810,8 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)
7f4c2a
 
7f4c2a
                 if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER) {
7f4c2a
                         ret = 0;
7f4c2a
-                        defrag->defrag_status = GF_DEFRAG_STATUS_COMPLETE;
7f4c2a
+                        defrag->defrag_status =
7f4c2a
+                                        GF_DEFRAG_STATUS_COMPLETE;
7f4c2a
                         gf_msg (this->name, GF_LOG_DEBUG, 0,
7f4c2a
                                 DHT_MSG_LOG_TIER_ERROR,
7f4c2a
                                 "defrag->defrag_cmd == "
7f4c2a
@@ -828,43 +819,75 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)
7f4c2a
                         goto out;
7f4c2a
                 }
7f4c2a
 
7f4c2a
-                tick = (tick + 1) % TIMER_SECS;
7f4c2a
-                if ((next_demote != tick) && (next_promote != tick))
7f4c2a
+                freq_promote = defrag->tier_promote_frequency;
7f4c2a
+                freq_demote  = defrag->tier_demote_frequency;
7f4c2a
+
7f4c2a
+                /* To have proper synchronization amongst all
7f4c2a
+                 * brick holding nodes, so that promotion and demotions
7f4c2a
+                 * start atomically w.r.t promotion/demotion frequency
7f4c2a
+                 * period, all nodes should have their system time
7f4c2a
+                 * in-sync with each other either manually set or
7f4c2a
+                 * using an NTP server */
7f4c2a
+                ret = gettimeofday (&current_time, NULL);
7f4c2a
+                if (ret == -1) {
7f4c2a
+                        gf_log (this->name, GF_LOG_ERROR,
7f4c2a
+                                "Failed to get current time");
7f4c2a
+                        goto out;
7f4c2a
+                }
7f4c2a
+
7f4c2a
+                is_demotion_triggered = ((current_time.tv_sec %
7f4c2a
+                                        freq_demote) == 0) ? _gf_true :
7f4c2a
+                                        _gf_false;
7f4c2a
+                is_promotion_triggered = ((current_time.tv_sec %
7f4c2a
+                                        freq_promote) == 0) ? _gf_true :
7f4c2a
+                                        _gf_false;
7f4c2a
+
7f4c2a
+                /* If no promotion and no demotion is
7f4c2a
+                 * scheduled/triggered skip an iteration */
7f4c2a
+                if (!is_promotion_triggered && !is_demotion_triggered)
7f4c2a
                         continue;
7f4c2a
 
7f4c2a
-                if (next_demote >= tick) {
7f4c2a
+                ret_promotion = -1;
7f4c2a
+                ret_demotion = -1;
7f4c2a
+
7f4c2a
+                if (is_demotion_triggered) {
7f4c2a
+
7f4c2a
                         demotion_args.this = this;
7f4c2a
                         demotion_args.brick_list = bricklist_hot;
7f4c2a
                         demotion_args.defrag = defrag;
7f4c2a
                         demotion_args.freq_time = freq_demote;
7f4c2a
-                        ret_demotion = pthread_create (&demote_thread, NULL,
7f4c2a
-                                        &tier_demote, &demotion_args);
7f4c2a
+                        ret_demotion = pthread_create (&demote_thread,
7f4c2a
+                                                NULL, &tier_demote,
7f4c2a
+                                                &demotion_args);
7f4c2a
+
7f4c2a
                         if (ret_demotion) {
7f4c2a
                                 gf_msg (this->name, GF_LOG_ERROR, 0,
7f4c2a
                                         DHT_MSG_LOG_TIER_ERROR,
7f4c2a
-                                        "Failed starting Demotion thread!");
7f4c2a
+                                        "Failed starting Demotion "
7f4c2a
+                                        "thread!");
7f4c2a
+
7f4c2a
                         }
7f4c2a
-                        freq_demote = defrag->tier_demote_frequency;
7f4c2a
-                        next_demote = (tick + freq_demote) % TIMER_SECS;
7f4c2a
                 }
7f4c2a
 
7f4c2a
-                if (next_promote >= tick) {
7f4c2a
+                if (is_promotion_triggered) {
7f4c2a
                         promotion_args.this = this;
7f4c2a
                         promotion_args.brick_list = bricklist_cold;
7f4c2a
                         promotion_args.defrag = defrag;
7f4c2a
                         promotion_args.freq_time = freq_promote;
7f4c2a
-                        ret_promotion = pthread_create (&promote_thread, NULL,
7f4c2a
-                                                &tier_promote, &promotion_args);
7f4c2a
+                        ret_promotion = pthread_create (&promote_thread,
7f4c2a
+                                                NULL, &tier_promote,
7f4c2a
+                                                &promotion_args);
7f4c2a
+
7f4c2a
                         if (ret_promotion) {
7f4c2a
                                 gf_msg (this->name, GF_LOG_ERROR, 0,
7f4c2a
                                         DHT_MSG_LOG_TIER_ERROR,
7f4c2a
-                                        "Failed starting Promotion thread!");
7f4c2a
+                                        "Failed starting Promotion "
7f4c2a
+                                        "thread!");
7f4c2a
+
7f4c2a
                         }
7f4c2a
-                        freq_promote = defrag->tier_promote_frequency;
7f4c2a
-                        next_promote = (tick + freq_promote) % TIMER_SECS;
7f4c2a
                 }
7f4c2a
 
7f4c2a
-                if (ret_demotion == 0) {
7f4c2a
+                if (is_demotion_triggered && (ret_demotion == 0)) {
7f4c2a
                         pthread_join (demote_thread, NULL);
7f4c2a
                         if (demotion_args.return_value) {
7f4c2a
                                 gf_msg (this->name, GF_LOG_ERROR, 0,
7f4c2a
@@ -874,7 +897,7 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)
7f4c2a
                         ret_demotion = demotion_args.return_value;
7f4c2a
                 }
7f4c2a
 
7f4c2a
-                if (ret_promotion == 0) {
7f4c2a
+                if (is_promotion_triggered && (ret_promotion == 0)) {
7f4c2a
                         pthread_join (promote_thread, NULL);
7f4c2a
                         if (promotion_args.return_value) {
7f4c2a
                                 gf_msg (this->name, GF_LOG_ERROR, 0,
7f4c2a
@@ -884,10 +907,15 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)
7f4c2a
                         ret_promotion = promotion_args.return_value;
7f4c2a
                 }
7f4c2a
 
7f4c2a
-                /*Collect previous and current cummulative status */
7f4c2a
-                ret = ret | ret_demotion | ret_promotion;
7f4c2a
+                /* Collect previous and current cumulative status */
7f4c2a
+                /* If demotion was not triggered just pass 0 to ret */
7f4c2a
+                ret = (is_demotion_triggered) ? ret_demotion : 0;
7f4c2a
+                /* If promotion was not triggered just pass 0 to ret */
7f4c2a
+                ret = ret | ((is_promotion_triggered) ?
7f4c2a
+                                ret_promotion : 0);
7f4c2a
 
7f4c2a
-                /*reseting promotion and demotion arguments for next iteration*/
7f4c2a
+                /* resetting promotion and demotion arguments for
7f4c2a
+                 * next iteration*/
7f4c2a
                 memset (&demotion_args, 0, sizeof(demotion_args_t));
7f4c2a
                 memset (&promotion_args, 0, sizeof(promotion_args_t));
7f4c2a
 
7f4c2a
-- 
7f4c2a
1.7.1
7f4c2a