21ab4e
From df8fdd1d7fab692169a667fcc07c652fcc5e2ad7 Mon Sep 17 00:00:00 2001
21ab4e
From: Atin Mukherjee <amukherj@redhat.com>
21ab4e
Date: Wed, 5 Oct 2016 14:59:51 +0530
21ab4e
Subject: [PATCH 317/361] glusterd: daemon restart logic should adhere to server
21ab4e
 side quorum
21ab4e
21ab4e
Just like brick processes, other daemon services should also follow the same
21ab4e
quorum-check logic to decide whether a particular service needs to come up
21ab4e
when glusterd is restarted or an incoming friend add/update request is
21ab4e
received (in the glusterd_restart_bricks () function).
21ab4e
21ab4e
mainline:
21ab4e
> BUG: 1383893
21ab4e
> Reviewed-on: https://review.gluster.org/15626
21ab4e
> Smoke: Gluster Build System <jenkins@build.gluster.org>
21ab4e
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
21ab4e
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
21ab4e
> Reviewed-by: Prashanth Pai <ppai@redhat.com>
21ab4e
(cherry picked from commit 5a6f509263a810ca21a22bbbd1e6ffcf43b70d18)
21ab4e
21ab4e
BUG: 1381825
21ab4e
Change-Id: I54a1fbdaa1571cc45eed627181b81463fead47a3
21ab4e
Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
21ab4e
Reviewed-on: https://code.engineering.redhat.com/gerrit/101298
21ab4e
Tested-by: Milind Changire <mchangir@redhat.com>
21ab4e
---
21ab4e
 .../bug-1383893-daemons-to-follow-quorum.t         |  57 +++++
21ab4e
 xlators/mgmt/glusterd/src/glusterd-utils.c         | 235 +--------------------
21ab4e
 2 files changed, 64 insertions(+), 228 deletions(-)
21ab4e
 create mode 100644 tests/bugs/glusterd/bug-1383893-daemons-to-follow-quorum.t
21ab4e
21ab4e
diff --git a/tests/bugs/glusterd/bug-1383893-daemons-to-follow-quorum.t b/tests/bugs/glusterd/bug-1383893-daemons-to-follow-quorum.t
21ab4e
new file mode 100644
21ab4e
index 0000000..105292a
21ab4e
--- /dev/null
21ab4e
+++ b/tests/bugs/glusterd/bug-1383893-daemons-to-follow-quorum.t
21ab4e
@@ -0,0 +1,57 @@
21ab4e
+#!/bin/bash
21ab4e
+
21ab4e
+# This test checks that shd or any other daemon that was brought down (apart
21ab4e
+# from brick processes) is not brought up automatically when glusterd on the
21ab4e
+# other node is (re)started
21ab4e
+
21ab4e
+. $(dirname $0)/../../include.rc
21ab4e
+. $(dirname $0)/../../volume.rc
21ab4e
+. $(dirname $0)/../../cluster.rc
21ab4e
+
21ab4e
+function shd_up_status_1 {
21ab4e
+        $CLI_1 volume status | grep "localhost" | grep "Self-heal Daemon" | awk '{print $7}'
21ab4e
+}
21ab4e
+
21ab4e
+function shd_up_status_2 {
21ab4e
+        $CLI_2 volume status | grep "localhost" | grep "Self-heal Daemon" | awk '{print $7}'
21ab4e
+}
21ab4e
+
21ab4e
+function get_shd_pid_2 {
21ab4e
+        $CLI_2 volume status | grep "localhost" | grep "Self-heal Daemon" | awk '{print $8}'
21ab4e
+}
21ab4e
+cleanup;
21ab4e
+
21ab4e
+TEST launch_cluster 3
21ab4e
+
21ab4e
+TEST $CLI_1 peer probe $H2;
21ab4e
+EXPECT_WITHIN $PROBE_TIMEOUT 1  peer_count
21ab4e
+
21ab4e
+TEST $CLI_1 peer probe $H3;
21ab4e
+EXPECT_WITHIN $PROBE_TIMEOUT 2  peer_count
21ab4e
+
21ab4e
+# Lets create the volume
21ab4e
+TEST $CLI_1 volume create $V0 replica 2 $H1:$B1/${V0}1 $H2:$B2/${V0}2
21ab4e
+
21ab4e
+# Start the volume
21ab4e
+TEST $CLI_1 volume start $V0
21ab4e
+
21ab4e
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V0 $H1 $B1/${V0}1
21ab4e
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V0 $H2 $B2/${V0}2
21ab4e
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" shd_up_status_1
21ab4e
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" shd_up_status_2
21ab4e
+
21ab4e
+# Bring down shd on 2nd node
21ab4e
+kill -15 $(get_shd_pid_2)
21ab4e
+
21ab4e
+# Bring down glusterd on 1st node
21ab4e
+TEST kill_glusterd 1
21ab4e
+
21ab4e
+# Bring back 1st glusterd
21ab4e
+TEST $glusterd_1
21ab4e
+
21ab4e
+# We need to wait for PROCESS_UP_TIMEOUT and then check that the shd service
21ab4e
+# has not come up on node 2
21ab4e
+sleep $PROCESS_UP_TIMEOUT
21ab4e
+EXPECT "N" shd_up_status_2
21ab4e
+
21ab4e
+cleanup;
21ab4e
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
21ab4e
index 9e9d609..91cc12e 100644
21ab4e
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
21ab4e
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
21ab4e
@@ -4902,10 +4902,6 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
21ab4e
         cds_list_for_each_entry (volinfo, &conf->volumes, vol_list) {
21ab4e
                 if (volinfo->status != GLUSTERD_STATUS_STARTED)
21ab4e
                         continue;
21ab4e
-                if (start_svcs == _gf_false) {
21ab4e
-                        start_svcs = _gf_true;
21ab4e
-                        glusterd_svcs_manager (NULL);
21ab4e
-                }
21ab4e
                 gf_msg_debug (this->name, 0, "starting the volume %s",
21ab4e
                         volinfo->volname);
21ab4e
 
21ab4e
@@ -4928,6 +4924,11 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
21ab4e
                          */
21ab4e
                         continue;
21ab4e
                 } else {
21ab4e
+                        if (start_svcs == _gf_false) {
21ab4e
+                                start_svcs = _gf_true;
21ab4e
+                                glusterd_svcs_manager (NULL);
21ab4e
+                        }
21ab4e
+
21ab4e
                         cds_list_for_each_entry (brickinfo, &volinfo->bricks,
21ab4e
                                                  brick_list) {
21ab4e
                                 glusterd_brick_start (volinfo, brickinfo,
21ab4e
@@ -4940,8 +4941,8 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
21ab4e
                 cds_list_for_each_entry (volinfo, &snap->volumes, vol_list) {
21ab4e
                         if (volinfo->status != GLUSTERD_STATUS_STARTED)
21ab4e
                                 continue;
21ab4e
-                        /* Check the quorum, if quorum is not met, don't start the
21ab4e
-                           bricks
21ab4e
+                        /* Check the quorum, if quorum is not met, don't start
21ab4e
+                         * the bricks
21ab4e
                         */
21ab4e
                         ret = check_quorum_for_brick_start (volinfo,
21ab4e
                                                             node_quorum);
21ab4e
@@ -9403,228 +9404,6 @@ out:
21ab4e
 }
21ab4e
 
21ab4e
 int
21ab4e
-<<<<<<< 07a9e00a5702e76932142e9d9cdc2df601632b7a
21ab4e
-=======
21ab4e
-glusterd_volume_tier_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict)
21ab4e
-{
21ab4e
-        char                 key[256]      = {0,};
21ab4e
-        char                *node_uuid     = NULL;
21ab4e
-        char                *node_uuid_str = NULL;
21ab4e
-        char                *volname       = NULL;
21ab4e
-        dict_t              *ctx_dict      = NULL;
21ab4e
-        double               elapsed_time  = 0;
21ab4e
-        glusterd_volinfo_t  *volinfo       = NULL;
21ab4e
-        int                  ret           = 0;
21ab4e
-        int32_t              index         = 0;
21ab4e
-        int32_t              count         = 0;
21ab4e
-        int32_t              value32       = 0;
21ab4e
-        uint64_t             value         = 0;
21ab4e
-        xlator_t            *this           = NULL;
21ab4e
-        char                *task_id_str   = NULL;
21ab4e
-
21ab4e
-        this = THIS;
21ab4e
-        GF_VALIDATE_OR_GOTO (this->name, this, out);
21ab4e
-        GF_VALIDATE_OR_GOTO (this->name, rsp_dict, out);
21ab4e
-
21ab4e
-        if (aggr) {
21ab4e
-                ctx_dict = aggr;
21ab4e
-
21ab4e
-        } else {
21ab4e
-                 gf_msg (this->name, GF_LOG_ERROR, 0,
21ab4e
-                         GD_MSG_OPCTX_GET_FAIL,
21ab4e
-                         "Operation Context is not present");
21ab4e
-                goto out;
21ab4e
-        }
21ab4e
-
21ab4e
-        if (!ctx_dict)
21ab4e
-                goto out;
21ab4e
-
21ab4e
-        ret = dict_get_str (ctx_dict, "volname", &volname);
21ab4e
-        if (ret) {
21ab4e
-                gf_msg (this->name, GF_LOG_ERROR, 0,
21ab4e
-                        GD_MSG_DICT_GET_FAILED,
21ab4e
-                        "Unable to get volume name");
21ab4e
-                goto out;
21ab4e
-        }
21ab4e
-
21ab4e
-        ret  = glusterd_volinfo_find (volname, &volinfo);
21ab4e
-
21ab4e
-        if (ret)
21ab4e
-                goto out;
21ab4e
-
21ab4e
-        ret = dict_get_int32 (rsp_dict, "count", &index);
21ab4e
-        if (ret)
21ab4e
-                gf_msg (this->name, GF_LOG_ERROR, 0,
21ab4e
-                        GD_MSG_DICT_GET_FAILED,
21ab4e
-                        "failed to get index");
21ab4e
-
21ab4e
-        memset (key, 0, 256);
21ab4e
-        snprintf (key, 256, "node-uuid-%d", index);
21ab4e
-        ret = dict_get_str (rsp_dict, key, &node_uuid);
21ab4e
-        if (!ret) {
21ab4e
-                node_uuid_str = gf_strdup (node_uuid);
21ab4e
-
21ab4e
-        }
21ab4e
-        ret = dict_get_int32 (ctx_dict, "count", &count);
21ab4e
-        count++;
21ab4e
-        ret = dict_set_int32 (ctx_dict, "count", count);
21ab4e
-        if (ret)
21ab4e
-                gf_msg (this->name, GF_LOG_ERROR, 0,
21ab4e
-                                GD_MSG_DICT_SET_FAILED,
21ab4e
-                                "Failed to set count");
21ab4e
-
21ab4e
-        memset (key, 0, 256);
21ab4e
-        snprintf (key, 256, "node-uuid-%d", count);
21ab4e
-        ret = dict_set_dynstr (ctx_dict, key, node_uuid_str);
21ab4e
-        if (ret) {
21ab4e
-                gf_msg_debug (this->name, 0,
21ab4e
-                                "failed to set node-uuid");
21ab4e
-        }
21ab4e
-
21ab4e
-        snprintf (key, 256, "files-%d", index);
21ab4e
-        ret = dict_get_uint64 (rsp_dict, key, &value);
21ab4e
-        if (!ret) {
21ab4e
-                memset (key, 0, 256);
21ab4e
-                snprintf (key, 256, "files-%d", count);
21ab4e
-                ret = dict_set_uint64 (ctx_dict, key, value);
21ab4e
-                if (ret) {
21ab4e
-                        gf_msg_debug (this->name, 0,
21ab4e
-                                "failed to set the file count");
21ab4e
-                }
21ab4e
-        }
21ab4e
-
21ab4e
-        memset (key, 0, 256);
21ab4e
-        snprintf (key, 256, "size-%d", index);
21ab4e
-        ret = dict_get_uint64 (rsp_dict, key, &value);
21ab4e
-        if (!ret) {
21ab4e
-                memset (key, 0, 256);
21ab4e
-                snprintf (key, 256, "size-%d", count);
21ab4e
-                ret = dict_set_uint64 (ctx_dict, key, value);
21ab4e
-                if (ret) {
21ab4e
-                        gf_msg_debug (this->name, 0,
21ab4e
-                                "failed to set the size of migration");
21ab4e
-                }
21ab4e
-        }
21ab4e
-
21ab4e
-        memset (key, 0, 256);
21ab4e
-        snprintf (key, 256, "lookups-%d", index);
21ab4e
-        ret = dict_get_uint64 (rsp_dict, key, &value);
21ab4e
-        if (!ret) {
21ab4e
-                memset (key, 0, 256);
21ab4e
-                snprintf (key, 256, "lookups-%d", count);
21ab4e
-                ret = dict_set_uint64 (ctx_dict, key, value);
21ab4e
-                if (ret) {
21ab4e
-                        gf_msg_debug (this->name, 0,
21ab4e
-                                "failed to set looked up file count");
21ab4e
-                }
21ab4e
-        }
21ab4e
-
21ab4e
-        memset (key, 0, 256);
21ab4e
-        snprintf (key, 256, "status-%d", index);
21ab4e
-        ret = dict_get_int32 (rsp_dict, key, &value32);
21ab4e
-        if (!ret) {
21ab4e
-                memset (key, 0, 256);
21ab4e
-                snprintf (key, 256, "status-%d", count);
21ab4e
-                ret = dict_set_int32 (ctx_dict, key, value32);
21ab4e
-                if (ret) {
21ab4e
-                        gf_msg_debug (this->name, 0,
21ab4e
-                                "failed to set status");
21ab4e
-                }
21ab4e
-        }
21ab4e
-
21ab4e
-        memset (key, 0, 256);
21ab4e
-        snprintf (key, 256, "failures-%d", index);
21ab4e
-        ret = dict_get_uint64 (rsp_dict, key, &value);
21ab4e
-        if (!ret) {
21ab4e
-                memset (key, 0, 256);
21ab4e
-                snprintf (key, 256, "failures-%d", count);
21ab4e
-                ret = dict_set_uint64 (ctx_dict, key, value);
21ab4e
-                if (ret) {
21ab4e
-                        gf_msg_debug (this->name, 0,
21ab4e
-                                "failed to set failure count");
21ab4e
-                }
21ab4e
-        }
21ab4e
-
21ab4e
-        memset (key, 0, 256);
21ab4e
-        snprintf (key, 256, "skipped-%d", index);
21ab4e
-        ret = dict_get_uint64 (rsp_dict, key, &value);
21ab4e
-        if (!ret) {
21ab4e
-                memset (key, 0, 256);
21ab4e
-                snprintf (key, 256, "skipped-%d", count);
21ab4e
-                ret = dict_set_uint64 (ctx_dict, key, value);
21ab4e
-                if (ret) {
21ab4e
-                        gf_msg_debug (this->name, 0,
21ab4e
-                                "failed to set skipped count");
21ab4e
-                }
21ab4e
-        }
21ab4e
-        memset (key, 0, 256);
21ab4e
-        snprintf (key, 256, "run-time-%d", index);
21ab4e
-        ret = dict_get_double (rsp_dict, key, &elapsed_time);
21ab4e
-        if (!ret) {
21ab4e
-                memset (key, 0, 256);
21ab4e
-                snprintf (key, 256, "run-time-%d", count);
21ab4e
-                ret = dict_set_double (ctx_dict, key, elapsed_time);
21ab4e
-                if (ret) {
21ab4e
-                        gf_msg_debug (this->name, 0,
21ab4e
-                                "failed to set run-time");
21ab4e
-                }
21ab4e
-        }
21ab4e
-
21ab4e
-        memset (key, 0, 256);
21ab4e
-        snprintf (key, 256, "demoted-%d", index);
21ab4e
-        ret = dict_get_uint64 (rsp_dict, key, &value);
21ab4e
-        if (!ret) {
21ab4e
-                memset (key, 0, 256);
21ab4e
-                snprintf (key, 256, "demoted-%d", count);
21ab4e
-                ret = dict_set_uint64 (ctx_dict, key, value);
21ab4e
-                if (ret) {
21ab4e
-                        gf_msg_debug (this->name, 0,
21ab4e
-                                "failed to set demoted count");
21ab4e
-                }
21ab4e
-        }
21ab4e
-        memset (key, 0, 256);
21ab4e
-        snprintf (key, 256, "promoted-%d", index);
21ab4e
-        ret = dict_get_uint64 (rsp_dict, key, &value);
21ab4e
-        if (!ret) {
21ab4e
-                memset (key, 0, 256);
21ab4e
-                snprintf (key, 256, "promoted-%d", count);
21ab4e
-                ret = dict_set_uint64 (ctx_dict, key, value);
21ab4e
-                if (ret) {
21ab4e
-                        gf_msg_debug (this->name, 0,
21ab4e
-                                "failed to set promoted count");
21ab4e
-                }
21ab4e
-        }
21ab4e
-
21ab4e
-        memset (key, 0, 256);
21ab4e
-        snprintf (key, 256, "time-left-%d", index);
21ab4e
-        ret = dict_get_uint64 (rsp_dict, key, &value);
21ab4e
-        if (!ret) {
21ab4e
-                memset (key, 0, 256);
21ab4e
-                snprintf (key, 256, "time-left-%d", count);
21ab4e
-                ret = dict_set_uint64 (ctx_dict, key, value);
21ab4e
-                if (ret) {
21ab4e
-                        gf_msg_debug (THIS->name, 0,
21ab4e
-                                "failed to set time-left");
21ab4e
-                }
21ab4e
-        }
21ab4e
-
21ab4e
-        ret = dict_get_str (rsp_dict, GF_REMOVE_BRICK_TID_KEY,
21ab4e
-                                &task_id_str);
21ab4e
-        if (ret) {
21ab4e
-                gf_msg_debug (this->name, errno,
21ab4e
-                                "Missing remove-brick-id");
21ab4e
-        } else
21ab4e
-                ret = dict_set_str (ctx_dict, GF_REMOVE_BRICK_TID_KEY,
21ab4e
-                                task_id_str);
21ab4e
-
21ab4e
-        ret = 0;
21ab4e
-
21ab4e
-out:
21ab4e
-        return ret;
21ab4e
-}
21ab4e
-
21ab4e
-int
21ab4e
->>>>>>> dht/rebalance Estimate time to complete rebalance
21ab4e
 glusterd_sys_exec_output_rsp_dict (dict_t *dst, dict_t *src)
21ab4e
 {
21ab4e
         char           output_name[PATH_MAX] = "";
21ab4e
-- 
21ab4e
1.8.3.1
21ab4e