From 0568fbdb19131f571e3ee61d7e275281cbc40dfd Mon Sep 17 00:00:00 2001
From: Gaurav Kumar Garg <ggarg@redhat.com>
Date: Tue, 25 Aug 2015 11:15:56 +0530
Subject: [PATCH 299/304] glusterd: stop all the daemon services on peer detach

This patch is a backport of: http://review.gluster.org/#/c/11509/

Currently glusterd does not stop all the daemon services on peer detach.

With this fix, peer detach cleanup is done properly and all the daemons
that were running on the node before the peer detach are stopped.

    >>Change-Id: Ifed403ed09187e84f2a60bf63135156ad1f15775
    >>BUG: 1255386
    >>Signed-off-by: Gaurav Kumar Garg <ggarg@redhat.com>

Change-Id: I632e61e4cb6b7cbfcb626ab2217b55071ed690ca
BUG: 1238070
Signed-off-by: Gaurav Kumar Garg <ggarg@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/56148
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
Tested-by: Atin Mukherjee <amukherj@redhat.com>
---
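Note: a minimal illustration of the behaviour this change enforces (the host
names, volume name and brick paths below are placeholders, not part of the
patch):

    # on node1: form a 2-node cluster and create/start a volume
    gluster peer probe node2
    gluster volume create vol0 node1:/bricks/vol0-0 node1:/bricks/vol0-1
    gluster volume start vol0

    # the gluster nfs daemon now runs on both peers; on node2:
    ps auxww | grep glusterfs | grep nfs.pid | grep -v grep

    # on node1: detach node2 from the cluster
    gluster peer detach node2

    # with this fix, the detach cleanup on node2 stops the daemons that
    # were running there (nfs, snapd, ...), so the ps check above should
    # now return nothing on the detached node
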
 .../bug-1238706-daemons-stop-on-peer-cleanup.t     |   41 ++++++++++++++++++++
 tests/volume.rc                                    |   16 ++++----
 xlators/mgmt/glusterd/src/glusterd-messages.h      |   16 ++++++++
 xlators/mgmt/glusterd/src/glusterd-sm.c            |   40 ++++++++++++-------
 4 files changed, 90 insertions(+), 23 deletions(-)
 create mode 100644 tests/bugs/glusterd/bug-1238706-daemons-stop-on-peer-cleanup.t

diff --git a/tests/bugs/glusterd/bug-1238706-daemons-stop-on-peer-cleanup.t b/tests/bugs/glusterd/bug-1238706-daemons-stop-on-peer-cleanup.t
new file mode 100644
index 0000000..9ff1758
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1238706-daemons-stop-on-peer-cleanup.t
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+## Test case for stopping all running daemon services on peer detach.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+cleanup;
+
+
+## Start a 2-node virtual cluster
+TEST launch_cluster 2;
+
+## Peer probe server 2 from the server 1 CLI
+TEST $CLI_1 peer probe $H2;
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+
+## Create and start a volume
+TEST $CLI_1 volume create $V0 $H1:$B1/${V0}0 $H1:$B1/${V0}1
+TEST $CLI_1 volume start $V0
+
+## TODO: Add test cases for the quota and snapshot daemons. Currently the
+##       quota daemon does not work in the cluster framework, and the snapd
+##       daemon starts on only one node in the cluster framework. Add the
+##       test cases once patch http://review.gluster.org/#/c/11666/ is merged.
+
+## With 2 nodes in the cluster, the "nfs" daemon should run on both nodes.
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" get_nfs_count
+
+## Detach the 2nd node from the cluster.
+TEST $CLI_1 peer detach $H2;
+
+
+## After detaching the 2nd node, only 1 nfs daemon should be left running.
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_nfs_count
+
+cleanup;
+
diff --git a/tests/volume.rc b/tests/volume.rc
index aeffa4a..d90dda7 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -547,6 +547,14 @@ function get_quotad_count {
         ps auxww | grep glusterfs | grep quotad.pid | grep -v grep | wc -l
 }
 
+function get_nfs_count {
+        ps auxww | grep glusterfs | grep nfs.pid | grep -v grep | wc -l
+}
+
+function get_snapd_count {
+        ps auxww | grep glusterfs | grep snapd.pid | grep -v grep | wc -l
+}
+
 function quota_list_field () {
         local QUOTA_PATH=$1
         local FIELD=$2
@@ -565,11 +573,3 @@ function quota_usage()
 {
         quota_list_field $1 4
 }
-
-function get_nfs_count {
-        ps auxww | grep glusterfs | grep nfs.pid | grep -v grep | wc -l
-}
-
-function get_snapd_count {
-        ps auxww | grep glusterfs | grep snapd.pid | grep -v grep | wc -l
-}
diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h
index addd3e2..d678372 100644
--- a/xlators/mgmt/glusterd/src/glusterd-messages.h
+++ b/xlators/mgmt/glusterd/src/glusterd-messages.h
@@ -4594,6 +4594,22 @@
  */
 #define GD_MSG_SVC_STOP_SUCCESS                     (GLUSTERD_COMP_BASE + 568)
 
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_PARAM_NULL                           (GLUSTERD_COMP_BASE + 569)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SVC_STOP_FAIL                        (GLUSTERD_COMP_BASE + 570)
+
 /*------------*/
 #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
 #endif /* !_GLUSTERD_MESSAGES_H_ */
diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c
index e8e9b3a..8e91cef 100644
--- a/xlators/mgmt/glusterd/src/glusterd-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-sm.c
@@ -35,6 +35,7 @@
 #include "glusterd-op-sm.h"
 #include "glusterd-utils.h"
 #include "glusterd-store.h"
+#include "glusterd-svc-helper.h"
 #include "glusterd-snapshot-utils.h"
 #include "glusterd-server-quorum.h"
 
@@ -600,17 +601,29 @@ out:
 /* Clean up stale volumes on the peer being detached. The volumes which have
  * bricks on other peers are stale with respect to the detached peer.
  */
-static int
+static void
 glusterd_peer_detach_cleanup (glusterd_conf_t *priv)
 {
-        int                     ret = -1;
-        glusterd_volinfo_t      *volinfo = NULL;
+        int                     ret          = -1;
+        glusterd_volinfo_t      *volinfo     = NULL;
         glusterd_volinfo_t      *tmp_volinfo = NULL;
+        glusterd_svc_t          *svc         = NULL;
 
         GF_ASSERT (priv);
 
         cds_list_for_each_entry_safe (volinfo, tmp_volinfo, &priv->volumes,
                                       vol_list) {
+                /* Stop the snapd daemon service if it is running */
+                if (!volinfo->is_snap_volume) {
+                        svc = &(volinfo->snapd.svc);
+                        ret = svc->stop (svc, SIGTERM);
+                        if (ret) {
+                                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_SVC_STOP_FAIL, "Failed to "
+                                        "stop snapd daemon service.");
+                        }
+                }
+
                 /* The peer detach checks make sure that, at this point in the
                  * detach process, there are only volumes contained completely
                  * within or completely outside the detached peer.
@@ -627,14 +640,17 @@ glusterd_peer_detach_cleanup (glusterd_conf_t *priv)
                                 gf_msg (THIS->name, GF_LOG_ERROR, 0,
                                         GD_MSG_STALE_VOL_REMOVE_FAIL,
                                         "Error deleting stale volume");
-                                goto out;
                         }
                 }
         }
-        ret = 0;
-out:
-        gf_msg_debug (THIS->name, 0, "Returning %d", ret);
-        return ret;
+
+        /* Stop all daemon services on the detaching node after peer detach */
+        ret = glusterd_svcs_stop ();
+        if (ret) {
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SVC_STOP_FAIL,
+                        "Failed to stop all daemon services.");
+        }
 }
 
 static int
@@ -679,13 +695,7 @@ glusterd_ac_handle_friend_remove_req (glusterd_friend_sm_event_t *event,
         }
         rcu_read_unlock ();
 
-        ret = glusterd_peer_detach_cleanup (priv);
-        if (ret) {
-                gf_msg (THIS->name, GF_LOG_WARNING, 0,
-                        GD_MSG_PEER_DETACH_CLEANUP_FAIL,
-                        "Peer detach cleanup was not successful");
-                ret = 0;
-        }
+        glusterd_peer_detach_cleanup (priv);
 out:
         if (new_event)
                 GF_FREE (new_event->peername);
-- 
1.7.1
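
To exercise the new regression test (a sketch, assuming the standard
GlusterFS test harness shipped in the source tree; run as root from the
top of the glusterfs checkout):

    prove -vf tests/bugs/glusterd/bug-1238706-daemons-stop-on-peer-cleanup.t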