From 0568fbdb19131f571e3ee61d7e275281cbc40dfd Mon Sep 17 00:00:00 2001
From: Gaurav Kumar Garg <ggarg@redhat.com>
Date: Tue, 25 Aug 2015 11:15:56 +0530
Subject: [PATCH 299/304] glusterd: stop all the daemon services on peer detach

This patch is backport of: http://review.gluster.org/#/c/11509/

Currently glusterd does not stop all the daemon services on peer detach.

With this fix it performs the peer detach cleanup properly and stops all
the daemons that were running on the node before the peer detach.
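
As a rough manual check (mirroring the regression test added below; the
host names node1/node2, the volume name testvol and the brick paths are
placeholders, not part of the patch), on a two-node cluster the per-node
daemons should disappear on the detached node:

    # on node1: probe node2, then create/start a volume with bricks on node1 only
    gluster peer probe node2
    gluster volume create testvol node1:/bricks/testvol0 node1:/bricks/testvol1
    gluster volume start testvol

    # on node2: the gluster nfs daemon should be running
    ps auxww | grep glusterfs | grep nfs.pid | grep -v grep

    # on node1: detach node2 from the cluster
    gluster peer detach node2

    # on node2: with this fix the nfs daemon should no longer be running
    ps auxww | grep glusterfs | grep nfs.pid | grep -v grep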

    >>Change-Id: Ifed403ed09187e84f2a60bf63135156ad1f15775
    >>BUG: 1255386
    >>Signed-off-by: Gaurav Kumar Garg <ggarg@redhat.com>

Change-Id: I632e61e4cb6b7cbfcb626ab2217b55071ed690ca
BUG: 1238070
Signed-off-by: Gaurav Kumar Garg <ggarg@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/56148
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
Tested-by: Atin Mukherjee <amukherj@redhat.com>
---
 .../bug-1238706-daemons-stop-on-peer-cleanup.t     |   41 ++++++++++++++++++++
 tests/volume.rc                                    |   16 ++++----
 xlators/mgmt/glusterd/src/glusterd-messages.h      |   16 ++++++++
 xlators/mgmt/glusterd/src/glusterd-sm.c            |   40 ++++++++++++-------
 4 files changed, 90 insertions(+), 23 deletions(-)
 create mode 100644 tests/bugs/glusterd/bug-1238706-daemons-stop-on-peer-cleanup.t

diff --git a/tests/bugs/glusterd/bug-1238706-daemons-stop-on-peer-cleanup.t b/tests/bugs/glusterd/bug-1238706-daemons-stop-on-peer-cleanup.t
new file mode 100644
index 0000000..9ff1758
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1238706-daemons-stop-on-peer-cleanup.t
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+## Test case for stopping all running daemon services on peer detach.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+cleanup;
+
+
+## Start a 2 node virtual cluster
+TEST launch_cluster 2;
+
+## Peer probe server 2 from server 1 cli
+TEST $CLI_1 peer probe $H2;
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+
+## Creating and starting volume
+TEST $CLI_1 volume create $V0 $H1:$B1/${V0}0 $H1:$B1/${V0}1
+TEST $CLI_1 volume start $V0
+
+## To Do: Add test cases for the quota and snapshot daemons. Currently the
+##        quota daemon does not work in the cluster framework, and the snapd
+##        daemon starts on only one node in it. Add these test cases once
+##        patch http://review.gluster.org/#/c/11666/ is merged.
+
+## With 2 nodes in the cluster, the "nfs" daemon should run on both of them.
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" get_nfs_count
+
+## Detach 2nd node from the cluster.
+TEST $CLI_1 peer detach $H2;
+
+
+## After detaching the 2nd node, only 1 nfs daemon should be left running.
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_nfs_count
+
+cleanup;
+
diff --git a/tests/volume.rc b/tests/volume.rc
index aeffa4a..d90dda7 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -547,6 +547,14 @@ function get_quotad_count {
         ps auxww | grep glusterfs | grep quotad.pid | grep -v grep | wc -l
 }
 
+function get_nfs_count {
+        ps auxww | grep glusterfs | grep nfs.pid | grep -v grep | wc -l
+}
+
+function get_snapd_count {
+        ps auxww | grep glusterfs | grep snapd.pid | grep -v grep | wc -l
+}
+
 function quota_list_field () {
         local QUOTA_PATH=$1
         local FIELD=$2
@@ -565,11 +573,3 @@ function quota_usage()
 {
         quota_list_field $1 4
 }
-
-function get_nfs_count {
-        ps auxww | grep glusterfs | grep nfs.pid | grep -v grep | wc -l
-}
-
-function get_snapd_count {
-        ps auxww | grep glusterfs | grep snapd.pid | grep -v grep | wc -l
-}
diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h
index addd3e2..d678372 100644
--- a/xlators/mgmt/glusterd/src/glusterd-messages.h
+++ b/xlators/mgmt/glusterd/src/glusterd-messages.h
@@ -4594,6 +4594,22 @@
  */
 #define GD_MSG_SVC_STOP_SUCCESS                     (GLUSTERD_COMP_BASE + 568)
 
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_PARAM_NULL                           (GLUSTERD_COMP_BASE + 569)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_SVC_STOP_FAIL                        (GLUSTERD_COMP_BASE + 570)
+
 /*------------*/
 #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
 #endif /* !_GLUSTERD_MESSAGES_H_ */
diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c
index e8e9b3a..8e91cef 100644
--- a/xlators/mgmt/glusterd/src/glusterd-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-sm.c
@@ -35,6 +35,7 @@
 #include "glusterd-op-sm.h"
 #include "glusterd-utils.h"
 #include "glusterd-store.h"
+#include "glusterd-svc-helper.h"
 #include "glusterd-snapshot-utils.h"
 #include "glusterd-server-quorum.h"
 
@@ -600,17 +601,29 @@ out:
 /* Clean up stale volumes on the peer being detached. The volumes which have
  * bricks on other peers are stale with respect to the detached peer.
  */
-static int
+static void
 glusterd_peer_detach_cleanup (glusterd_conf_t *priv)
 {
-        int                     ret = -1;
-        glusterd_volinfo_t      *volinfo = NULL;
+        int                     ret          = -1;
+        glusterd_volinfo_t      *volinfo     = NULL;
         glusterd_volinfo_t      *tmp_volinfo = NULL;
+        glusterd_svc_t          *svc         = NULL;
 
         GF_ASSERT (priv);
 
         cds_list_for_each_entry_safe (volinfo, tmp_volinfo, &priv->volumes,
                                       vol_list) {
+                /* Stop the snapd daemon service if it is running */
+                if (!volinfo->is_snap_volume) {
+                        svc = &(volinfo->snapd.svc);
+                        ret = svc->stop (svc, SIGTERM);
+                        if (ret) {
+                                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_SVC_STOP_FAIL, "Failed to "
+                                        "stop snapd daemon service.");
+                        }
+                }
+
                 /* The peer detach checks make sure that, at this point in the
                  * detach process, there are only volumes contained completely
                  * within or completely outside the detached peer.
@@ -627,14 +640,17 @@ glusterd_peer_detach_cleanup (glusterd_conf_t *priv)
                                 gf_msg (THIS->name, GF_LOG_ERROR, 0,
                                         GD_MSG_STALE_VOL_REMOVE_FAIL,
                                         "Error deleting stale volume");
-                                goto out;
                         }
                 }
         }
-        ret = 0;
-out:
-        gf_msg_debug (THIS->name, 0, "Returning %d", ret);
-        return ret;
+
+        /* Stop all daemon services on the node once the peer is detached */
+        ret = glusterd_svcs_stop ();
+        if (ret) {
+                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                        GD_MSG_SVC_STOP_FAIL,
+                        "Failed to stop all daemon services.");
+        }
 }
 
 static int
@@ -679,13 +695,7 @@ glusterd_ac_handle_friend_remove_req (glusterd_friend_sm_event_t *event,
         }
         rcu_read_unlock ();
 
-        ret = glusterd_peer_detach_cleanup (priv);
-        if (ret) {
-                gf_msg (THIS->name, GF_LOG_WARNING, 0,
-                        GD_MSG_PEER_DETACH_CLEANUP_FAIL,
-                        "Peer detach cleanup was not successful");
-                ret = 0;
-        }
+        glusterd_peer_detach_cleanup (priv);
 out:
         if (new_event)
                 GF_FREE (new_event->peername);
-- 
1.7.1