21ab4e
From a085b557aa54302d5bbb08fc8139d7a8e146e30e Mon Sep 17 00:00:00 2001
21ab4e
From: Samikshan Bairagya <samikshan@gmail.com>
21ab4e
Date: Mon, 22 May 2017 12:13:25 +0530
21ab4e
Subject: [PATCH 461/473] glusterd: Don't spawn new glusterfsds on node reboot
21ab4e
 with brick-mux
21ab4e
21ab4e
With brick multiplexing enabled, upon a node reboot new bricks were
21ab4e
not being attached to the first spawned brick process even though
21ab4e
there weren't any compatibility issues.
21ab4e
21ab4e
The reason for this is that upon glusterd restart after a node
21ab4e
reboot, since brick services aren't running, glusterd starts the
21ab4e
bricks in a "no-wait" mode. So after a brick process is spawned for
21ab4e
the first brick, there isn't enough time for the corresponding pid
21ab4e
 file to get populated with a value before the compatibility check is
21ab4e
made for the next brick.
21ab4e
21ab4e
This commit solves this by iteratively waiting for the pidfile to be
21ab4e
populated in the brick compatibility comparison stage before checking
21ab4e
if the brick process is alive.
21ab4e
21ab4e
> Reviewed-on: https://review.gluster.org/17307
21ab4e
> Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
21ab4e
> Smoke: Gluster Build System <jenkins@build.gluster.org>
21ab4e
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
21ab4e
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
21ab4e
21ab4e
Change-Id: Ibd1f8e54c63e4bb04162143c9d70f09918a44aa4
21ab4e
BUG: 1450889
21ab4e
Signed-off-by: Samikshan Bairagya <sbairagy@redhat.com>
21ab4e
Reviewed-on: https://code.engineering.redhat.com/gerrit/106803
21ab4e
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
21ab4e
---
21ab4e
 tests/bugs/glusterd/bug-1451248-mux-reboot-node.t | 54 +++++++++++++++++++++++
21ab4e
 xlators/mgmt/glusterd/src/glusterd-handler.c      |  6 +++
21ab4e
 xlators/mgmt/glusterd/src/glusterd-pmap.c         |  1 +
21ab4e
 xlators/mgmt/glusterd/src/glusterd-utils.c        | 18 ++++++++
21ab4e
 4 files changed, 79 insertions(+)
21ab4e
 create mode 100644 tests/bugs/glusterd/bug-1451248-mux-reboot-node.t
21ab4e
21ab4e
diff --git a/tests/bugs/glusterd/bug-1451248-mux-reboot-node.t b/tests/bugs/glusterd/bug-1451248-mux-reboot-node.t
21ab4e
new file mode 100644
21ab4e
index 0000000..5d8ce6e
21ab4e
--- /dev/null
21ab4e
+++ b/tests/bugs/glusterd/bug-1451248-mux-reboot-node.t
21ab4e
@@ -0,0 +1,54 @@
21ab4e
+#!/bin/bash
21ab4e
+
21ab4e
+. $(dirname $0)/../../include.rc
21ab4e
+. $(dirname $0)/../../traps.rc
21ab4e
+. $(dirname $0)/../../volume.rc
21ab4e
+
21ab4e
+function count_up_bricks {
21ab4e
+        $CLI --xml volume status all | grep '<status>1' | wc -l
21ab4e
+}
21ab4e
+
21ab4e
+function count_brick_processes {
21ab4e
+	pgrep glusterfsd | wc -l
21ab4e
+}
21ab4e
+
21ab4e
+function count_brick_pids {
21ab4e
+        $CLI --xml volume status all | sed -n '/.*<pid>\([^<]*\).*/s//\1/p' \
21ab4e
+                                     | grep -v "N/A" | sort | uniq | wc -l
21ab4e
+}
21ab4e
+
21ab4e
+cleanup;
21ab4e
+
21ab4e
+TEST glusterd
21ab4e
+TEST $CLI volume set all cluster.brick-multiplex on
21ab4e
+push_trapfunc "$CLI volume set all cluster.brick-multiplex off"
21ab4e
+push_trapfunc "cleanup"
21ab4e
+
21ab4e
+TEST $CLI volume create $V0 $H0:$B0/brick{0..2}
21ab4e
+TEST $CLI volume start $V0
21ab4e
+
21ab4e
+EXPECT 1 count_brick_processes
21ab4e
+EXPECT 1 count_brick_pids
21ab4e
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_up_bricks
21ab4e
+
21ab4e
+pkill gluster
21ab4e
+TEST glusterd
21ab4e
+
21ab4e
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_processes
21ab4e
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_pids
21ab4e
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_up_bricks
21ab4e
+
21ab4e
+pkill glusterd
21ab4e
+TEST glusterd
21ab4e
+
21ab4e
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_processes
21ab4e
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_pids
21ab4e
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_up_bricks
21ab4e
+
21ab4e
+TEST $CLI volume create $V1 $H0:$B0/brick{3..5}
21ab4e
+TEST $CLI volume start $V1
21ab4e
+
21ab4e
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_processes
21ab4e
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_pids
21ab4e
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 count_up_bricks
21ab4e
+
21ab4e
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
21ab4e
index 7c2ee1b..1fd7813 100644
21ab4e
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
21ab4e
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
21ab4e
@@ -5650,7 +5650,10 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,
21ab4e
                 rpc_clnt_set_connected (&rpc->conn);
21ab4e
                 gf_msg_debug (this->name, 0, "Connected to %s:%s",
21ab4e
                         brickinfo->hostname, brickinfo->path);
21ab4e
+
21ab4e
                 glusterd_set_brick_status (brickinfo, GF_BRICK_STARTED);
21ab4e
+                brickinfo->started_here = _gf_true;
21ab4e
+
21ab4e
                 gf_event (EVENT_BRICK_CONNECTED, "peer=%s;volume=%s;brick=%s",
21ab4e
                           brickinfo->hostname, volinfo->volname,
21ab4e
                           brickinfo->path);
21ab4e
@@ -5680,6 +5683,9 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,
21ab4e
                                 GD_MSG_BRICK_DISCONNECTED,
21ab4e
                                 "Brick %s:%s has disconnected from glusterd.",
21ab4e
                                 brickinfo->hostname, brickinfo->path);
21ab4e
+
21ab4e
+                        brickinfo->started_here = _gf_false;
21ab4e
+
21ab4e
                         ret = get_volinfo_from_brickid (brickid, &volinfo);
21ab4e
                         if (ret) {
21ab4e
                                 gf_msg (this->name, GF_LOG_ERROR, 0,
21ab4e
diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.c b/xlators/mgmt/glusterd/src/glusterd-pmap.c
21ab4e
index d67117f..1670b38 100644
21ab4e
--- a/xlators/mgmt/glusterd/src/glusterd-pmap.c
21ab4e
+++ b/xlators/mgmt/glusterd/src/glusterd-pmap.c
21ab4e
@@ -593,6 +593,7 @@ __gluster_pmap_signout (rpcsvc_request_t *req)
21ab4e
                         GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo,
21ab4e
                                                     conf);
21ab4e
                         sys_unlink (pidfile);
21ab4e
+                        brickinfo->started_here = _gf_false;
21ab4e
                 }
21ab4e
         }
21ab4e
 
21ab4e
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
21ab4e
index 8af9fb1..f98f5fe 100644
21ab4e
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
21ab4e
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
21ab4e
@@ -2133,6 +2133,8 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,
21ab4e
         GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, conf);
21ab4e
         gf_msg_debug (this->name,  0, "Unlinking pidfile %s", pidfile);
21ab4e
         (void) sys_unlink (pidfile);
21ab4e
+
21ab4e
+        brickinfo->started_here = _gf_false;
21ab4e
 out:
21ab4e
         return ret;
21ab4e
 }
21ab4e
@@ -5159,6 +5161,7 @@ find_compat_brick_in_vol (glusterd_conf_t *conf,
21ab4e
         glusterd_brickinfo_t    *other_brick;
21ab4e
         char                    pidfile2[PATH_MAX]      = {0};
21ab4e
         int32_t                 pid2                    = -1;
21ab4e
+        int16_t                 retries                 = 15;
21ab4e
 
21ab4e
         /*
21ab4e
          * If comp_vol is provided, we have to check *volume* compatibility
21ab4e
@@ -5201,8 +5204,22 @@ find_compat_brick_in_vol (glusterd_conf_t *conf,
21ab4e
                 if (strcmp (brickinfo->hostname, other_brick->hostname) != 0) {
21ab4e
                         continue;
21ab4e
                 }
21ab4e
+
21ab4e
                 GLUSTERD_GET_BRICK_PIDFILE (pidfile2, srch_vol, other_brick,
21ab4e
                                             conf);
21ab4e
+
21ab4e
+                /* It is possible that the pidfile hasn't yet been populated,
21ab4e
+                 * when bricks are started in "no-wait" mode; for example
21ab4e
+                 * when bricks are started by glusterd_restart_bricks(). So
21ab4e
+                 * wait for the pidfile to be populated with a value before
21ab4e
+                 * checking if the service is running */
21ab4e
+                while (retries > 0) {
21ab4e
+                        if (sys_access (pidfile2, F_OK) == 0)
21ab4e
+                                break;
21ab4e
+                        sleep (1);
21ab4e
+                        retries--;
21ab4e
+                }
21ab4e
+
21ab4e
                 if (!gf_is_service_running (pidfile2, &pid2)) {
21ab4e
                         gf_log (this->name, GF_LOG_INFO,
21ab4e
                                 "cleaning up dead brick %s:%s",
21ab4e
@@ -5446,6 +5463,7 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
21ab4e
                                 socketpath, brickinfo->path, volinfo->volname);
21ab4e
                         (void) glusterd_brick_connect (volinfo, brickinfo,
21ab4e
                                         socketpath);
21ab4e
+                        brickinfo->started_here = _gf_true;
21ab4e
                 }
21ab4e
                 return 0;
21ab4e
         }
21ab4e
-- 
21ab4e
1.8.3.1
21ab4e