From a085b557aa54302d5bbb08fc8139d7a8e146e30e Mon Sep 17 00:00:00 2001
From: Samikshan Bairagya <samikshan@gmail.com>
Date: Mon, 22 May 2017 12:13:25 +0530
Subject: [PATCH 461/473] glusterd: Don't spawn new glusterfsds on node reboot
 with brick-mux

With brick multiplexing enabled, new bricks were not being attached
to the first spawned brick process after a node reboot, even though
there were no compatibility issues.

The reason for this is that upon glusterd restart after a node
reboot, since brick services aren't running yet, glusterd starts the
bricks in "no-wait" mode. So after a brick process is spawned for
the first brick, there isn't enough time for the corresponding
pidfile to get populated with a value before the compatibility
check is made for the next brick.

This commit fixes that by having the brick compatibility check wait,
retrying for a bounded number of attempts, for the pidfile to be
populated before checking whether the brick process is alive.
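
The wait amounts to a bounded poll on the pidfile. A minimal
standalone sketch of that idea (not the glusterd code itself; the
helper name, pidfile path and retry count below are illustrative):

    #include <stdio.h>
    #include <unistd.h>

    /* Poll until the pidfile appears, sleeping one second between
     * attempts, and give up after `retries` tries.  Returns 0 once
     * the file exists, -1 otherwise. */
    static int wait_for_pidfile (const char *pidfile, int retries)
    {
            while (retries > 0) {
                    if (access (pidfile, F_OK) == 0)
                            return 0;
                    sleep (1);
                    retries--;
            }
            return -1;
    }

    int main (void)
    {
            if (wait_for_pidfile ("/var/run/gluster/example.pid", 15) != 0)
                    fprintf (stderr, "pidfile never appeared\n");
            return 0;
    }

In the patch itself the same loop sits inline in
find_compat_brick_in_vol(), using sys_access() on the path resolved
by GLUSTERD_GET_BRICK_PIDFILE(), so the existing
gf_is_service_running() check only runs once the pidfile exists (or
the retries are exhausted).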

> Reviewed-on: https://review.gluster.org/17307
> Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>

Change-Id: Ibd1f8e54c63e4bb04162143c9d70f09918a44aa4
BUG: 1450889
Signed-off-by: Samikshan Bairagya <sbairagy@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/106803
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
 tests/bugs/glusterd/bug-1451248-mux-reboot-node.t | 54 +++++++++++++++++++++++
 xlators/mgmt/glusterd/src/glusterd-handler.c      |  6 +++
 xlators/mgmt/glusterd/src/glusterd-pmap.c         |  1 +
 xlators/mgmt/glusterd/src/glusterd-utils.c        | 18 ++++++++
 4 files changed, 79 insertions(+)
 create mode 100644 tests/bugs/glusterd/bug-1451248-mux-reboot-node.t

diff --git a/tests/bugs/glusterd/bug-1451248-mux-reboot-node.t b/tests/bugs/glusterd/bug-1451248-mux-reboot-node.t
new file mode 100644
index 0000000..5d8ce6e
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1451248-mux-reboot-node.t
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../traps.rc
+. $(dirname $0)/../../volume.rc
+
+function count_up_bricks {
+        $CLI --xml volume status all | grep '<status>1' | wc -l
+}
+
+function count_brick_processes {
+	pgrep glusterfsd | wc -l
+}
+
+function count_brick_pids {
+        $CLI --xml volume status all | sed -n '/.*<pid>\([^<]*\).*/s//\1/p' \
+                                     | grep -v "N/A" | sort | uniq | wc -l
+}
+
+cleanup;
+
+TEST glusterd
+TEST $CLI volume set all cluster.brick-multiplex on
+push_trapfunc "$CLI volume set all cluster.brick-multiplex off"
+push_trapfunc "cleanup"
+
+TEST $CLI volume create $V0 $H0:$B0/brick{0..2}
+TEST $CLI volume start $V0
+
+EXPECT 1 count_brick_processes
+EXPECT 1 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_up_bricks
+
+pkill gluster
+TEST glusterd
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_processes
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_up_bricks
+
+pkill glusterd
+TEST glusterd
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_processes
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_up_bricks
+
+TEST $CLI volume create $V1 $H0:$B0/brick{3..5}
+TEST $CLI volume start $V1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_processes
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 count_up_bricks
+
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
index 7c2ee1b..1fd7813 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
@@ -5650,7 +5650,10 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,
                 rpc_clnt_set_connected (&rpc->conn);
                 gf_msg_debug (this->name, 0, "Connected to %s:%s",
                         brickinfo->hostname, brickinfo->path);
+
                 glusterd_set_brick_status (brickinfo, GF_BRICK_STARTED);
+                brickinfo->started_here = _gf_true;
+
                 gf_event (EVENT_BRICK_CONNECTED, "peer=%s;volume=%s;brick=%s",
                           brickinfo->hostname, volinfo->volname,
                           brickinfo->path);
@@ -5680,6 +5683,9 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,
                                 GD_MSG_BRICK_DISCONNECTED,
                                 "Brick %s:%s has disconnected from glusterd.",
                                 brickinfo->hostname, brickinfo->path);
+
+                        brickinfo->started_here = _gf_false;
+
                         ret = get_volinfo_from_brickid (brickid, &volinfo);
                         if (ret) {
                                 gf_msg (this->name, GF_LOG_ERROR, 0,
diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.c b/xlators/mgmt/glusterd/src/glusterd-pmap.c
index d67117f..1670b38 100644
--- a/xlators/mgmt/glusterd/src/glusterd-pmap.c
+++ b/xlators/mgmt/glusterd/src/glusterd-pmap.c
@@ -593,6 +593,7 @@ __gluster_pmap_signout (rpcsvc_request_t *req)
                         GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo,
                                                     conf);
                         sys_unlink (pidfile);
+                        brickinfo->started_here = _gf_false;
                 }
         }
 
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 8af9fb1..f98f5fe 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -2133,6 +2133,8 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,
         GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, conf);
         gf_msg_debug (this->name,  0, "Unlinking pidfile %s", pidfile);
         (void) sys_unlink (pidfile);
+
+        brickinfo->started_here = _gf_false;
 out:
         return ret;
 }
@@ -5159,6 +5161,7 @@ find_compat_brick_in_vol (glusterd_conf_t *conf,
         glusterd_brickinfo_t    *other_brick;
         char                    pidfile2[PATH_MAX]      = {0};
         int32_t                 pid2                    = -1;
+        int16_t                 retries                 = 15;
 
         /*
          * If comp_vol is provided, we have to check *volume* compatibility
@@ -5201,8 +5204,22 @@ find_compat_brick_in_vol (glusterd_conf_t *conf,
                 if (strcmp (brickinfo->hostname, other_brick->hostname) != 0) {
                         continue;
                 }
+
                 GLUSTERD_GET_BRICK_PIDFILE (pidfile2, srch_vol, other_brick,
                                             conf);
+
+                /* It is possible that the pidfile hasn't yet been populated,
+                 * when bricks are started in "no-wait" mode; for example
+                 * when bricks are started by glusterd_restart_bricks(). So
+                 * wait for the pidfile to be populated with a value before
+                 * checking if the service is running */
+                while (retries > 0) {
+                        if (sys_access (pidfile2, F_OK) == 0)
+                                break;
+                        sleep (1);
+                        retries--;
+                }
+
                 if (!gf_is_service_running (pidfile2, &pid2)) {
                         gf_log (this->name, GF_LOG_INFO,
                                 "cleaning up dead brick %s:%s",
@@ -5446,6 +5463,7 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
                                 socketpath, brickinfo->path, volinfo->volname);
                         (void) glusterd_brick_connect (volinfo, brickinfo,
                                         socketpath);
+                        brickinfo->started_here = _gf_true;
                 }
                 return 0;
         }
-- 
1.8.3.1