21ab4e
From 941241b33f424a6c4b27883482e0c7b101f712c5 Mon Sep 17 00:00:00 2001
21ab4e
From: Jeff Darcy <jdarcy@redhat.com>
21ab4e
Date: Wed, 1 Feb 2017 21:54:30 -0500
21ab4e
Subject: [PATCH 321/361] glusterd: double-check whether brick is alive for
21ab4e
 stats
21ab4e
21ab4e
With multiplexing, our tests detach bricks from their host processes
21ab4e
without glusterd being involved.  Thus, when we ask glusterd to fetch
21ab4e
profile info, it will try to fetch from a brick that's actually not
21ab4e
present any more.  While it can handle the process being dead and its
21ab4e
RPC connection being closed, it fails if it gets a negative response
21ab4e
from a live brick process.  This is not a problem in normal use,
21ab4e
because the brick can't disappear without glusterd seeing it.  The fix
21ab4e
is to double check that the brick is actually running, by looking for
21ab4e
its pidfile, which the tests *do* clean up as part of killing a brick.
21ab4e
21ab4e
mainline:
21ab4e
> BUG: 1385758
21ab4e
> Reviewed-on: https://review.gluster.org/16509
21ab4e
> Smoke: Gluster Build System <jenkins@build.gluster.org>
21ab4e
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
21ab4e
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
21ab4e
> Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
21ab4e
(cherry picked from commit f4b94ddd3034f2ac27890f75ec28aa75b4fc18eb)
21ab4e
21ab4e
BUG: 1417815
21ab4e
Change-Id: I098465b175ecf23538bd7207357c752a2bba8f4e
21ab4e
Signed-off-by: Jeff Darcy <jdarcy@redhat.com>
21ab4e
Reviewed-on: https://code.engineering.redhat.com/gerrit/101302
21ab4e
Tested-by: Milind Changire <mchangir@redhat.com>
21ab4e
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
21ab4e
---
21ab4e
 xlators/mgmt/glusterd/src/glusterd-op-sm.c | 17 ++++++++++++++---
21ab4e
 1 file changed, 14 insertions(+), 3 deletions(-)
21ab4e
21ab4e
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
21ab4e
index a3a0462..ef31cdb 100644
21ab4e
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
21ab4e
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
21ab4e
@@ -6247,15 +6247,14 @@ glusterd_bricks_select_profile_volume (dict_t *dict, char **op_errstr,
21ab4e
         glusterd_brickinfo_t                    *brickinfo = NULL;
21ab4e
         glusterd_pending_node_t                 *pending_node = NULL;
21ab4e
         char                                    *brick = NULL;
21ab4e
-
21ab4e
-
21ab4e
+        int32_t                                 pid = -1;
21ab4e
+        char                                    pidfile[PATH_MAX] = {0};
21ab4e
 
21ab4e
         this = THIS;
21ab4e
         GF_ASSERT (this);
21ab4e
         priv = this->private;
21ab4e
         GF_ASSERT (priv);
21ab4e
 
21ab4e
-
21ab4e
         ret = dict_get_str (dict, "volname", &volname);
21ab4e
         if (ret) {
21ab4e
                 gf_msg ("glusterd", GF_LOG_ERROR, 0,
21ab4e
@@ -6386,6 +6385,18 @@ glusterd_bricks_select_profile_volume (dict_t *dict, char **op_errstr,
21ab4e
                 cds_list_for_each_entry (brickinfo, &volinfo->bricks,
21ab4e
                                          brick_list) {
21ab4e
                         if (glusterd_is_brick_started (brickinfo)) {
21ab4e
+                                /*
21ab4e
+                                 * In normal use, glusterd_is_brick_started
21ab4e
+                                 * will give us the answer we need.  However,
21ab4e
+                                 * in our tests the brick gets detached behind
21ab4e
+                                 * our back, so we need to double-check this
21ab4e
+                                 * way.
21ab4e
+                                 */
21ab4e
+                                GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo,
21ab4e
+                                                            brickinfo, priv);
21ab4e
+                                if (!gf_is_service_running (pidfile, &pid)) {
21ab4e
+                                        continue;
21ab4e
+                                }
21ab4e
                                 pending_node = GF_CALLOC (1, sizeof (*pending_node),
21ab4e
                                                           gf_gld_mt_pending_node_t);
21ab4e
                                 if (!pending_node) {
21ab4e
-- 
21ab4e
1.8.3.1
21ab4e