e7a346
From 16b5177ca40cb5f8b37ea5331003ecc7dbb6d992 Mon Sep 17 00:00:00 2001
e7a346
From: Atin Mukherjee <amukherj@redhat.com>
e7a346
Date: Tue, 7 Aug 2018 10:25:49 +0530
e7a346
Subject: [PATCH 342/351] glusterd: more stricter checks of if brick is running
e7a346
 in multiplex mode
e7a346
e7a346
While gf_attach () utility can help in detaching a brick instance from
e7a346
the brick process which the kill_brick () function in tests/volume.rc
e7a346
uses, it has the following caveats:
e7a346
1. It doesn't ensure the respective brick is marked as stopped which
e7a346
glusterd does from glusterd_brick_stop
e7a346
2. Sometimes if kill_brick () is executed just after a brick stack is
e7a346
up, the mgmt_rpc_notify () can take some time before marking
e7a346
priv->connected to 1 and before it if kill_brick () is executed, brick
e7a346
will fail to initiate the pmap_signout which would in turn clean up the
e7a346
pidfile.
e7a346
e7a346
To avoid such possibilities, a stricter check on whether a brick is
e7a346
running or not in brick multiplexing has been brought in now where it
e7a346
not only checks for its pid's existence but also checks if the respective
e7a346
process has the brick instance associated with it before checking for
e7a346
brick's status.
e7a346
e7a346
> Change-Id: I98b92df949076663b9686add7aab4ec2f24ad5ab
e7a346
> Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
e7a346
> (cherry pick from commit bb9b8f61501cc633e585593de4d9f2fe5494d5ce)
e7a346
> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/20651/)
e7a346
e7a346
Change-Id: I47b70927c839ca4828a0499006b5c1f604d3d6a4
e7a346
BUG: 1612098
e7a346
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
e7a346
Reviewed-on: https://code.engineering.redhat.com/gerrit/146874
e7a346
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
e7a346
Tested-by: Atin Mukherjee <amukherj@redhat.com>
e7a346
---
e7a346
 xlators/mgmt/glusterd/src/glusterd-utils.c | 71 ++++++++++++++++--------------
e7a346
 1 file changed, 39 insertions(+), 32 deletions(-)
e7a346
e7a346
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
e7a346
index 372d5f4..6f7c787 100644
e7a346
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
e7a346
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
e7a346
@@ -6083,6 +6083,7 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
e7a346
         char                    *brickpath            = NULL;
e7a346
         glusterd_volinfo_t      *other_vol;
e7a346
         struct statvfs           brickstat = {0,};
e7a346
+        gf_boolean_t             is_service_running = _gf_false;
e7a346
 
e7a346
         this = THIS;
e7a346
         GF_ASSERT (this);
e7a346
@@ -6149,8 +6150,39 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
e7a346
                         brickinfo->path);
e7a346
                 goto out;
e7a346
         }
e7a346
-
e7a346
-        if (gf_is_service_running (pidfile, &pid)) {
e7a346
+        is_service_running = gf_is_service_running (pidfile, &pid);
e7a346
+        if (is_service_running) {
e7a346
+                if (is_brick_mx_enabled ()) {
e7a346
+                        brickpath = search_brick_path_from_proc
e7a346
+                                                (pid, brickinfo->path);
e7a346
+                        if (!brickpath) {
e7a346
+                                gf_log (this->name, GF_LOG_INFO,
e7a346
+                                        "Either pid %d is not running or brick"
e7a346
+                                        " path %s is not consumed so cleanup pidfile",
e7a346
+                                        pid, brickinfo->path);
e7a346
+                                /* brick isn't running,so unlink stale pidfile
e7a346
+                                 * if any.
e7a346
+                                 */
e7a346
+                                if (sys_access (pidfile , R_OK) == 0) {
e7a346
+                                        sys_unlink (pidfile);
e7a346
+                                }
e7a346
+                                goto run;
e7a346
+                        }
e7a346
+                        GF_FREE (brickpath);
e7a346
+                        ret = glusterd_get_sock_from_brick_pid (pid, socketpath,
e7a346
+                                                                sizeof(socketpath));
e7a346
+                        if (ret) {
e7a346
+                                gf_log (this->name, GF_LOG_INFO,
e7a346
+                                        "Either pid %d is not running or does "
e7a346
+                                        "not match with any running brick "
e7a346
+                                        "processes", pid);
e7a346
+                                /* Fetch unix socket is failed so unlink pidfile */
e7a346
+                                if (sys_access (pidfile , R_OK) == 0) {
e7a346
+                                        sys_unlink (pidfile);
e7a346
+                                }
e7a346
+                                goto run;
e7a346
+                        }
e7a346
+                }
e7a346
                 if (brickinfo->status != GF_BRICK_STARTING &&
e7a346
                     brickinfo->status != GF_BRICK_STARTED) {
e7a346
                         gf_log (this->name, GF_LOG_INFO,
e7a346
@@ -6168,36 +6200,11 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
e7a346
                          * same port (on another brick) and re-use that.
e7a346
                          * TBD: re-use RPC connection across bricks
e7a346
                          */
e7a346
-                        if (is_brick_mx_enabled ()) {
e7a346
-                                brickpath = search_brick_path_from_proc (pid, brickinfo->path);
e7a346
-                                if (!brickpath) {
e7a346
-                                        gf_log (this->name, GF_LOG_INFO,
e7a346
-                                                "Either pid %d is not running or brick"
e7a346
-                                                " path %s is not consumed so cleanup pidfile",
e7a346
-                                                pid, brickinfo->path);
e7a346
-                                        /* search brick is failed so unlink pidfile */
e7a346
-                                        if (sys_access (pidfile , R_OK) == 0) {
e7a346
-                                                sys_unlink (pidfile);
e7a346
-                                        }
e7a346
-                                        goto run;
e7a346
-                                }
e7a346
-                                GF_FREE (brickpath);
e7a346
-                                ret = glusterd_get_sock_from_brick_pid (pid, socketpath,
e7a346
-                                                                        sizeof(socketpath));
e7a346
-                                if (ret) {
e7a346
-                                        gf_log (this->name, GF_LOG_INFO,
e7a346
-                                                "Either pid %d is not running or is not match"
e7a346
-                                                " with any running brick process ", pid);
e7a346
-                                        /* Fetch unix socket is failed so unlink pidfile */
e7a346
-                                        if (sys_access (pidfile , R_OK) == 0) {
e7a346
-                                                sys_unlink (pidfile);
e7a346
-                                        }
e7a346
-                                        goto run;
e7a346
-                                }
e7a346
-                        } else {
e7a346
-                                glusterd_set_brick_socket_filepath (volinfo, brickinfo,
e7a346
-                                                                    socketpath,
e7a346
-                                                                    sizeof (socketpath));
e7a346
+                        if (!is_brick_mx_enabled ()) {
e7a346
+                                        glusterd_set_brick_socket_filepath
e7a346
+                                                (volinfo, brickinfo,
e7a346
+                                                 socketpath,
e7a346
+                                                 sizeof (socketpath));
e7a346
                         }
e7a346
                         gf_log (this->name, GF_LOG_DEBUG,
e7a346
                                 "Using %s as sockfile for brick %s of volume %s ",
e7a346
-- 
e7a346
1.8.3.1
e7a346