21ab4e
From 70d5e4931e07f82ceed53d2934167b1ebd74f2e9 Mon Sep 17 00:00:00 2001
21ab4e
From: Mohit Agrawal <moagrawa@redhat.com>
21ab4e
Date: Thu, 22 Jun 2017 16:57:04 +0530
21ab4e
Subject: [PATCH 531/539] glusterd: brick process fails to restart after
21ab4e
 gluster pod failure
21ab4e
21ab4e
Problem: In a container environment, sometimes after a gluster pod is
21ab4e
         deleted and a new gluster pod is created, the brick process
21ab4e
         does not come up.
21ab4e
21ab4e
Solution: The logs indicate that glusterd is trying to attach to a
21ab4e
          non-glusterfs process. Change glusterd_get_sock_from_brick_pid
21ab4e
          to fetch the socket path from the arguments of the running brick
21ab4e
          process, and to return an error if the pid is not a glusterfs process.
21ab4e
21ab4e
> BUG: 1464072
21ab4e
> Change-Id: Ida6af00066341b683bbb4440d7a0d8042581656a
21ab4e
> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
21ab4e
> Reviewed-on: https://review.gluster.org/17601
21ab4e
> Smoke: Gluster Build System <jenkins@build.gluster.org>
21ab4e
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
21ab4e
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
21ab4e
> Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
21ab4e
> (Cherry picked from commit b71059960f8c67d9a058244d2a1c748be4fe1323)
21ab4e
21ab4e
BUG: 1463221
21ab4e
Change-Id: I9ac21ee7150fb2f17157fab6dc6dde72f329d80e
21ab4e
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
21ab4e
Reviewed-on: https://code.engineering.redhat.com/gerrit/110184
21ab4e
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
21ab4e
---
21ab4e
 xlators/mgmt/glusterd/src/glusterd-utils.c | 41 ++++++++++++++++++++++--------
21ab4e
 1 file changed, 31 insertions(+), 10 deletions(-)
21ab4e
21ab4e
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
21ab4e
index fcb4340..18249ea 100644
21ab4e
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
21ab4e
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
21ab4e
@@ -5334,10 +5334,11 @@ find_compatible_brick (glusterd_conf_t *conf,
21ab4e
 }
21ab4e
 
21ab4e
 /* Below function is use to populate sockpath based on passed pid
21ab4e
-   value as a argument after check the value from proc
21ab4e
+   value as a argument after check the value from proc and also
21ab4e
+   check if passed pid is match with running  glusterfs process
21ab4e
 */
21ab4e
 
21ab4e
-void
21ab4e
+int
21ab4e
 glusterd_get_sock_from_brick_pid (int pid, char *sockpath, size_t len)
21ab4e
 {
21ab4e
         char fname[128] = {0,};
21ab4e
@@ -5350,6 +5351,7 @@ glusterd_get_sock_from_brick_pid (int pid, char *sockpath, size_t len)
21ab4e
         char   *brptr   = NULL;
21ab4e
         char tmpsockpath[PATH_MAX] = {0,};
21ab4e
         size_t blen    = 0;
21ab4e
+        int    ret     = -1;
21ab4e
 
21ab4e
         this = THIS;
21ab4e
         GF_ASSERT (this);
21ab4e
@@ -5359,7 +5361,7 @@ glusterd_get_sock_from_brick_pid (int pid, char *sockpath, size_t len)
21ab4e
         if (sys_access (fname , R_OK) != 0) {
21ab4e
                 gf_log (this->name, GF_LOG_ERROR,
21ab4e
                          "brick process %d is not running", pid);
21ab4e
-                return;
21ab4e
+                return ret;
21ab4e
         }
21ab4e
 
21ab4e
         fd = open(fname, O_RDONLY);
21ab4e
@@ -5369,7 +5371,7 @@ glusterd_get_sock_from_brick_pid (int pid, char *sockpath, size_t len)
21ab4e
                 gf_log (this->name, GF_LOG_ERROR,
21ab4e
                          "open failed %s to open a file %s", strerror (errno),
21ab4e
                                                               fname);
21ab4e
-                return;
21ab4e
+                return ret;
21ab4e
         }
21ab4e
 
21ab4e
         /* convert cmdline to single string */
21ab4e
@@ -5388,10 +5390,18 @@ glusterd_get_sock_from_brick_pid (int pid, char *sockpath, size_t len)
21ab4e
         cmdline[j] = '\0';
21ab4e
         if (fd)
21ab4e
                 sys_close(fd);
21ab4e
+        if (!strstr (cmdline, "glusterfs"))
21ab4e
+                return ret;
21ab4e
 
21ab4e
-        ptr =   strstr(cmdline, "-S ");
21ab4e
-        ptr =   strchr(ptr, '/');
21ab4e
+        ptr = strstr(cmdline, "-S ");
21ab4e
+        if (!ptr)
21ab4e
+                return ret;
21ab4e
+        ptr = strchr(ptr, '/');
21ab4e
+        if (!ptr)
21ab4e
+                return ret;
21ab4e
         brptr = strstr(ptr, "--brick-name");
21ab4e
+        if (!brptr)
21ab4e
+                return ret;
21ab4e
         i = 0;
21ab4e
 
21ab4e
         while (ptr < brptr) {
21ab4e
@@ -5402,8 +5412,10 @@ glusterd_get_sock_from_brick_pid (int pid, char *sockpath, size_t len)
21ab4e
 
21ab4e
         if (tmpsockpath[0]) {
21ab4e
                 strncpy (sockpath, tmpsockpath , i);
21ab4e
+                ret = 0;
21ab4e
         }
21ab4e
 
21ab4e
+        return ret;
21ab4e
 }
21ab4e
 
21ab4e
 
21ab4e
@@ -5466,22 +5478,31 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
21ab4e
                          * same port (on another brick) and re-use that.
21ab4e
                          * TBD: re-use RPC connection across bricks
21ab4e
                          */
21ab4e
-                        if (is_brick_mx_enabled ())
21ab4e
-                                glusterd_get_sock_from_brick_pid (pid, socketpath,
21ab4e
-                                                                  sizeof(socketpath));
21ab4e
-                        else
21ab4e
+                        if (is_brick_mx_enabled ()) {
21ab4e
+                                ret = glusterd_get_sock_from_brick_pid (pid, socketpath,
21ab4e
+                                                                        sizeof(socketpath));
21ab4e
+                                if (ret) {
21ab4e
+                                        gf_log (this->name, GF_LOG_DEBUG,
21ab4e
+                                                "Either pid %d is not running or is not match"
21ab4e
+                                                " with any running brick process ", pid);
21ab4e
+                                        goto run;
21ab4e
+                                }
21ab4e
+                        } else {
21ab4e
                                 glusterd_set_brick_socket_filepath (volinfo, brickinfo,
21ab4e
                                                                     socketpath,
21ab4e
                                                                     sizeof (socketpath));
21ab4e
+                        }
21ab4e
                         gf_log (this->name, GF_LOG_DEBUG,
21ab4e
                                 "Using %s as sockfile for brick %s of volume %s ",
21ab4e
                                 socketpath, brickinfo->path, volinfo->volname);
21ab4e
+
21ab4e
                         (void) glusterd_brick_connect (volinfo, brickinfo,
21ab4e
                                         socketpath);
21ab4e
                 }
21ab4e
                 return 0;
21ab4e
         }
21ab4e
 
21ab4e
+run:
21ab4e
         ret = _mk_rundir_p (volinfo);
21ab4e
         if (ret)
21ab4e
                 goto out;
21ab4e
-- 
21ab4e
1.8.3.1
21ab4e