Blob Blame History Raw
From 70d5e4931e07f82ceed53d2934167b1ebd74f2e9 Mon Sep 17 00:00:00 2001
From: Mohit Agrawal <moagrawa@redhat.com>
Date: Thu, 22 Jun 2017 16:57:04 +0530
Subject: [PATCH 531/539] glusterd: brick process fails to restart after
 gluster pod failure

Problem: In a container environment, sometimes after a gluster pod is
         deleted and a new gluster pod is created, the brick process
         does not seem to come up.

Solution: Based on the logs, it seems glusterd is trying to attach
          to a non-glusterfs process. Change the code of the function
          glusterd_get_sock_from_brick_pid to fetch the socket path from
          the arguments of the running brick process.

> BUG: 1464072
> Change-Id: Ida6af00066341b683bbb4440d7a0d8042581656a
> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
> Reviewed-on: https://review.gluster.org/17601
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
> Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
> (Cherry picked from commit b71059960f8c67d9a058244d2a1c748be4fe1323)

BUG: 1463221
Change-Id: I9ac21ee7150fb2f17157fab6dc6dde72f329d80e
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/110184
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
 xlators/mgmt/glusterd/src/glusterd-utils.c | 41 ++++++++++++++++++++++--------
 1 file changed, 31 insertions(+), 10 deletions(-)

diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index fcb4340..18249ea 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -5334,10 +5334,11 @@ find_compatible_brick (glusterd_conf_t *conf,
 }
 
 /* Below function is use to populate sockpath based on passed pid
-   value as a argument after check the value from proc
+   value as an argument after checking the value from proc, and also
+   check that the passed pid matches a running glusterfs process
 */
 
-void
+int
 glusterd_get_sock_from_brick_pid (int pid, char *sockpath, size_t len)
 {
         char fname[128] = {0,};
@@ -5350,6 +5351,7 @@ glusterd_get_sock_from_brick_pid (int pid, char *sockpath, size_t len)
         char   *brptr   = NULL;
         char tmpsockpath[PATH_MAX] = {0,};
         size_t blen    = 0;
+        int    ret     = -1;
 
         this = THIS;
         GF_ASSERT (this);
@@ -5359,7 +5361,7 @@ glusterd_get_sock_from_brick_pid (int pid, char *sockpath, size_t len)
         if (sys_access (fname , R_OK) != 0) {
                 gf_log (this->name, GF_LOG_ERROR,
                          "brick process %d is not running", pid);
-                return;
+                return ret;
         }
 
         fd = open(fname, O_RDONLY);
@@ -5369,7 +5371,7 @@ glusterd_get_sock_from_brick_pid (int pid, char *sockpath, size_t len)
                 gf_log (this->name, GF_LOG_ERROR,
                          "open failed %s to open a file %s", strerror (errno),
                                                               fname);
-                return;
+                return ret;
         }
 
         /* convert cmdline to single string */
@@ -5388,10 +5390,18 @@ glusterd_get_sock_from_brick_pid (int pid, char *sockpath, size_t len)
         cmdline[j] = '\0';
         if (fd)
                 sys_close(fd);
+        if (!strstr (cmdline, "glusterfs"))
+                return ret;
 
-        ptr =   strstr(cmdline, "-S ");
-        ptr =   strchr(ptr, '/');
+        ptr = strstr(cmdline, "-S ");
+        if (!ptr)
+                return ret;
+        ptr = strchr(ptr, '/');
+        if (!ptr)
+                return ret;
         brptr = strstr(ptr, "--brick-name");
+        if (!brptr)
+                return ret;
         i = 0;
 
         while (ptr < brptr) {
@@ -5402,8 +5412,10 @@ glusterd_get_sock_from_brick_pid (int pid, char *sockpath, size_t len)
 
         if (tmpsockpath[0]) {
                 strncpy (sockpath, tmpsockpath , i);
+                ret = 0;
         }
 
+        return ret;
 }
 
 
@@ -5466,22 +5478,31 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
                          * same port (on another brick) and re-use that.
                          * TBD: re-use RPC connection across bricks
                          */
-                        if (is_brick_mx_enabled ())
-                                glusterd_get_sock_from_brick_pid (pid, socketpath,
-                                                                  sizeof(socketpath));
-                        else
+                        if (is_brick_mx_enabled ()) {
+                                ret = glusterd_get_sock_from_brick_pid (pid, socketpath,
+                                                                        sizeof(socketpath));
+                                if (ret) {
+                                        gf_log (this->name, GF_LOG_DEBUG,
+                                                "Either pid %d is not running or is not match"
+                                                " with any running brick process ", pid);
+                                        goto run;
+                                }
+                        } else {
                                 glusterd_set_brick_socket_filepath (volinfo, brickinfo,
                                                                     socketpath,
                                                                     sizeof (socketpath));
+                        }
                         gf_log (this->name, GF_LOG_DEBUG,
                                 "Using %s as sockfile for brick %s of volume %s ",
                                 socketpath, brickinfo->path, volinfo->volname);
+
                         (void) glusterd_brick_connect (volinfo, brickinfo,
                                         socketpath);
                 }
                 return 0;
         }
 
+run:
         ret = _mk_rundir_p (volinfo);
         if (ret)
                 goto out;
-- 
1.8.3.1