e7a346
From 4bf98e63a481aea6143e8f404aa4650f7a80e317 Mon Sep 17 00:00:00 2001
e7a346
From: Atin Mukherjee <amukherj@redhat.com>
e7a346
Date: Wed, 3 Jan 2018 14:29:51 +0530
e7a346
Subject: [PATCH 120/128] glusterd: connect to an existing brick process when
e7a346
 quorum status is NOT_APPLICABLE_QUORUM
e7a346
e7a346
First of all, this patch reverts commit 635c1c3 because it is causing a
e7a346
regression with bricks not coming up on time when a node is rebooted.
e7a346
This patch tries to fix the problem in a different way by just trying to
e7a346
connect to an existing running brick when quorum status is not
e7a346
applicable.
e7a346
e7a346
> upstream patch : https://review.gluster.org/#/c/19134/
e7a346
e7a346
Change-Id: I0efb5901832824b1c15dcac529bffac85173e097
e7a346
BUG: 1509102
e7a346
Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
e7a346
Reviewed-on: https://code.engineering.redhat.com/gerrit/126996
e7a346
Tested-by: RHGS Build Bot <nigelb@redhat.com>
e7a346
---
e7a346
 xlators/mgmt/glusterd/src/glusterd-brick-ops.c     |  2 +-
e7a346
 xlators/mgmt/glusterd/src/glusterd-handshake.c     |  2 +-
e7a346
 xlators/mgmt/glusterd/src/glusterd-op-sm.c         |  1 +
e7a346
 xlators/mgmt/glusterd/src/glusterd-replace-brick.c |  3 ++-
e7a346
 xlators/mgmt/glusterd/src/glusterd-server-quorum.c | 27 ++++++++++++++++++----
e7a346
 xlators/mgmt/glusterd/src/glusterd-utils.c         | 13 +++++++----
e7a346
 xlators/mgmt/glusterd/src/glusterd-utils.h         |  3 ++-
e7a346
 xlators/mgmt/glusterd/src/glusterd-volume-ops.c    |  3 ++-
e7a346
 8 files changed, 40 insertions(+), 14 deletions(-)
e7a346
e7a346
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
e7a346
index e88fa3f..416412e 100644
e7a346
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
e7a346
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
e7a346
@@ -1554,7 +1554,7 @@ glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count,
e7a346
                         }
e7a346
                 }
e7a346
                 ret = glusterd_brick_start (volinfo, brickinfo,
e7a346
-                                            _gf_true);
e7a346
+                                            _gf_true, _gf_false);
e7a346
                 if (ret)
e7a346
                         goto out;
e7a346
                 i++;
e7a346
diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c
e7a346
index 35aeca3..3d1dfb2 100644
e7a346
--- a/xlators/mgmt/glusterd/src/glusterd-handshake.c
e7a346
+++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c
e7a346
@@ -658,7 +658,7 @@ glusterd_create_missed_snap (glusterd_missed_snap_info *missed_snapinfo,
e7a346
         }
e7a346
 
e7a346
         brickinfo->snap_status = 0;
e7a346
-        ret = glusterd_brick_start (snap_vol, brickinfo, _gf_false);
e7a346
+        ret = glusterd_brick_start (snap_vol, brickinfo, _gf_false, _gf_false);
e7a346
         if (ret) {
e7a346
                 gf_msg (this->name, GF_LOG_WARNING, 0,
e7a346
                         GD_MSG_BRICK_DISCONNECTED, "starting the "
e7a346
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
e7a346
index 86f18f0..b1a6e06 100644
e7a346
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
e7a346
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
e7a346
@@ -2437,6 +2437,7 @@ glusterd_start_bricks (glusterd_volinfo_t *volinfo)
e7a346
                         pthread_mutex_lock (&brickinfo->restart_mutex);
e7a346
                         {
e7a346
                                 ret = glusterd_brick_start (volinfo, brickinfo,
e7a346
+                                                            _gf_false,
e7a346
                                                             _gf_false);
e7a346
                         }
e7a346
                         pthread_mutex_unlock (&brickinfo->restart_mutex);
e7a346
diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
e7a346
index b11adf1..a037323 100644
e7a346
--- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
e7a346
+++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
e7a346
@@ -429,7 +429,8 @@ glusterd_op_perform_replace_brick (glusterd_volinfo_t  *volinfo,
e7a346
                 goto out;
e7a346
 
e7a346
         if (GLUSTERD_STATUS_STARTED == volinfo->status) {
e7a346
-                ret = glusterd_brick_start (volinfo, new_brickinfo, _gf_false);
e7a346
+                ret = glusterd_brick_start (volinfo, new_brickinfo, _gf_false,
e7a346
+                                            _gf_false);
e7a346
                 if (ret)
e7a346
                         goto out;
e7a346
         }
e7a346
diff --git a/xlators/mgmt/glusterd/src/glusterd-server-quorum.c b/xlators/mgmt/glusterd/src/glusterd-server-quorum.c
e7a346
index 995a568..b01bfaa 100644
e7a346
--- a/xlators/mgmt/glusterd/src/glusterd-server-quorum.c
e7a346
+++ b/xlators/mgmt/glusterd/src/glusterd-server-quorum.c
e7a346
@@ -314,6 +314,7 @@ glusterd_do_volume_quorum_action (xlator_t *this, glusterd_volinfo_t *volinfo,
e7a346
         glusterd_brickinfo_t *brickinfo     = NULL;
e7a346
         gd_quorum_status_t   quorum_status  = NOT_APPLICABLE_QUORUM;
e7a346
         gf_boolean_t         follows_quorum = _gf_false;
e7a346
+        gf_boolean_t         quorum_status_unchanged = _gf_false;
e7a346
 
e7a346
         if (volinfo->status != GLUSTERD_STATUS_STARTED) {
e7a346
                 volinfo->quorum_status = NOT_APPLICABLE_QUORUM;
e7a346
@@ -341,9 +342,10 @@ glusterd_do_volume_quorum_action (xlator_t *this, glusterd_volinfo_t *volinfo,
e7a346
          * the bricks that are down are brought up again. In this process it
e7a346
          * also brings up the brick that is purposefully taken down.
e7a346
          */
e7a346
-        if (quorum_status != NOT_APPLICABLE_QUORUM &&
e7a346
-            volinfo->quorum_status == quorum_status)
e7a346
+        if (volinfo->quorum_status == quorum_status) {
e7a346
+                quorum_status_unchanged = _gf_true;
e7a346
                 goto out;
e7a346
+        }
e7a346
 
e7a346
         if (quorum_status == MEETS_QUORUM) {
e7a346
                 gf_msg (this->name, GF_LOG_CRITICAL, 0,
e7a346
@@ -368,9 +370,10 @@ glusterd_do_volume_quorum_action (xlator_t *this, glusterd_volinfo_t *volinfo,
e7a346
                         if (!brickinfo->start_triggered) {
e7a346
                                 pthread_mutex_lock (&brickinfo->restart_mutex);
e7a346
                                 {
e7a346
-                                        glusterd_brick_start (volinfo,
e7a346
-                                                              brickinfo,
e7a346
-                                                              _gf_false);
e7a346
+                                        ret = glusterd_brick_start (volinfo,
e7a346
+                                                                    brickinfo,
e7a346
+                                                                    _gf_false,
e7a346
+                                                                    _gf_false);
e7a346
                                 }
e7a346
                                 pthread_mutex_unlock (&brickinfo->restart_mutex);
e7a346
                         }
e7a346
@@ -392,6 +395,20 @@ glusterd_do_volume_quorum_action (xlator_t *this, glusterd_volinfo_t *volinfo,
e7a346
                 }
e7a346
         }
e7a346
 out:
e7a346
+        if (quorum_status_unchanged) {
e7a346
+                list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
e7a346
+                        if (!glusterd_is_local_brick (this, volinfo, brickinfo))
e7a346
+                                continue;
e7a346
+                        ret = glusterd_brick_start (volinfo, brickinfo,
e7a346
+                                                    _gf_false, _gf_true);
e7a346
+                        if (ret) {
e7a346
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
e7a346
+                                        GD_MSG_BRICK_DISCONNECTED, "Failed to "
e7a346
+                                        "connect to %s:%s", brickinfo->hostname,
e7a346
+                                        brickinfo->path);
e7a346
+                        }
e7a346
+                }
e7a346
+        }
e7a346
         return;
e7a346
 }
e7a346
 
e7a346
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
e7a346
index 1b2cc43..f1b365f 100644
e7a346
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
e7a346
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
e7a346
@@ -5796,7 +5796,8 @@ glusterd_get_sock_from_brick_pid (int pid, char *sockpath, size_t len)
e7a346
 int
e7a346
 glusterd_brick_start (glusterd_volinfo_t *volinfo,
e7a346
                       glusterd_brickinfo_t *brickinfo,
e7a346
-                      gf_boolean_t wait)
e7a346
+                      gf_boolean_t wait,
e7a346
+                      gf_boolean_t only_connect)
e7a346
 {
e7a346
         int                     ret   = -1;
e7a346
         xlator_t                *this = NULL;
e7a346
@@ -5847,7 +5848,9 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
e7a346
                 ret = 0;
e7a346
                 goto out;
e7a346
         }
e7a346
-        brickinfo->start_triggered = _gf_true;
e7a346
+        if (!only_connect)
e7a346
+                brickinfo->start_triggered = _gf_true;
e7a346
+
e7a346
         GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, conf);
e7a346
         if (gf_is_service_running (pidfile, &pid)) {
e7a346
                 if (brickinfo->status != GF_BRICK_STARTING &&
e7a346
@@ -5905,6 +5908,8 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
e7a346
                 }
e7a346
                 return 0;
e7a346
         }
e7a346
+        if (only_connect)
e7a346
+                return 0;
e7a346
 
e7a346
 run:
e7a346
         ret = _mk_rundir_p (volinfo);
e7a346
@@ -6032,7 +6037,7 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
e7a346
                                         {
e7a346
                                                 glusterd_brick_start
e7a346
                                                          (volinfo, brickinfo,
e7a346
-                                                          _gf_false);
e7a346
+                                                          _gf_false, _gf_false);
e7a346
                                         }
e7a346
                                         pthread_mutex_unlock
e7a346
                                                 (&brickinfo->restart_mutex);
e7a346
@@ -6081,7 +6086,7 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
e7a346
                                         {
e7a346
                                                 glusterd_brick_start
e7a346
                                                          (volinfo, brickinfo,
e7a346
-                                                          _gf_false);
e7a346
+                                                          _gf_false, _gf_false);
e7a346
                                         }
e7a346
                                         pthread_mutex_unlock
e7a346
                                                 (&brickinfo->restart_mutex);
e7a346
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
e7a346
index abaec4b..9194da0 100644
e7a346
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
e7a346
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
e7a346
@@ -277,7 +277,8 @@ glusterd_all_volume_cond_check (glusterd_condition_func func, int status,
e7a346
 int
e7a346
 glusterd_brick_start (glusterd_volinfo_t *volinfo,
e7a346
                       glusterd_brickinfo_t *brickinfo,
e7a346
-                      gf_boolean_t wait);
e7a346
+                      gf_boolean_t wait,
e7a346
+                      gf_boolean_t only_connect);
e7a346
 int
e7a346
 glusterd_brick_stop (glusterd_volinfo_t *volinfo,
e7a346
                      glusterd_brickinfo_t *brickinfo,
e7a346
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
e7a346
index de97e6a..414f9ba 100644
e7a346
--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
e7a346
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
e7a346
@@ -2564,7 +2564,8 @@ glusterd_start_volume (glusterd_volinfo_t *volinfo, int flags,
e7a346
                 if (flags & GF_CLI_FLAG_OP_FORCE) {
e7a346
                         brickinfo->start_triggered = _gf_false;
e7a346
                 }
e7a346
-                ret = glusterd_brick_start (volinfo, brickinfo, wait);
e7a346
+                ret = glusterd_brick_start (volinfo, brickinfo, wait,
e7a346
+                                            _gf_false);
e7a346
                 /* If 'force' try to start all bricks regardless of success or
e7a346
                  * failure
e7a346
                  */
e7a346
-- 
e7a346
1.8.3.1
e7a346