From 029fbbdaa7c4ddcc2479f507345a5c3ab1035313 Mon Sep 17 00:00:00 2001
From: Ravishankar N <ravishankar@redhat.com>
Date: Mon, 2 Jul 2018 16:05:39 +0530
Subject: [PATCH 306/325] glusterfsd: Do not process GLUSTERD_BRICK_XLATOR_OP
 if graph is not ready

Patch in upstream master: https://review.gluster.org/#/c/20435/
Patch in release-3.12: https://review.gluster.org/#/c/20436/

Problem:
If glustershd gets restarted by glusterd due to a node reboot, a
volume start force, or anything else that changes the shd graph
(add/remove brick), and index heal is launched via the CLI, there is
a chance that shd receives this IPC before the graph is fully active.
When it then accesses glusterfsd_ctx->active, it crashes.

Fix:
Since glusterd does not wait for the daemons it spawns to be fully
initialized and can send the request as soon as rpc initialization
has succeeded, we handle it on the shd side: if
glusterfs_graph_activate() is not yet done in shd when glusterd
sends GD_OP_HEAL_VOLUME, we fail the request.

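For readers skimming the commit message, the guard this patch adds to
glusterfs_handle_translator_op() (the complete hunk is in the diff
below) boils down to the early bail-out sketched here, using the new
glusterfsd_msg_38 message ID:

        ctx = glusterfsd_ctx;
        active = ctx->active;
        /* Bail out if the graph is not active yet; glusterd may send
         * the brick-op before glusterfs_graph_activate() has run. */
        if (!active) {
                ret = -1;
                gf_msg (this->name, GF_LOG_ERROR, EAGAIN, glusterfsd_msg_38,
                        xlator_req.op);
                goto out;
        }
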
Change-Id: If6cc07bc5455c4ba03458a36c28b63664496b17d
BUG: 1593865
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/143097
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
 glusterfsd/src/glusterfsd-messages.h | 4 +++-
 glusterfsd/src/glusterfsd-mgmt.c     | 6 ++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/glusterfsd/src/glusterfsd-messages.h b/glusterfsd/src/glusterfsd-messages.h
index e9c28f7..e38a88b 100644
--- a/glusterfsd/src/glusterfsd-messages.h
+++ b/glusterfsd/src/glusterfsd-messages.h
@@ -36,7 +36,7 @@
  */
 
 #define GLFS_COMP_BASE          GLFS_MSGID_COMP_GLUSTERFSD
-#define GLFS_NUM_MESSAGES       37
+#define GLFS_NUM_MESSAGES       38
 #define GLFS_MSGID_END          (GLFS_COMP_BASE + GLFS_NUM_MESSAGES + 1)
 /* Messaged with message IDs */
 #define glfs_msg_start_x GLFS_COMP_BASE, "Invalid: Start of messages"
@@ -109,6 +109,8 @@
 #define glusterfsd_msg_36 (GLFS_COMP_BASE + 36), "problem in xlator " \
                         " loading."
 #define glusterfsd_msg_37 (GLFS_COMP_BASE + 37), "failed to get dict value"
+#define glusterfsd_msg_38 (GLFS_COMP_BASE + 38), "Not processing brick-op no."\
+                        " %d since volume graph is not yet active."
 
 /*------------*/
 #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c
index 665b62c..2167241 100644
--- a/glusterfsd/src/glusterfsd-mgmt.c
+++ b/glusterfsd/src/glusterfsd-mgmt.c
@@ -790,6 +790,12 @@ glusterfs_handle_translator_op (rpcsvc_request_t *req)
 
         ctx = glusterfsd_ctx;
         active = ctx->active;
+        if (!active) {
+                ret = -1;
+                gf_msg (this->name, GF_LOG_ERROR, EAGAIN, glusterfsd_msg_38,
+                        xlator_req.op);
+                goto out;
+        }
         any = active->first;
         input = dict_new ();
         ret = dict_unserialize (xlator_req.input.input_val,
-- 
1.8.3.1