Blob Blame History Raw
From 2cb67dea078a2d3f3f650e02eb91cedb3663f5d6 Mon Sep 17 00:00:00 2001
From: Ravishankar N <ravishankar@redhat.com>
Date: Mon, 14 Sep 2015 15:43:31 +0530
Subject: [PATCH 330/330] afr: perform replace-brick in a synctask

Patch in master: http://review.gluster.org/#/c/12169/
Patch in release-3.7: http://review.gluster.org/#/c/12172/

Problem:
replace-brick setxattr is not performed inside a synctask. This can lead
to hangs if the setxattr is executed by epoll thread, as the epoll
thread will be waiting for replies to come where as epoll thread is the
thread that needs to epoll_ctl for reading from socket and listen.

Fix:
Move replace-brick to synctask to prevent epoll thread hang.

This patch is in line with the fix performed in
http://review.gluster.org/#/c/12163/

Change-Id: I0e04fe549428a91b8ca5cfe359ce0526c6c0336d
BUG: 1262291
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/57735
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
---
 xlators/cluster/afr/src/afr-inode-write.c |   70 ++++++++++++++++++++++++-----
 xlators/cluster/afr/src/afr-mem-types.h   |    1 +
 xlators/cluster/afr/src/afr-messages.h    |   10 +++-
 xlators/cluster/afr/src/afr.h             |    6 +++
 4 files changed, 73 insertions(+), 14 deletions(-)

diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index 01390f0..3968b24 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -1099,27 +1099,45 @@ out:
 }
 
 int
-_afr_handle_replace_brick (xlator_t *this, call_frame_t *frame, loc_t *loc,
-                           int rb_index)
+_afr_handle_replace_brick_cbk (int ret, call_frame_t *frame, void *opaque)
+{
+        afr_replace_brick_args_t *data = NULL;
+
+        data = opaque;
+        loc_wipe (&data->loc);
+        GF_FREE (data);
+        return 0;
+}
+
+int
+_afr_handle_replace_brick (void *opaque)
 {
 
         afr_local_t     *local          = NULL;
         afr_private_t   *priv           = NULL;
+        int              rb_index       = -1;
         int              ret            = -1;
         int              op_errno       = ENOMEM;
+        call_frame_t    *frame          = NULL;
+        xlator_t        *this           = NULL;
+        afr_replace_brick_args_t *data  = NULL;
 
+        data = opaque;
+        frame = data->frame;
+        rb_index = data->rb_index;
+        this = frame->this;
         priv = this->private;
 
         local = AFR_FRAME_INIT (frame, op_errno);
         if (!local)
                 goto out;
 
-        loc_copy (&local->loc, loc);
+        loc_copy (&local->loc, &data->loc);
 
         gf_log (this->name, GF_LOG_DEBUG, "Child being replaced is : %s",
                 priv->children[rb_index]->name);
 
-        ret = _afr_handle_replace_brick_type (this, frame, loc, rb_index,
+        ret = _afr_handle_replace_brick_type (this, frame, &local->loc, rb_index,
                                               AFR_METADATA_TRANSACTION);
         if (ret) {
                 op_errno = -ret;
@@ -1132,7 +1150,7 @@ _afr_handle_replace_brick (xlator_t *this, call_frame_t *frame, loc_t *loc,
         local->pending = NULL;
         local->xdata_req = NULL;
 
-        ret = _afr_handle_replace_brick_type (this, frame, loc, rb_index,
+        ret = _afr_handle_replace_brick_type (this, frame, &local->loc, rb_index,
                                               AFR_ENTRY_TRANSACTION);
         if (ret) {
                 op_errno = -ret;
@@ -1359,32 +1377,60 @@ afr_handle_replace_brick (xlator_t *this, call_frame_t *frame, loc_t *loc,
 {
         int             ret               = -1;
         int             rb_index          = -1;
+        int             op_errno          = EPERM;
         char           *replace_brick     = NULL;
+        afr_replace_brick_args_t *data    = NULL;
 
         ret =  dict_get_str (dict, GF_AFR_REPLACE_BRICK, &replace_brick);
 
         if (!ret) {
                 if (frame->root->pid != GF_CLIENT_PID_AFR_SELF_HEALD) {
+                        gf_log (this->name, GF_LOG_ERROR, "'%s' is an internal"
+                                " extended attribute : %s.",
+                                GF_AFR_REPLACE_BRICK, strerror (EPERM));
                         ret = 1;
                         goto out;
                 }
                 rb_index = afr_get_child_index_from_name (this, replace_brick);
 
-                if (rb_index < 0)
+                if (rb_index < 0) {
                          /* Didn't belong to this replica pair
                           * Just do a no-op
                           */
                         AFR_STACK_UNWIND (setxattr, frame, 0, 0, NULL);
-                else
-                        _afr_handle_replace_brick (this, frame, loc, rb_index);
+                        return 0;
+                } else {
+                        data = GF_CALLOC (1, sizeof (*data),
+                                          gf_afr_mt_replace_brick_t);
+                        if (!data) {
+                                ret = 1;
+                                op_errno = ENOMEM;
+                                goto out;
+                        }
+                        data->frame = frame;
+                        loc_copy (&data->loc, loc);
+                        data->rb_index = rb_index;
+                        ret = synctask_new (this->ctx->env,
+                                            _afr_handle_replace_brick,
+                                            _afr_handle_replace_brick_cbk,
+                                            NULL, data);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        AFR_MSG_REPLACE_BRICK_FAILED,
+                                        "Failed to create synctask. Unable to "
+                                        "perform replace-brick.");
+                                ret = 1;
+                                op_errno = ENOMEM;
+                                loc_wipe (&data->loc);
+                                GF_FREE (data);
+                                goto out;
+                        }
+                }
                 ret = 0;
         }
 out:
         if (ret == 1) {
-                gf_log (this->name, GF_LOG_ERROR, "'%s' is an internal"
-                        " extended attribute : %s.",
-                        GF_AFR_REPLACE_BRICK, strerror (EPERM));
-                AFR_STACK_UNWIND (setxattr, frame, -1, EPERM, NULL);
+                AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
                 ret = 0;
         }
         return ret;
diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h
index fd484e4..6f1eee9 100644
--- a/xlators/cluster/afr/src/afr-mem-types.h
+++ b/xlators/cluster/afr/src/afr-mem-types.h
@@ -45,6 +45,7 @@ enum gf_afr_mem_types_ {
 	gf_afr_mt_subvol_healer_t,
 	gf_afr_mt_spbc_timeout_t,
         gf_afr_mt_spb_status_t,
+        gf_afr_mt_replace_brick_t,
         gf_afr_mt_end
 };
 #endif
diff --git a/xlators/cluster/afr/src/afr-messages.h b/xlators/cluster/afr/src/afr-messages.h
index 4793413..d07c7bc 100644
--- a/xlators/cluster/afr/src/afr-messages.h
+++ b/xlators/cluster/afr/src/afr-messages.h
@@ -45,7 +45,7 @@
  */
 
 #define GLFS_COMP_BASE_AFR      GLFS_MSGID_COMP_AFR
-#define GLFS_NUM_MESSAGES       38
+#define GLFS_NUM_MESSAGES       39
 #define GLFS_MSGID_END          (GLFS_COMP_BASE_AFR + GLFS_NUM_MESSAGES + 1)
 
 #define glfs_msg_start_x GLFS_COMP_BASE_AFR, "Invalid: Start of messages"
@@ -332,7 +332,6 @@
 */
 #define AFR_MSG_SELF_HEAL_FAILED                (GLFS_COMP_BASE_AFR + 37)
 
-
 /*!
  * @messageid 108038
  * @diagnosis
@@ -340,6 +339,13 @@
 */
 #define AFR_MSG_SPLIT_BRAIN_STATUS      (GLFS_COMP_BASE_AFR + 38)
 
+/*!
+ * @messageid 108039
+ * @diagnosis
+ * @recommendedaction
+*/
+#define AFR_MSG_REPLACE_BRICK_FAILED                (GLFS_COMP_BASE_AFR + 39)
+
 
 #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
 #endif /* !_AFR_MESSAGES_H_ */
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 316eeea..19e764a 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -760,6 +760,12 @@ typedef struct afr_spb_status {
         loc_t        *loc;
 } afr_spb_status_t;
 
+typedef struct afr_replace_brick_args {
+        call_frame_t *frame;
+        loc_t loc;
+        int rb_index;
+} afr_replace_brick_args_t;
+
 typedef struct afr_read_subvol_args {
         ia_type_t ia_type;
         uuid_t gfid;
-- 
1.7.1