|
|
cb8e9e |
From 2cb67dea078a2d3f3f650e02eb91cedb3663f5d6 Mon Sep 17 00:00:00 2001
|
|
|
cb8e9e |
From: Ravishankar N <ravishankar@redhat.com>
|
|
|
cb8e9e |
Date: Mon, 14 Sep 2015 15:43:31 +0530
|
|
|
cb8e9e |
Subject: [PATCH 330/330] afr: perform replace-brick in a synctask
|
|
|
cb8e9e |
|
|
|
cb8e9e |
Patch in master: http://review.gluster.org/#/c/12169/
|
|
|
cb8e9e |
Patch in release-3.7: http://review.gluster.org/#/c/12172/
|
|
|
cb8e9e |
|
|
|
cb8e9e |
Problem:
|
|
|
cb8e9e |
replace-brick setxattr is not performed inside a synctask. This can lead
|
|
|
cb8e9e |
to hangs if the setxattr is executed by epoll thread, as the epoll
|
|
|
cb8e9e |
thread will be waiting for replies to come where as epoll thread is the
|
|
|
cb8e9e |
thread that needs to epoll_ctl for reading from socket and listen.
|
|
|
cb8e9e |
|
|
|
cb8e9e |
Fix:
|
|
|
cb8e9e |
Move replace-brick to synctask to prevent epoll thread hang.
|
|
|
cb8e9e |
|
|
|
cb8e9e |
This patch is in line with the fix performed in
|
|
|
cb8e9e |
http://review.gluster.org/#/c/12163/
|
|
|
cb8e9e |
|
|
|
cb8e9e |
Change-Id: I0e04fe549428a91b8ca5cfe359ce0526c6c0336d
|
|
|
cb8e9e |
BUG: 1262291
|
|
|
cb8e9e |
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
|
|
|
cb8e9e |
Reviewed-on: https://code.engineering.redhat.com/gerrit/57735
|
|
|
cb8e9e |
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
|
|
|
cb8e9e |
Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
|
|
|
cb8e9e |
---
|
|
|
cb8e9e |
xlators/cluster/afr/src/afr-inode-write.c | 70 ++++++++++++++++++++++++-----
|
|
|
cb8e9e |
xlators/cluster/afr/src/afr-mem-types.h | 1 +
|
|
|
cb8e9e |
xlators/cluster/afr/src/afr-messages.h | 10 +++-
|
|
|
cb8e9e |
xlators/cluster/afr/src/afr.h | 6 +++
|
|
|
cb8e9e |
4 files changed, 73 insertions(+), 14 deletions(-)
|
|
|
cb8e9e |
|
|
|
cb8e9e |
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
|
|
|
cb8e9e |
index 01390f0..3968b24 100644
|
|
|
cb8e9e |
--- a/xlators/cluster/afr/src/afr-inode-write.c
|
|
|
cb8e9e |
+++ b/xlators/cluster/afr/src/afr-inode-write.c
|
|
|
cb8e9e |
@@ -1099,27 +1099,45 @@ out:
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
int
|
|
|
cb8e9e |
-_afr_handle_replace_brick (xlator_t *this, call_frame_t *frame, loc_t *loc,
|
|
|
cb8e9e |
- int rb_index)
|
|
|
cb8e9e |
+_afr_handle_replace_brick_cbk (int ret, call_frame_t *frame, void *opaque)
|
|
|
cb8e9e |
+{
|
|
|
cb8e9e |
+ afr_replace_brick_args_t *data = NULL;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ data = opaque;
|
|
|
cb8e9e |
+ loc_wipe (&data->loc);
|
|
|
cb8e9e |
+ GF_FREE (data);
|
|
|
cb8e9e |
+ return 0;
|
|
|
cb8e9e |
+}
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+int
|
|
|
cb8e9e |
+_afr_handle_replace_brick (void *opaque)
|
|
|
cb8e9e |
{
|
|
|
cb8e9e |
|
|
|
cb8e9e |
afr_local_t *local = NULL;
|
|
|
cb8e9e |
afr_private_t *priv = NULL;
|
|
|
cb8e9e |
+ int rb_index = -1;
|
|
|
cb8e9e |
int ret = -1;
|
|
|
cb8e9e |
int op_errno = ENOMEM;
|
|
|
cb8e9e |
+ call_frame_t *frame = NULL;
|
|
|
cb8e9e |
+ xlator_t *this = NULL;
|
|
|
cb8e9e |
+ afr_replace_brick_args_t *data = NULL;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
+ data = opaque;
|
|
|
cb8e9e |
+ frame = data->frame;
|
|
|
cb8e9e |
+ rb_index = data->rb_index;
|
|
|
cb8e9e |
+ this = frame->this;
|
|
|
cb8e9e |
priv = this->private;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
local = AFR_FRAME_INIT (frame, op_errno);
|
|
|
cb8e9e |
if (!local)
|
|
|
cb8e9e |
goto out;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- loc_copy (&local->loc, loc);
|
|
|
cb8e9e |
+ loc_copy (&local->loc, &data->loc);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
gf_log (this->name, GF_LOG_DEBUG, "Child being replaced is : %s",
|
|
|
cb8e9e |
priv->children[rb_index]->name);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- ret = _afr_handle_replace_brick_type (this, frame, loc, rb_index,
|
|
|
cb8e9e |
+ ret = _afr_handle_replace_brick_type (this, frame, &local->loc, rb_index,
|
|
|
cb8e9e |
AFR_METADATA_TRANSACTION);
|
|
|
cb8e9e |
if (ret) {
|
|
|
cb8e9e |
op_errno = -ret;
|
|
|
cb8e9e |
@@ -1132,7 +1150,7 @@ _afr_handle_replace_brick (xlator_t *this, call_frame_t *frame, loc_t *loc,
|
|
|
cb8e9e |
local->pending = NULL;
|
|
|
cb8e9e |
local->xdata_req = NULL;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- ret = _afr_handle_replace_brick_type (this, frame, loc, rb_index,
|
|
|
cb8e9e |
+ ret = _afr_handle_replace_brick_type (this, frame, &local->loc, rb_index,
|
|
|
cb8e9e |
AFR_ENTRY_TRANSACTION);
|
|
|
cb8e9e |
if (ret) {
|
|
|
cb8e9e |
op_errno = -ret;
|
|
|
cb8e9e |
@@ -1359,32 +1377,60 @@ afr_handle_replace_brick (xlator_t *this, call_frame_t *frame, loc_t *loc,
|
|
|
cb8e9e |
{
|
|
|
cb8e9e |
int ret = -1;
|
|
|
cb8e9e |
int rb_index = -1;
|
|
|
cb8e9e |
+ int op_errno = EPERM;
|
|
|
cb8e9e |
char *replace_brick = NULL;
|
|
|
cb8e9e |
+ afr_replace_brick_args_t *data = NULL;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
ret = dict_get_str (dict, GF_AFR_REPLACE_BRICK, &replace_brick);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
if (!ret) {
|
|
|
cb8e9e |
if (frame->root->pid != GF_CLIENT_PID_AFR_SELF_HEALD) {
|
|
|
cb8e9e |
+ gf_log (this->name, GF_LOG_ERROR, "'%s' is an internal"
|
|
|
cb8e9e |
+ " extended attribute : %s.",
|
|
|
cb8e9e |
+ GF_AFR_REPLACE_BRICK, strerror (EPERM));
|
|
|
cb8e9e |
ret = 1;
|
|
|
cb8e9e |
goto out;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
rb_index = afr_get_child_index_from_name (this, replace_brick);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- if (rb_index < 0)
|
|
|
cb8e9e |
+ if (rb_index < 0) {
|
|
|
cb8e9e |
/* Didn't belong to this replica pair
|
|
|
cb8e9e |
* Just do a no-op
|
|
|
cb8e9e |
*/
|
|
|
cb8e9e |
AFR_STACK_UNWIND (setxattr, frame, 0, 0, NULL);
|
|
|
cb8e9e |
- else
|
|
|
cb8e9e |
- _afr_handle_replace_brick (this, frame, loc, rb_index);
|
|
|
cb8e9e |
+ return 0;
|
|
|
cb8e9e |
+ } else {
|
|
|
cb8e9e |
+ data = GF_CALLOC (1, sizeof (*data),
|
|
|
cb8e9e |
+ gf_afr_mt_replace_brick_t);
|
|
|
cb8e9e |
+ if (!data) {
|
|
|
cb8e9e |
+ ret = 1;
|
|
|
cb8e9e |
+ op_errno = ENOMEM;
|
|
|
cb8e9e |
+ goto out;
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
+ data->frame = frame;
|
|
|
cb8e9e |
+ loc_copy (&data->loc, loc);
|
|
|
cb8e9e |
+ data->rb_index = rb_index;
|
|
|
cb8e9e |
+ ret = synctask_new (this->ctx->env,
|
|
|
cb8e9e |
+ _afr_handle_replace_brick,
|
|
|
cb8e9e |
+ _afr_handle_replace_brick_cbk,
|
|
|
cb8e9e |
+ NULL, data);
|
|
|
cb8e9e |
+ if (ret) {
|
|
|
cb8e9e |
+ gf_msg (this->name, GF_LOG_ERROR, 0,
|
|
|
cb8e9e |
+ AFR_MSG_REPLACE_BRICK_FAILED,
|
|
|
cb8e9e |
+ "Failed to create synctask. Unable to "
|
|
|
cb8e9e |
+ "perform replace-brick.");
|
|
|
cb8e9e |
+ ret = 1;
|
|
|
cb8e9e |
+ op_errno = ENOMEM;
|
|
|
cb8e9e |
+ loc_wipe (&data->loc);
|
|
|
cb8e9e |
+ GF_FREE (data);
|
|
|
cb8e9e |
+ goto out;
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
ret = 0;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
out:
|
|
|
cb8e9e |
if (ret == 1) {
|
|
|
cb8e9e |
- gf_log (this->name, GF_LOG_ERROR, "'%s' is an internal"
|
|
|
cb8e9e |
- " extended attribute : %s.",
|
|
|
cb8e9e |
- GF_AFR_REPLACE_BRICK, strerror (EPERM));
|
|
|
cb8e9e |
- AFR_STACK_UNWIND (setxattr, frame, -1, EPERM, NULL);
|
|
|
cb8e9e |
+ AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
|
|
|
cb8e9e |
ret = 0;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
return ret;
|
|
|
cb8e9e |
diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h
|
|
|
cb8e9e |
index fd484e4..6f1eee9 100644
|
|
|
cb8e9e |
--- a/xlators/cluster/afr/src/afr-mem-types.h
|
|
|
cb8e9e |
+++ b/xlators/cluster/afr/src/afr-mem-types.h
|
|
|
cb8e9e |
@@ -45,6 +45,7 @@ enum gf_afr_mem_types_ {
|
|
|
cb8e9e |
gf_afr_mt_subvol_healer_t,
|
|
|
cb8e9e |
gf_afr_mt_spbc_timeout_t,
|
|
|
cb8e9e |
gf_afr_mt_spb_status_t,
|
|
|
cb8e9e |
+ gf_afr_mt_replace_brick_t,
|
|
|
cb8e9e |
gf_afr_mt_end
|
|
|
cb8e9e |
};
|
|
|
cb8e9e |
#endif
|
|
|
cb8e9e |
diff --git a/xlators/cluster/afr/src/afr-messages.h b/xlators/cluster/afr/src/afr-messages.h
|
|
|
cb8e9e |
index 4793413..d07c7bc 100644
|
|
|
cb8e9e |
--- a/xlators/cluster/afr/src/afr-messages.h
|
|
|
cb8e9e |
+++ b/xlators/cluster/afr/src/afr-messages.h
|
|
|
cb8e9e |
@@ -45,7 +45,7 @@
|
|
|
cb8e9e |
*/
|
|
|
cb8e9e |
|
|
|
cb8e9e |
#define GLFS_COMP_BASE_AFR GLFS_MSGID_COMP_AFR
|
|
|
cb8e9e |
-#define GLFS_NUM_MESSAGES 38
|
|
|
cb8e9e |
+#define GLFS_NUM_MESSAGES 39
|
|
|
cb8e9e |
#define GLFS_MSGID_END (GLFS_COMP_BASE_AFR + GLFS_NUM_MESSAGES + 1)
|
|
|
cb8e9e |
|
|
|
cb8e9e |
#define glfs_msg_start_x GLFS_COMP_BASE_AFR, "Invalid: Start of messages"
|
|
|
cb8e9e |
@@ -332,7 +332,6 @@
|
|
|
cb8e9e |
*/
|
|
|
cb8e9e |
#define AFR_MSG_SELF_HEAL_FAILED (GLFS_COMP_BASE_AFR + 37)
|
|
|
cb8e9e |
|
|
|
cb8e9e |
-
|
|
|
cb8e9e |
/*!
|
|
|
cb8e9e |
* @messageid 108038
|
|
|
cb8e9e |
* @diagnosis
|
|
|
cb8e9e |
@@ -340,6 +339,13 @@
|
|
|
cb8e9e |
*/
|
|
|
cb8e9e |
#define AFR_MSG_SPLIT_BRAIN_STATUS (GLFS_COMP_BASE_AFR + 38)
|
|
|
cb8e9e |
|
|
|
cb8e9e |
+/*!
|
|
|
cb8e9e |
+ * @messageid 108039
|
|
|
cb8e9e |
+ * @diagnosis
|
|
|
cb8e9e |
+ * @recommendedaction
|
|
|
cb8e9e |
+*/
|
|
|
cb8e9e |
+#define AFR_MSG_REPLACE_BRICK_FAILED (GLFS_COMP_BASE_AFR + 39)
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
|
|
|
cb8e9e |
#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
|
|
|
cb8e9e |
#endif /* !_AFR_MESSAGES_H_ */
|
|
|
cb8e9e |
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
|
|
|
cb8e9e |
index 316eeea..19e764a 100644
|
|
|
cb8e9e |
--- a/xlators/cluster/afr/src/afr.h
|
|
|
cb8e9e |
+++ b/xlators/cluster/afr/src/afr.h
|
|
|
cb8e9e |
@@ -760,6 +760,12 @@ typedef struct afr_spb_status {
|
|
|
cb8e9e |
loc_t *loc;
|
|
|
cb8e9e |
} afr_spb_status_t;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
+typedef struct afr_replace_brick_args {
|
|
|
cb8e9e |
+ call_frame_t *frame;
|
|
|
cb8e9e |
+ loc_t loc;
|
|
|
cb8e9e |
+ int rb_index;
|
|
|
cb8e9e |
+} afr_replace_brick_args_t;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
typedef struct afr_read_subvol_args {
|
|
|
cb8e9e |
ia_type_t ia_type;
|
|
|
cb8e9e |
uuid_t gfid;
|
|
|
cb8e9e |
--
|
|
|
cb8e9e |
1.7.1
|
|
|
cb8e9e |
|