|
|
cb8e9e |
From 12b3a9ec4c661ae6bc8fc546eb29114191c9c404 Mon Sep 17 00:00:00 2001
|
|
|
cb8e9e |
From: anand <anekkunt@redhat.com>
|
|
|
cb8e9e |
Date: Tue, 21 Jul 2015 15:42:24 +0530
|
|
|
cb8e9e |
Subject: [PATCH 255/279] glusterd: getting txn_id from frame->cookie in op_sm call back
|
|
|
cb8e9e |
|
|
|
cb8e9e |
RCA: If rebalance start is triggered from one node and one of the other nodes in the cluster goes down simultaneously,
|
|
|
cb8e9e |
we might end up in a case where callback will use the txn_id from priv->global_txn_id which is always zeros and
|
|
|
cb8e9e |
this means that injecting an event with an incorrect txn_id will result in the op-sm getting stuck.
|
|
|
cb8e9e |
|
|
|
cb8e9e |
fix: set txn_id in frame->cookie during submit_and_request, so that we can get txn_id in call back
|
|
|
cb8e9e |
functions.
|
|
|
cb8e9e |
|
|
|
cb8e9e |
>Change-Id: I519176c259ea9d37897791a77a7c92eb96d10052
|
|
|
cb8e9e |
>BUG: 1245142
|
|
|
cb8e9e |
>Signed-off-by: anand <anekkunt@redhat.com>
|
|
|
cb8e9e |
>Reviewed-on: http://review.gluster.org/11728
|
|
|
cb8e9e |
>Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
|
|
|
cb8e9e |
>Tested-by: NetBSD Build System <jenkins@build.gluster.org>
|
|
|
cb8e9e |
>Tested-by: Gluster Build System <jenkins@build.gluster.com>
|
|
|
cb8e9e |
>Signed-off-by: Anand <anekkunt@redhat.com>
|
|
|
cb8e9e |
|
|
|
cb8e9e |
Change-Id: I4f5df48890953651fc80bc3c3e66711b230d8fd1
|
|
|
cb8e9e |
BUG: 1244527
|
|
|
cb8e9e |
Signed-off-by: Anand <anekkunt@redhat.com>
|
|
|
cb8e9e |
Reviewed-on: https://code.engineering.redhat.com/gerrit/55080
|
|
|
cb8e9e |
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
|
|
|
cb8e9e |
Tested-by: Atin Mukherjee <amukherj@redhat.com>
|
|
|
cb8e9e |
---
|
|
|
cb8e9e |
tests/bugs/glusterd/bug-1245142-rebalance_test.t | 28 ++++++++
|
|
|
cb8e9e |
xlators/mgmt/glusterd/src/glusterd-rpc-ops.c | 77 +++++++++++++++++-----
|
|
|
cb8e9e |
2 files changed, 89 insertions(+), 16 deletions(-)
|
|
|
cb8e9e |
create mode 100644 tests/bugs/glusterd/bug-1245142-rebalance_test.t
|
|
|
cb8e9e |
|
|
|
cb8e9e |
diff --git a/tests/bugs/glusterd/bug-1245142-rebalance_test.t b/tests/bugs/glusterd/bug-1245142-rebalance_test.t
|
|
|
cb8e9e |
new file mode 100644
|
|
|
cb8e9e |
index 0000000..a28810e
|
|
|
cb8e9e |
--- /dev/null
|
|
|
cb8e9e |
+++ b/tests/bugs/glusterd/bug-1245142-rebalance_test.t
|
|
|
cb8e9e |
@@ -0,0 +1,28 @@
|
|
|
cb8e9e |
+#!/bin/bash
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+. $(dirname $0)/../../include.rc
|
|
|
cb8e9e |
+. $(dirname $0)/../../cluster.rc
|
|
|
cb8e9e |
+. $(dirname $0)/../../volume.rc
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+cleanup;
|
|
|
cb8e9e |
+TEST launch_cluster 2;
|
|
|
cb8e9e |
+TEST $CLI_1 peer probe $H2;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+$CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0
|
|
|
cb8e9e |
+EXPECT 'Created' cluster_volinfo_field 1 $V0 'Status';
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+$CLI_1 volume start $V0
|
|
|
cb8e9e |
+EXPECT 'Started' cluster_volinfo_field 1 $V0 'Status';
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+$CLI_1 volume rebalance $V0 start &
|
|
|
cb8e9e |
+#kill glusterd2 after the request is sent, so that the call back is called
|
|
|
cb8e9e |
+#with rpc->status fail, so roughly 1sec delay is introduced to get this scenario.
|
|
|
cb8e9e |
+sleep 1
|
|
|
cb8e9e |
+kill_glusterd 2
|
|
|
cb8e9e |
+#check glusterd commands are working after rebalance start command
|
|
|
cb8e9e |
+EXPECT 'Started' cluster_volinfo_field 1 $V0 'Status';
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+cleanup;
|
|
|
cb8e9e |
diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
|
|
|
cb8e9e |
index 0890b02..bcdffdc 100644
|
|
|
cb8e9e |
--- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
|
|
|
cb8e9e |
+++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
|
|
|
cb8e9e |
@@ -1131,14 +1131,17 @@ __glusterd_stage_op_cbk (struct rpc_req *req, struct iovec *iov,
|
|
|
cb8e9e |
xlator_t *this = NULL;
|
|
|
cb8e9e |
glusterd_conf_t *priv = NULL;
|
|
|
cb8e9e |
uuid_t *txn_id = NULL;
|
|
|
cb8e9e |
+ call_frame_t *frame = NULL;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
this = THIS;
|
|
|
cb8e9e |
GF_ASSERT (this);
|
|
|
cb8e9e |
GF_ASSERT (req);
|
|
|
cb8e9e |
priv = this->private;
|
|
|
cb8e9e |
GF_ASSERT (priv);
|
|
|
cb8e9e |
+ GF_ASSERT(myframe);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- txn_id = &priv->global_txn_id;
|
|
|
cb8e9e |
+ frame = myframe;
|
|
|
cb8e9e |
+ txn_id = frame->cookie;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
if (-1 == req->rpc_status) {
|
|
|
cb8e9e |
rsp.op_ret = -1;
|
|
|
cb8e9e |
@@ -1196,10 +1199,6 @@ out:
|
|
|
cb8e9e |
uuid_utoa (rsp.uuid));
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- ret = dict_get_bin (dict, "transaction_id", (void **)&txn_id);
|
|
|
cb8e9e |
- gf_msg_debug (this->name, 0, "transaction ID = %s",
|
|
|
cb8e9e |
- uuid_utoa (*txn_id));
|
|
|
cb8e9e |
-
|
|
|
cb8e9e |
rcu_read_lock ();
|
|
|
cb8e9e |
peerinfo = glusterd_peerinfo_find (rsp.uuid, NULL);
|
|
|
cb8e9e |
if (peerinfo == NULL) {
|
|
|
cb8e9e |
@@ -1246,6 +1245,7 @@ out:
|
|
|
cb8e9e |
} else {
|
|
|
cb8e9e |
free (rsp.dict.dict_val); //malloced by xdr
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
+ GF_FREE (frame->cookie);
|
|
|
cb8e9e |
GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe));
|
|
|
cb8e9e |
return ret;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
@@ -1274,14 +1274,17 @@ __glusterd_commit_op_cbk (struct rpc_req *req, struct iovec *iov,
|
|
|
cb8e9e |
glusterd_conf_t *priv = NULL;
|
|
|
cb8e9e |
uuid_t *txn_id = NULL;
|
|
|
cb8e9e |
glusterd_op_info_t txn_op_info = {{0},};
|
|
|
cb8e9e |
+ call_frame_t *frame = NULL;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
this = THIS;
|
|
|
cb8e9e |
GF_ASSERT (this);
|
|
|
cb8e9e |
GF_ASSERT (req);
|
|
|
cb8e9e |
priv = this->private;
|
|
|
cb8e9e |
GF_ASSERT (priv);
|
|
|
cb8e9e |
+ GF_ASSERT(myframe);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- txn_id = &priv->global_txn_id;
|
|
|
cb8e9e |
+ frame = myframe;
|
|
|
cb8e9e |
+ txn_id = frame->cookie;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
if (-1 == req->rpc_status) {
|
|
|
cb8e9e |
rsp.op_ret = -1;
|
|
|
cb8e9e |
@@ -1339,9 +1342,6 @@ __glusterd_commit_op_cbk (struct rpc_req *req, struct iovec *iov,
|
|
|
cb8e9e |
"Received commit ACC from uuid: %s",
|
|
|
cb8e9e |
uuid_utoa (rsp.uuid));
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
- ret = dict_get_bin (dict, "transaction_id", (void **)&txn_id);
|
|
|
cb8e9e |
- gf_msg_debug (this->name, 0, "transaction ID = %s",
|
|
|
cb8e9e |
- uuid_utoa (*txn_id));
|
|
|
cb8e9e |
|
|
|
cb8e9e |
ret = glusterd_get_txn_opinfo (txn_id, &txn_op_info);
|
|
|
cb8e9e |
if (ret) {
|
|
|
cb8e9e |
@@ -1414,6 +1414,7 @@ out:
|
|
|
cb8e9e |
if (dict)
|
|
|
cb8e9e |
dict_unref (dict);
|
|
|
cb8e9e |
free (rsp.op_errstr); //malloced by xdr
|
|
|
cb8e9e |
+ GF_FREE (frame->cookie);
|
|
|
cb8e9e |
GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe));
|
|
|
cb8e9e |
return ret;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
@@ -1898,9 +1899,9 @@ glusterd_stage_op (call_frame_t *frame, xlator_t *this,
|
|
|
cb8e9e |
int ret = -1;
|
|
|
cb8e9e |
glusterd_peerinfo_t *peerinfo = NULL;
|
|
|
cb8e9e |
glusterd_conf_t *priv = NULL;
|
|
|
cb8e9e |
- call_frame_t *dummy_frame = NULL;
|
|
|
cb8e9e |
dict_t *dict = NULL;
|
|
|
cb8e9e |
gf_boolean_t is_alloc = _gf_true;
|
|
|
cb8e9e |
+ uuid_t *txn_id = NULL;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
if (!this) {
|
|
|
cb8e9e |
goto out;
|
|
|
cb8e9e |
@@ -1930,13 +1931,34 @@ glusterd_stage_op (call_frame_t *frame, xlator_t *this,
|
|
|
cb8e9e |
"to request buffer");
|
|
|
cb8e9e |
goto out;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
+ /* Sending valid transaction ID to peers */
|
|
|
cb8e9e |
+ ret = dict_get_bin (dict, "transaction_id",
|
|
|
cb8e9e |
+ (void **)&txn_id);
|
|
|
cb8e9e |
+ if (ret) {
|
|
|
cb8e9e |
+ gf_msg (this->name, GF_LOG_ERROR, 0,
|
|
|
cb8e9e |
+ GD_MSG_TRANS_ID_GET_FAIL,
|
|
|
cb8e9e |
+ "Failed to get transaction id.");
|
|
|
cb8e9e |
+ goto out;
|
|
|
cb8e9e |
+ } else {
|
|
|
cb8e9e |
+ gf_msg_debug (this->name, 0,
|
|
|
cb8e9e |
+ "Transaction_id = %s", uuid_utoa (*txn_id));
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
|
|
|
cb8e9e |
+ if (!frame)
|
|
|
cb8e9e |
+ frame = create_frame (this, this->ctx->pool);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- dummy_frame = create_frame (this, this->ctx->pool);
|
|
|
cb8e9e |
- if (!dummy_frame)
|
|
|
cb8e9e |
+ if (!frame) {
|
|
|
cb8e9e |
+ ret = -1;
|
|
|
cb8e9e |
goto out;
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
+ frame->cookie = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t);
|
|
|
cb8e9e |
+ if (!frame->cookie) {
|
|
|
cb8e9e |
+ ret = -1;
|
|
|
cb8e9e |
+ goto out;
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
+ gf_uuid_copy (frame->cookie, *txn_id);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame,
|
|
|
cb8e9e |
+ ret = glusterd_submit_request (peerinfo->rpc, &req, frame,
|
|
|
cb8e9e |
peerinfo->mgmt, GLUSTERD_MGMT_STAGE_OP,
|
|
|
cb8e9e |
NULL,
|
|
|
cb8e9e |
this, glusterd_stage_op_cbk,
|
|
|
cb8e9e |
@@ -1961,6 +1983,7 @@ glusterd_commit_op (call_frame_t *frame, xlator_t *this,
|
|
|
cb8e9e |
call_frame_t *dummy_frame = NULL;
|
|
|
cb8e9e |
dict_t *dict = NULL;
|
|
|
cb8e9e |
gf_boolean_t is_alloc = _gf_true;
|
|
|
cb8e9e |
+ uuid_t *txn_id = NULL;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
if (!this) {
|
|
|
cb8e9e |
goto out;
|
|
|
cb8e9e |
@@ -1989,12 +2012,34 @@ glusterd_commit_op (call_frame_t *frame, xlator_t *this,
|
|
|
cb8e9e |
"request buffer");
|
|
|
cb8e9e |
goto out;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
+ /* Sending valid transaction ID to peers */
|
|
|
cb8e9e |
+ ret = dict_get_bin (dict, "transaction_id",
|
|
|
cb8e9e |
+ (void **)&txn_id);
|
|
|
cb8e9e |
+ if (ret) {
|
|
|
cb8e9e |
+ gf_msg (this->name, GF_LOG_ERROR, 0,
|
|
|
cb8e9e |
+ GD_MSG_TRANS_ID_GET_FAIL,
|
|
|
cb8e9e |
+ "Failed to get transaction id.");
|
|
|
cb8e9e |
+ goto out;
|
|
|
cb8e9e |
+ } else {
|
|
|
cb8e9e |
+ gf_msg_debug (this->name, 0,
|
|
|
cb8e9e |
+ "Transaction_id = %s", uuid_utoa (*txn_id));
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- dummy_frame = create_frame (this, this->ctx->pool);
|
|
|
cb8e9e |
- if (!dummy_frame)
|
|
|
cb8e9e |
+ if (!frame)
|
|
|
cb8e9e |
+ frame = create_frame (this, this->ctx->pool);
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ if (!frame) {
|
|
|
cb8e9e |
+ ret = -1;
|
|
|
cb8e9e |
goto out;
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
+ frame->cookie = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t);
|
|
|
cb8e9e |
+ if (!frame->cookie) {
|
|
|
cb8e9e |
+ ret = -1;
|
|
|
cb8e9e |
+ goto out;
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
+ gf_uuid_copy (frame->cookie, *txn_id);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame,
|
|
|
cb8e9e |
+ ret = glusterd_submit_request (peerinfo->rpc, &req, frame,
|
|
|
cb8e9e |
peerinfo->mgmt, GLUSTERD_MGMT_COMMIT_OP,
|
|
|
cb8e9e |
NULL,
|
|
|
cb8e9e |
this, glusterd_commit_op_cbk,
|
|
|
cb8e9e |
--
|
|
|
cb8e9e |
1.7.1
|
|
|
cb8e9e |
|