|
|
21ab4e |
From 615dd37c770519bf3a91445187afcbf6ab9922e5 Mon Sep 17 00:00:00 2001
|
|
|
21ab4e |
From: Atin Mukherjee <amukherj@redhat.com>
|
|
|
21ab4e |
Date: Thu, 1 Jun 2017 22:05:51 +0530
|
|
|
21ab4e |
Subject: [PATCH 489/509] glusterd: fix brick start race
|
|
|
21ab4e |
|
|
|
21ab4e |
This commit tries to handle a race where we might end up trying to spawn
|
|
|
21ab4e |
the brick process twice with two different sets of ports, resulting in
|
|
|
21ab4e |
glusterd portmapper having the same brick entry in two different ports
|
|
|
21ab4e |
which will result in clients failing to connect to bricks because of
|
|
|
21ab4e |
incorrect ports being communicated back by glusterd.
|
|
|
21ab4e |
|
|
|
21ab4e |
In glusterd_brick_start () checking brickinfo->status flag to identify
|
|
|
21ab4e |
whether a brick has been started by glusterd or not is not sufficient as
|
|
|
21ab4e |
there might be cases where while glusterd restarts
|
|
|
21ab4e |
glusterd_restart_bricks () will be called through glusterd_spawn_daemons
|
|
|
21ab4e |
() in synctask and immediately glusterd_do_volume_quorum_action () with
|
|
|
21ab4e |
server-side-quorum set to on will again try to start the brick and in
|
|
|
21ab4e |
case if the RPC_CLNT_CONNECT event for the same brick hasn't been processed by
|
|
|
21ab4e |
glusterd by that time, brickinfo->status will still be marked as
|
|
|
21ab4e |
GF_BRICK_STOPPED, resulting in a reattempt to start the brick with a different
|
|
|
21ab4e |
port, and that would make the portmap go for a toss, resulting in clients fetching an
|
|
|
21ab4e |
incorrect port.
|
|
|
21ab4e |
|
|
|
21ab4e |
Fix would be to introduce another enum value called GF_BRICK_STARTING in
|
|
|
21ab4e |
brickinfo->status which will be set when a brick start is attempted by
|
|
|
21ab4e |
glusterd and will be set to started through RPC_CLNT_CONNECT event. For
|
|
|
21ab4e |
brick multiplexing, on attach brick request given the brickinfo->status
|
|
|
21ab4e |
flag is marked as started directly, this value will not have any effect.
|
|
|
21ab4e |
This patch also removes the started_here flag, as it is redundant with
|
|
|
21ab4e |
brickinfo->status.
|
|
|
21ab4e |
|
|
|
21ab4e |
>Reviewed-on: https://review.gluster.org/17447
|
|
|
21ab4e |
>Smoke: Gluster Build System <jenkins@build.gluster.org>
|
|
|
21ab4e |
>NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
|
|
|
21ab4e |
>CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
|
|
|
21ab4e |
>Reviewed-by: Jeff Darcy <jeff@pl.atyp.us>
|
|
|
21ab4e |
|
|
|
21ab4e |
Change-Id: I9dda1a9a531b67734a6e8c7619677867b520dcb2
|
|
|
21ab4e |
BUG: 1451756
|
|
|
21ab4e |
Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
|
|
|
21ab4e |
Reviewed-on: https://code.engineering.redhat.com/gerrit/108338
|
|
|
21ab4e |
---
|
|
|
21ab4e |
tests/volume.rc | 5 -----
|
|
|
21ab4e |
xlators/mgmt/glusterd/src/glusterd-handler.c | 3 ---
|
|
|
21ab4e |
xlators/mgmt/glusterd/src/glusterd-op-sm.c | 2 --
|
|
|
21ab4e |
xlators/mgmt/glusterd/src/glusterd-pmap.c | 1 -
|
|
|
21ab4e |
xlators/mgmt/glusterd/src/glusterd-utils.c | 23 +++++++++--------------
|
|
|
21ab4e |
xlators/mgmt/glusterd/src/glusterd.h | 2 +-
|
|
|
21ab4e |
6 files changed, 10 insertions(+), 26 deletions(-)
|
|
|
21ab4e |
|
|
|
21ab4e |
diff --git a/tests/volume.rc b/tests/volume.rc
|
|
|
21ab4e |
index b3786c3..495d8e5 100644
|
|
|
21ab4e |
--- a/tests/volume.rc
|
|
|
21ab4e |
+++ b/tests/volume.rc
|
|
|
21ab4e |
@@ -271,11 +271,6 @@ function kill_brick {
|
|
|
21ab4e |
local socket=$(cat $cmdline | tr '\0' '\n' | grep '\.socket$')
|
|
|
21ab4e |
|
|
|
21ab4e |
gf_attach -d $socket $brick
|
|
|
21ab4e |
- # Since we're not going through glusterd, we need to clean up the
|
|
|
21ab4e |
- # pidfile ourselves. However, other state in glusterd (e.g.
|
|
|
21ab4e |
- # started_here) won't be updated. A "stop-brick" CLI command would
|
|
|
21ab4e |
- # sure be useful.
|
|
|
21ab4e |
- rm -f $pidfile
|
|
|
21ab4e |
|
|
|
21ab4e |
# When the last brick in a process is terminated, the process has to
|
|
|
21ab4e |
# sleep for a second to give the RPC response a chance to get back to
|
|
|
21ab4e |
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
|
|
|
21ab4e |
index 1fd7813..3c793d9 100644
|
|
|
21ab4e |
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
|
|
|
21ab4e |
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
|
|
|
21ab4e |
@@ -5652,7 +5652,6 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,
|
|
|
21ab4e |
brickinfo->hostname, brickinfo->path);
|
|
|
21ab4e |
|
|
|
21ab4e |
glusterd_set_brick_status (brickinfo, GF_BRICK_STARTED);
|
|
|
21ab4e |
- brickinfo->started_here = _gf_true;
|
|
|
21ab4e |
|
|
|
21ab4e |
gf_event (EVENT_BRICK_CONNECTED, "peer=%s;volume=%s;brick=%s",
|
|
|
21ab4e |
brickinfo->hostname, volinfo->volname,
|
|
|
21ab4e |
@@ -5684,8 +5683,6 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,
|
|
|
21ab4e |
"Brick %s:%s has disconnected from glusterd.",
|
|
|
21ab4e |
brickinfo->hostname, brickinfo->path);
|
|
|
21ab4e |
|
|
|
21ab4e |
- brickinfo->started_here = _gf_false;
|
|
|
21ab4e |
-
|
|
|
21ab4e |
ret = get_volinfo_from_brickid (brickid, &volinfo);
|
|
|
21ab4e |
if (ret) {
|
|
|
21ab4e |
gf_msg (this->name, GF_LOG_ERROR, 0,
|
|
|
21ab4e |
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
|
|
|
21ab4e |
index f82a8f1..5fbf15f 100644
|
|
|
21ab4e |
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
|
|
|
21ab4e |
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
|
|
|
21ab4e |
@@ -6210,7 +6210,6 @@ glusterd_bricks_select_stop_volume (dict_t *dict, char **op_errstr,
|
|
|
21ab4e |
* TBD: move this to *after* the RPC
|
|
|
21ab4e |
*/
|
|
|
21ab4e |
brickinfo->status = GF_BRICK_STOPPED;
|
|
|
21ab4e |
- brickinfo->started_here = _gf_false;
|
|
|
21ab4e |
}
|
|
|
21ab4e |
}
|
|
|
21ab4e |
|
|
|
21ab4e |
@@ -6311,7 +6310,6 @@ glusterd_bricks_select_remove_brick (dict_t *dict, char **op_errstr,
|
|
|
21ab4e |
* TBD: move this to *after* the RPC
|
|
|
21ab4e |
*/
|
|
|
21ab4e |
brickinfo->status = GF_BRICK_STOPPED;
|
|
|
21ab4e |
- brickinfo->started_here = _gf_false;
|
|
|
21ab4e |
}
|
|
|
21ab4e |
i++;
|
|
|
21ab4e |
}
|
|
|
21ab4e |
diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.c b/xlators/mgmt/glusterd/src/glusterd-pmap.c
|
|
|
21ab4e |
index 13b347e..f43324c 100644
|
|
|
21ab4e |
--- a/xlators/mgmt/glusterd/src/glusterd-pmap.c
|
|
|
21ab4e |
+++ b/xlators/mgmt/glusterd/src/glusterd-pmap.c
|
|
|
21ab4e |
@@ -593,7 +593,6 @@ __gluster_pmap_signout (rpcsvc_request_t *req)
|
|
|
21ab4e |
GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo,
|
|
|
21ab4e |
conf);
|
|
|
21ab4e |
sys_unlink (pidfile);
|
|
|
21ab4e |
- brickinfo->started_here = _gf_false;
|
|
|
21ab4e |
|
|
|
21ab4e |
/* Setting the brick status to GF_BRICK_STOPPED to
|
|
|
21ab4e |
* ensure correct brick status is maintained on the
|
|
|
21ab4e |
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
|
|
|
21ab4e |
index 903ee80..5450be4 100644
|
|
|
21ab4e |
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
|
|
|
21ab4e |
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
|
|
|
21ab4e |
@@ -1972,7 +1972,7 @@ retry:
|
|
|
21ab4e |
|
|
|
21ab4e |
brickinfo->port = port;
|
|
|
21ab4e |
brickinfo->rdma_port = rdma_port;
|
|
|
21ab4e |
- brickinfo->started_here = _gf_true;
|
|
|
21ab4e |
+ brickinfo->status = GF_BRICK_STARTING;
|
|
|
21ab4e |
|
|
|
21ab4e |
if (wait) {
|
|
|
21ab4e |
synclock_unlock (&priv->big_lock);
|
|
|
21ab4e |
@@ -2020,6 +2020,8 @@ connect:
|
|
|
21ab4e |
}
|
|
|
21ab4e |
|
|
|
21ab4e |
out:
|
|
|
21ab4e |
+ if (ret)
|
|
|
21ab4e |
+ brickinfo->status = GF_BRICK_STOPPED;
|
|
|
21ab4e |
return ret;
|
|
|
21ab4e |
}
|
|
|
21ab4e |
|
|
|
21ab4e |
@@ -2131,7 +2133,6 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,
|
|
|
21ab4e |
gf_msg_debug (this->name, 0, "Unlinking pidfile %s", pidfile);
|
|
|
21ab4e |
(void) sys_unlink (pidfile);
|
|
|
21ab4e |
|
|
|
21ab4e |
- brickinfo->started_here = _gf_false;
|
|
|
21ab4e |
brickinfo->status = GF_BRICK_STOPPED;
|
|
|
21ab4e |
|
|
|
21ab4e |
if (del_brick)
|
|
|
21ab4e |
@@ -5053,7 +5054,6 @@ attach_brick (xlator_t *this,
|
|
|
21ab4e |
|
|
|
21ab4e |
brickinfo->port = other_brick->port;
|
|
|
21ab4e |
brickinfo->status = GF_BRICK_STARTED;
|
|
|
21ab4e |
- brickinfo->started_here = _gf_true;
|
|
|
21ab4e |
brickinfo->rpc = rpc_clnt_ref (other_brick->rpc);
|
|
|
21ab4e |
|
|
|
21ab4e |
GLUSTERD_GET_BRICK_PIDFILE (pidfile1, other_vol, other_brick, conf);
|
|
|
21ab4e |
@@ -5200,10 +5200,11 @@ find_compat_brick_in_vol (glusterd_conf_t *conf,
|
|
|
21ab4e |
if (other_brick == brickinfo) {
|
|
|
21ab4e |
continue;
|
|
|
21ab4e |
}
|
|
|
21ab4e |
- if (!other_brick->started_here) {
|
|
|
21ab4e |
+ if (strcmp (brickinfo->hostname, other_brick->hostname) != 0) {
|
|
|
21ab4e |
continue;
|
|
|
21ab4e |
}
|
|
|
21ab4e |
- if (strcmp (brickinfo->hostname, other_brick->hostname) != 0) {
|
|
|
21ab4e |
+ if (other_brick->status != GF_BRICK_STARTED &&
|
|
|
21ab4e |
+ other_brick->status != GF_BRICK_STARTING) {
|
|
|
21ab4e |
continue;
|
|
|
21ab4e |
}
|
|
|
21ab4e |
|
|
|
21ab4e |
@@ -5229,7 +5230,7 @@ find_compat_brick_in_vol (glusterd_conf_t *conf,
|
|
|
21ab4e |
gf_log (this->name, GF_LOG_INFO,
|
|
|
21ab4e |
"cleaning up dead brick %s:%s",
|
|
|
21ab4e |
other_brick->hostname, other_brick->path);
|
|
|
21ab4e |
- other_brick->started_here = _gf_false;
|
|
|
21ab4e |
+ other_brick->status = GF_BRICK_STOPPED;
|
|
|
21ab4e |
sys_unlink (pidfile2);
|
|
|
21ab4e |
continue;
|
|
|
21ab4e |
}
|
|
|
21ab4e |
@@ -5435,16 +5436,11 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
|
|
|
21ab4e |
|
|
|
21ab4e |
GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, conf);
|
|
|
21ab4e |
if (gf_is_service_running (pidfile, &pid)) {
|
|
|
21ab4e |
- /*
|
|
|
21ab4e |
- * In general, if the pidfile exists and points to a running
|
|
|
21ab4e |
- * process, this will already be set. However, that's not the
|
|
|
21ab4e |
- * case when we're starting up and bricks are already running.
|
|
|
21ab4e |
- */
|
|
|
21ab4e |
- if (brickinfo->status != GF_BRICK_STARTED) {
|
|
|
21ab4e |
+ if (brickinfo->status != GF_BRICK_STARTING &&
|
|
|
21ab4e |
+ brickinfo->status != GF_BRICK_STARTED) {
|
|
|
21ab4e |
gf_log (this->name, GF_LOG_INFO,
|
|
|
21ab4e |
"discovered already-running brick %s",
|
|
|
21ab4e |
brickinfo->path);
|
|
|
21ab4e |
- //brickinfo->status = GF_BRICK_STARTED;
|
|
|
21ab4e |
(void) pmap_registry_bind (this,
|
|
|
21ab4e |
brickinfo->port, brickinfo->path,
|
|
|
21ab4e |
GF_PMAP_PORT_BRICKSERVER, NULL);
|
|
|
21ab4e |
@@ -5468,7 +5464,6 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
|
|
|
21ab4e |
socketpath, brickinfo->path, volinfo->volname);
|
|
|
21ab4e |
(void) glusterd_brick_connect (volinfo, brickinfo,
|
|
|
21ab4e |
socketpath);
|
|
|
21ab4e |
- brickinfo->started_here = _gf_true;
|
|
|
21ab4e |
}
|
|
|
21ab4e |
return 0;
|
|
|
21ab4e |
}
|
|
|
21ab4e |
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
|
|
|
21ab4e |
index dde06bd..b5a0a3b 100644
|
|
|
21ab4e |
--- a/xlators/mgmt/glusterd/src/glusterd.h
|
|
|
21ab4e |
+++ b/xlators/mgmt/glusterd/src/glusterd.h
|
|
|
21ab4e |
@@ -192,6 +192,7 @@ typedef enum gf_brick_status {
|
|
|
21ab4e |
GF_BRICK_STOPPED,
|
|
|
21ab4e |
GF_BRICK_STARTED,
|
|
|
21ab4e |
GF_BRICK_STOPPING,
|
|
|
21ab4e |
+ GF_BRICK_STARTING
|
|
|
21ab4e |
} gf_brick_status_t;
|
|
|
21ab4e |
|
|
|
21ab4e |
struct glusterd_brickinfo {
|
|
|
21ab4e |
@@ -223,7 +224,6 @@ struct glusterd_brickinfo {
|
|
|
21ab4e |
* a replica 3 volume with arbiter enabled.
|
|
|
21ab4e |
*/
|
|
|
21ab4e |
uint16_t group;
|
|
|
21ab4e |
- gf_boolean_t started_here;
|
|
|
21ab4e |
};
|
|
|
21ab4e |
|
|
|
21ab4e |
typedef struct glusterd_brickinfo glusterd_brickinfo_t;
|
|
|
21ab4e |
--
|
|
|
21ab4e |
1.8.3.1
|
|
|
21ab4e |
|