From 938ee38c02cce2a743c672f9c03798ebcbb1e348 Mon Sep 17 00:00:00 2001
From: Atin Mukherjee <amukherj@redhat.com>
Date: Thu, 26 Oct 2017 14:26:30 +0530
Subject: [PATCH 44/74] glusterd: fix brick restart parallelism

glusterd's brick restart logic is not always sequential, as there are
at least three different ways in which bricks get restarted:
1. through friend-sm and glusterd_spawn_daemons ()
2. through friend-sm and handling volume quorum action
3. through friend handshaking when there is a mismatch on quorum on
friend import.

In a brick multiplexing setup, glusterd ended up trying to spawn the
same brick process a couple of times, as two threads could hit
glusterd_brick_start () within a fraction of a millisecond. glusterd
then had no way to reject either attempt, since the brick start
criteria were met in both cases.
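
Before this change, the only guard in glusterd_brick_start () was the
status check quoted below (it appears as the pre-change code in the
glusterd-utils.c hunk of this diff), so two threads entering before
either one moved the brick to GF_BRICK_STARTING would both proceed to
spawn the process:

        if (brickinfo->status == GF_BRICK_STARTING) {
                gf_msg_debug (this->name, 0, "brick %s is already in starting "
                              "phase", brickinfo->path);
                ret = 0;
                goto out;
        }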

As a solution, it'd be better to control this madness with two
different flags: one is a boolean called start_triggered, which
indicates that a brick start has been triggered and stays true until
the brick dies or is killed; the second is a mutex lock to ensure that
for a particular brick we don't end up in glusterd_brick_start () more
than once at the same point in time.
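
The guarded start that the diff below applies at each restart path
boils down to the following pattern (a simplified sketch lifted from
the change itself, not a standalone implementation):

        if (!brickinfo->start_triggered) {
                pthread_mutex_lock (&brickinfo->restart_mutex);
                {
                        ret = glusterd_brick_start (volinfo, brickinfo,
                                                    _gf_false);
                }
                pthread_mutex_unlock (&brickinfo->restart_mutex);
        }

glusterd_brick_start () itself returns early when start_triggered is
already set, sets it to _gf_true before attempting the spawn, and
resets it on failure; the stop path and the brick RPC disconnect
handler reset it as well, so the flag stays true only while a brick
start is in flight or the brick is running.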

>mainline patch : https://review.gluster.org/#/c/18577

Change-Id: I292f1e58d6971e111725e1baea1fe98b890b43e2
BUG: 1505363
Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/123056
Reviewed-by: Gaurav Yadav <gyadav@redhat.com>
---
 xlators/mgmt/glusterd/src/glusterd-handler.c       | 24 ++++++++-----
 xlators/mgmt/glusterd/src/glusterd-op-sm.c         | 31 ++++++++++-------
 xlators/mgmt/glusterd/src/glusterd-server-quorum.c | 15 +++++++--
 xlators/mgmt/glusterd/src/glusterd-utils.c         | 39 +++++++++++++++++-----
 xlators/mgmt/glusterd/src/glusterd-volume-ops.c    |  8 +++++
 xlators/mgmt/glusterd/src/glusterd.h               |  2 ++
 6 files changed, 87 insertions(+), 32 deletions(-)

diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
index 34e751c..c3b9252 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
@@ -5946,16 +5946,22 @@ glusterd_mark_bricks_stopped_by_proc (glusterd_brick_proc_t *brick_proc) {
         int ret = -1;
 
         cds_list_for_each_entry (brickinfo, &brick_proc->bricks, brick_list) {
-                ret = glusterd_get_volinfo_from_brick (brickinfo->path, &volinfo);
+                ret = glusterd_get_volinfo_from_brick (brickinfo->path,
+                                                       &volinfo);
                 if (ret) {
-                        gf_msg (THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
-                                "Failed to get volinfo from brick(%s)",
-                                brickinfo->path);
+                        gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                                GD_MSG_VOLINFO_GET_FAIL, "Failed to get volinfo"
+                                " from brick(%s)", brickinfo->path);
                         goto out;
                 }
-                cds_list_for_each_entry (brickinfo_tmp, &volinfo->bricks, brick_list) {
-                        if (strcmp (brickinfo->path, brickinfo_tmp->path) == 0)
-                                glusterd_set_brick_status (brickinfo_tmp, GF_BRICK_STOPPED);
+                cds_list_for_each_entry (brickinfo_tmp, &volinfo->bricks,
+                                         brick_list) {
+                        if (strcmp (brickinfo->path,
+                                    brickinfo_tmp->path) == 0) {
+                                glusterd_set_brick_status (brickinfo_tmp,
+                                                           GF_BRICK_STOPPED);
+                                brickinfo_tmp->start_triggered = _gf_false;
+                        }
                 }
         }
         return 0;
@@ -6129,8 +6135,10 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,
                                 if (temp == 1)
                                         break;
                         }
-                } else
+                } else {
                         glusterd_set_brick_status (brickinfo, GF_BRICK_STOPPED);
+                        brickinfo->start_triggered = _gf_false;
+                }
                 break;
 
         case RPC_CLNT_DESTROY:
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index 9641b4f..5b8f833 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -2402,18 +2402,25 @@ glusterd_start_bricks (glusterd_volinfo_t *volinfo)
         GF_ASSERT (volinfo);
 
         cds_list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
-                ret = glusterd_brick_start (volinfo, brickinfo, _gf_false);
-                if (ret) {
-                        gf_msg (THIS->name, GF_LOG_ERROR, 0,
-                                GD_MSG_BRICK_DISCONNECTED,
-                                "Failed to start %s:%s for %s",
-                                brickinfo->hostname, brickinfo->path,
-                                volinfo->volname);
-                        gf_event (EVENT_BRICK_START_FAILED,
-                                  "peer=%s;volume=%s;brick=%s",
-                                  brickinfo->hostname, volinfo->volname,
-                                  brickinfo->path);
-                        goto out;
+                if (!brickinfo->start_triggered) {
+                        pthread_mutex_lock (&brickinfo->restart_mutex);
+                        {
+                                ret = glusterd_brick_start (volinfo, brickinfo,
+                                                            _gf_false);
+                        }
+                        pthread_mutex_unlock (&brickinfo->restart_mutex);
+                        if (ret) {
+                                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_BRICK_DISCONNECTED,
+                                        "Failed to start %s:%s for %s",
+                                        brickinfo->hostname, brickinfo->path,
+                                        volinfo->volname);
+                                gf_event (EVENT_BRICK_START_FAILED,
+                                          "peer=%s;volume=%s;brick=%s",
+                                          brickinfo->hostname, volinfo->volname,
+                                          brickinfo->path);
+                                goto out;
+                        }
                 }
 
         }
diff --git a/xlators/mgmt/glusterd/src/glusterd-server-quorum.c b/xlators/mgmt/glusterd/src/glusterd-server-quorum.c
index 4706403..995a568 100644
--- a/xlators/mgmt/glusterd/src/glusterd-server-quorum.c
+++ b/xlators/mgmt/glusterd/src/glusterd-server-quorum.c
@@ -362,10 +362,19 @@ glusterd_do_volume_quorum_action (xlator_t *this, glusterd_volinfo_t *volinfo,
         list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
                 if (!glusterd_is_local_brick (this, volinfo, brickinfo))
                         continue;
-                if (quorum_status == DOESNT_MEET_QUORUM)
+                if (quorum_status == DOESNT_MEET_QUORUM) {
                         glusterd_brick_stop (volinfo, brickinfo, _gf_false);
-                else
-                        glusterd_brick_start (volinfo, brickinfo, _gf_false);
+                } else {
+                        if (!brickinfo->start_triggered) {
+                                pthread_mutex_lock (&brickinfo->restart_mutex);
+                                {
+                                        glusterd_brick_start (volinfo,
+                                                              brickinfo,
+                                                              _gf_false);
+                                }
+                                pthread_mutex_unlock (&brickinfo->restart_mutex);
+                        }
+                }
         }
         volinfo->quorum_status = quorum_status;
         if (quorum_status == MEETS_QUORUM) {
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index bb236df..18de517 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -1084,7 +1084,7 @@ glusterd_brickinfo_new (glusterd_brickinfo_t **brickinfo)
                 goto out;
 
         CDS_INIT_LIST_HEAD (&new_brickinfo->brick_list);
-
+        pthread_mutex_init (&new_brickinfo->restart_mutex, NULL);
         *brickinfo = new_brickinfo;
 
         ret = 0;
@@ -2481,7 +2481,7 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,
         (void) sys_unlink (pidfile);
 
         brickinfo->status = GF_BRICK_STOPPED;
-
+        brickinfo->start_triggered = _gf_false;
         if (del_brick)
                 glusterd_delete_brick (volinfo, brickinfo);
 out:
@@ -5817,13 +5817,14 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
          * three different triggers for an attempt to start the brick process
          * due to the quorum handling code in glusterd_friend_sm.
          */
-        if (brickinfo->status == GF_BRICK_STARTING) {
+        if (brickinfo->status == GF_BRICK_STARTING ||
+            brickinfo->start_triggered) {
                 gf_msg_debug (this->name, 0, "brick %s is already in starting "
                               "phase", brickinfo->path);
                 ret = 0;
                 goto out;
         }
-
+        brickinfo->start_triggered = _gf_true;
         GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, conf);
         if (gf_is_service_running (pidfile, &pid)) {
                 if (brickinfo->status != GF_BRICK_STARTING &&
@@ -5936,6 +5937,9 @@ run:
         }
 
 out:
+        if (ret && brickinfo) {
+                brickinfo->start_triggered = _gf_false;
+        }
         gf_msg_debug (this->name, 0, "returning %d ", ret);
         return ret;
 }
@@ -5997,11 +6001,19 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
                                 start_svcs = _gf_true;
                                 glusterd_svcs_manager (NULL);
                         }
-
                         cds_list_for_each_entry (brickinfo, &volinfo->bricks,
                                                  brick_list) {
-                                glusterd_brick_start (volinfo, brickinfo,
-                                                      _gf_false);
+                                if (!brickinfo->start_triggered) {
+                                        pthread_mutex_lock
+                                                (&brickinfo->restart_mutex);
+                                        {
+                                                glusterd_brick_start
+                                                        (volinfo, brickinfo,
+                                                         _gf_false);
+                                        }
+                                        pthread_mutex_unlock
+                                                (&brickinfo->restart_mutex);
+                                }
                         }
                         ret = glusterd_store_volinfo
                                 (volinfo, GLUSTERD_VOLINFO_VER_AC_NONE);
@@ -6040,8 +6052,17 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
                                  "volume %s", volinfo->volname);
                         cds_list_for_each_entry (brickinfo, &volinfo->bricks,
                                                  brick_list) {
-                                glusterd_brick_start (volinfo, brickinfo,
-                                                      _gf_false);
+                                if (!brickinfo->start_triggered) {
+                                        pthread_mutex_lock
+                                                (&brickinfo->restart_mutex);
+                                        {
+                                                glusterd_brick_start
+                                                        (volinfo, brickinfo,
+                                                         _gf_false);
+                                        }
+                                        pthread_mutex_unlock
+                                                (&brickinfo->restart_mutex);
+                                }
                         }
                         ret = glusterd_store_volinfo
                                 (volinfo, GLUSTERD_VOLINFO_VER_AC_NONE);
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
index 834acab..bec5f72 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
@@ -2545,6 +2545,14 @@ glusterd_start_volume (glusterd_volinfo_t *volinfo, int flags,
         GF_ASSERT (volinfo);
 
         cds_list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+                /* Mark start_triggered to false so that in case if this brick
+                 * was brought down through gf_attach utility, the
+                 * brickinfo->start_triggered wouldn't have been updated to
+                 * _gf_false
+                 */
+                if (flags & GF_CLI_FLAG_OP_FORCE) {
+                        brickinfo->start_triggered = _gf_false;
+                }
                 ret = glusterd_brick_start (volinfo, brickinfo, wait);
                 /* If 'force' try to start all bricks regardless of success or
                  * failure
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index 722d2f8..d4bb236 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -240,6 +240,8 @@ struct glusterd_brickinfo {
         uint64_t           statfs_fsid;
         uint32_t           fs_share_count;
         gf_boolean_t       port_registered;
+        gf_boolean_t       start_triggered;
+        pthread_mutex_t    restart_mutex;
 };
 
 typedef struct glusterd_brickinfo glusterd_brickinfo_t;
--
1.8.3.1
