From 16b5177ca40cb5f8b37ea5331003ecc7dbb6d992 Mon Sep 17 00:00:00 2001 From: Atin Mukherjee Date: Tue, 7 Aug 2018 10:25:49 +0530 Subject: [PATCH 342/351] glusterd: more stricter checks of if brick is running in multiplex mode While gf_attach () utility can help in detaching a brick instance from the brick process which the kill_brick () function in tests/volume.rc uses it has a caveat which is as follows: 1. It doesn't ensure the respective brick is marked as stopped which glusterd does from glusterd_brick_stop 2. Sometimes if kill_brick () is executed just after a brick stack is up, the mgmt_rpc_notify () can take some time before marking priv->connected to 1 and before it if kill_brick () is executed, brick will fail to initiate the pmap_signout which would inturn cleans up the pidfile. To avoid such possibilities, a more stricter check on if a brick is running or not in brick multiplexing has been brought in now where it not only checks for its pid's existance but checks if the respective process has the brick instance associated with it before checking for brick's status. > Change-Id: I98b92df949076663b9686add7aab4ec2f24ad5ab > Signed-off-by: Atin Mukherjee > (cherry pick from commit bb9b8f61501cc633e585593de4d9f2fe5494d5ce) > (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/20651/) Change-Id: I47b70927c839ca4828a0499006b5c1f604d3d6a4 BUG: 1612098 Signed-off-by: Mohit Agrawal Reviewed-on: https://code.engineering.redhat.com/gerrit/146874 Reviewed-by: Atin Mukherjee Tested-by: Atin Mukherjee --- xlators/mgmt/glusterd/src/glusterd-utils.c | 71 ++++++++++++++++-------------- 1 file changed, 39 insertions(+), 32 deletions(-) diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 372d5f4..6f7c787 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -6083,6 +6083,7 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo, char *brickpath = NULL; glusterd_volinfo_t *other_vol; struct statvfs brickstat = {0,}; + gf_boolean_t is_service_running = _gf_false; this = THIS; GF_ASSERT (this); @@ -6149,8 +6150,39 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo, brickinfo->path); goto out; } - - if (gf_is_service_running (pidfile, &pid)) { + is_service_running = gf_is_service_running (pidfile, &pid); + if (is_service_running) { + if (is_brick_mx_enabled ()) { + brickpath = search_brick_path_from_proc + (pid, brickinfo->path); + if (!brickpath) { + gf_log (this->name, GF_LOG_INFO, + "Either pid %d is not running or brick" + " path %s is not consumed so cleanup pidfile", + pid, brickinfo->path); + /* brick isn't running,so unlink stale pidfile + * if any. + */ + if (sys_access (pidfile , R_OK) == 0) { + sys_unlink (pidfile); + } + goto run; + } + GF_FREE (brickpath); + ret = glusterd_get_sock_from_brick_pid (pid, socketpath, + sizeof(socketpath)); + if (ret) { + gf_log (this->name, GF_LOG_INFO, + "Either pid %d is not running or does " + "not match with any running brick " + "processes", pid); + /* Fetch unix socket is failed so unlink pidfile */ + if (sys_access (pidfile , R_OK) == 0) { + sys_unlink (pidfile); + } + goto run; + } + } if (brickinfo->status != GF_BRICK_STARTING && brickinfo->status != GF_BRICK_STARTED) { gf_log (this->name, GF_LOG_INFO, @@ -6168,36 +6200,11 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo, * same port (on another brick) and re-use that. * TBD: re-use RPC connection across bricks */ - if (is_brick_mx_enabled ()) { - brickpath = search_brick_path_from_proc (pid, brickinfo->path); - if (!brickpath) { - gf_log (this->name, GF_LOG_INFO, - "Either pid %d is not running or brick" - " path %s is not consumed so cleanup pidfile", - pid, brickinfo->path); - /* search brick is failed so unlink pidfile */ - if (sys_access (pidfile , R_OK) == 0) { - sys_unlink (pidfile); - } - goto run; - } - GF_FREE (brickpath); - ret = glusterd_get_sock_from_brick_pid (pid, socketpath, - sizeof(socketpath)); - if (ret) { - gf_log (this->name, GF_LOG_INFO, - "Either pid %d is not running or is not match" - " with any running brick process ", pid); - /* Fetch unix socket is failed so unlink pidfile */ - if (sys_access (pidfile , R_OK) == 0) { - sys_unlink (pidfile); - } - goto run; - } - } else { - glusterd_set_brick_socket_filepath (volinfo, brickinfo, - socketpath, - sizeof (socketpath)); + if (!is_brick_mx_enabled ()) { + glusterd_set_brick_socket_filepath + (volinfo, brickinfo, + socketpath, + sizeof (socketpath)); } gf_log (this->name, GF_LOG_DEBUG, "Using %s as sockfile for brick %s of volume %s ", -- 1.8.3.1