From ed98a2c88a54dd14790632f97abf61403557ab7a Mon Sep 17 00:00:00 2001
From: Jeff Darcy <jdarcy@redhat.com>
Date: Mon, 20 Mar 2017 12:32:33 -0400
Subject: [PATCH 366/369] glusterd: hold off volume deletes while still
 restarting bricks

We need to do this because modifying the volume/brick tree while
glusterd_restart_bricks is still walking it can lead to segfaults.
Without waiting, a volume delete could accidentally "slip in" while
attach_brick has released big_lock between retries and make such a
modification.
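
The mechanism is a "blockers" refcount protected by big_lock: the
restart/attach paths bump it, and tree-mutating commit paths spin until
it drains.  As a rough standalone model (plain pthreads instead of
glusterd's synclock; the names here are illustrative, not glusterd API):

    #include <pthread.h>
    #include <unistd.h>

    static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;
    static unsigned int    blockers = 0;  /* in-flight restarts/attaches */

    /* restart path: call these under big_lock around the brick walk */
    static void begin_blocker (void) { ++blockers; }
    static void end_blocker   (void) { --blockers; }

    /* delete/stop/add-brick path: called with big_lock held; drain the
     * walkers before mutating the volume/brick tree, dropping big_lock
     * each time around so they can finish */
    static void wait_for_blockers (void)
    {
            while (blockers) {
                    pthread_mutex_unlock (&big_lock);
                    sleep (1);
                    pthread_mutex_lock (&big_lock);
            }
    }
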
>Reviewed-on: https://review.gluster.org/16927
>Smoke: Gluster Build System <jenkins@build.gluster.org>
>NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
>CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
>Reviewed-by: Atin Mukherjee <amukherj@redhat.com>

Change-Id: I30ccc4efa8d286aae847250f5d4fb28956a74b03
BUG: 1438052
Signed-off-by: Jeff Darcy <jeff@pl.atyp.us>
Reviewed-on: https://code.engineering.redhat.com/gerrit/102298
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
Tested-by: Atin Mukherjee <amukherj@redhat.com>
---
 ...ports.t => bug-1421590-brick-mux-reuse-ports.t} |  5 ++
 tests/bugs/core/bug-1432542-mpx-restart-crash.t    | 91 ++++++++++++++++++++++
 xlators/mgmt/glusterd/src/glusterd-op-sm.c         | 15 ++++
 xlators/mgmt/glusterd/src/glusterd-utils.c         | 39 +++++++---
 xlators/mgmt/glusterd/src/glusterd-volume-ops.c    |  3 -
 xlators/mgmt/glusterd/src/glusterd.c               |  2 +-
 xlators/mgmt/glusterd/src/glusterd.h               |  1 +
 7 files changed, 141 insertions(+), 15 deletions(-)
 rename tests/bugs/core/{bug-1421590-brick-mux-resuse-ports.t => bug-1421590-brick-mux-reuse-ports.t} (86%)
 create mode 100644 tests/bugs/core/bug-1432542-mpx-restart-crash.t
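
Note on the attach_brick retry loop below: while big_lock is dropped
between retries, other_brick->rpc can be torn down under us, so each
attempt now pins the rpc object with its own reference first.  A hedged
sketch of that pattern (toy refcount type, not the real rpc_clnt API):

    typedef struct { int refs; } rpc_t;

    static rpc_t *rpc_ref (rpc_t *rpc)
    {
            if (rpc)            /* real rpc_clnt_ref() yields NULL if gone */
                    ++rpc->refs;
            return rpc;
    }

    static void rpc_unref (rpc_t *rpc)
    {
            if (rpc)
                    --rpc->refs;  /* real code frees at zero */
    }

    static int try_attach (rpc_t *shared_rpc)
    {
            rpc_t *rpc = rpc_ref (shared_rpc);  /* pin before use */
            if (!rpc)
                    return -1;    /* peer went away; caller retries */
            /* ... send the attach request over rpc ... */
            rpc_unref (rpc);      /* drop our pin */
            return 0;
    }
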
diff --git a/tests/bugs/core/bug-1421590-brick-mux-resuse-ports.t b/tests/bugs/core/bug-1421590-brick-mux-reuse-ports.t
similarity index 86%
rename from tests/bugs/core/bug-1421590-brick-mux-resuse-ports.t
rename to tests/bugs/core/bug-1421590-brick-mux-reuse-ports.t
index ed401f6..a227f82 100644
--- a/tests/bugs/core/bug-1421590-brick-mux-resuse-ports.t
+++ b/tests/bugs/core/bug-1421590-brick-mux-reuse-ports.t
@@ -21,6 +21,11 @@ push_trapfunc "cleanup"
 TEST $CLI volume create $V0 $H0:$B0/brick{0,1}
 TEST $CLI volume start $V0
 
+# We can't expect a valid port number instantly.  We need to wait for the
+# bricks to finish coming up.  In every other case we use EXPECT_WITHIN, but
+# this first time we need to wait more explicitly.
+sleep $PROCESS_UP_TIMEOUT
+
 port_brick0=$(get_nth_brick_port_for_volume $V0 1)
 
 # restart the volume
diff --git a/tests/bugs/core/bug-1432542-mpx-restart-crash.t b/tests/bugs/core/bug-1432542-mpx-restart-crash.t
new file mode 100644
index 0000000..970a181
--- /dev/null
+++ b/tests/bugs/core/bug-1432542-mpx-restart-crash.t
@@ -0,0 +1,91 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../traps.rc
+
+NUM_VOLS=20
+MOUNT_BASE=$(dirname $M0)
+
+# GlusterD reports that bricks are started when in fact their attach requests
+# might still need to be retried.  That's a bit of a hack, but there's no
+# feasible way to wait at that point (in attach_brick) and the rest of the
+# code is unprepared to deal with transient errors so the whole "brick start"
+# would fail.  Meanwhile, glusterfsd can only handle attach requests at a
+# rather slow rate.  After GlusterD tries to start a couple of hundred bricks,
+# glusterfsd can fall behind and we start getting mount failures.  Arguably,
+# those are spurious because we will eventually catch up.  We're just not
+# ready *yet*.  More to the point, even if the errors aren't spurious that's
+# not what we're testing right now.  Therefore, we give glusterfsd a bit more
+# breathing room for this test than we would otherwise.
+MOUNT_TIMEOUT=15
+
+get_brick_base () {
+	printf "%s/vol%02d" $B0 $1
+}
+
+get_mount_point () {
+	printf "%s/vol%02d" $MOUNT_BASE $1
+}
+
+create_volume () {
+
+	local vol_name=$(printf "%s-vol%02d" $V0 $1)
+
+	local brick_base=$(get_brick_base $1)
+	local cmd="$CLI volume create $vol_name replica 2"
+	local b
+	for b in $(seq 0 5); do
+		local this_brick=${brick_base}/brick$b
+		mkdir -p $this_brick
+		cmd="$cmd $H0:$this_brick"
+	done
+	TEST $cmd
+	TEST $CLI volume start $vol_name
+	EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Started" volinfo_field $vol_name "Status"
+	local mount_point=$(get_mount_point $1)
+	mkdir -p $mount_point
+	TEST $GFS -s $H0 --volfile-id=$vol_name $mount_point
+}
+
+cleanup_func () {
+	local v
+	for v in $(seq 1 $NUM_VOLS); do
+		local mount_point=$(get_mount_point $v)
+		force_umount $mount_point
+		rm -rf $mount_point
+		local vol_name=$(printf "%s-vol%02d" $V0 $v)
+		$CLI volume stop $vol_name
+		$CLI volume delete $vol_name
+		rm -rf $(get_brick_base $v) &
+	done &> /dev/null
+	wait
+}
+push_trapfunc cleanup_func
+
+TEST glusterd
+TEST $CLI volume set all cluster.brick-multiplex on
+
+# Our infrastructure can't handle an arithmetic expression here.  The formula
+# is (NUM_VOLS-1)*5 because it sees each TEST/EXPECT once but needs the other
+# NUM_VOLS-1 and there are 5 such statements in each iteration.
+TESTS_EXPECTED_IN_LOOP=95
+for i in $(seq 1 $NUM_VOLS); do
+	create_volume $i
+	TEST dd if=/dev/zero of=$(get_mount_point $i)/a_file bs=4k count=1
+done
+
+# Kill glusterd, and wait a bit for all traces to disappear.
+TEST killall -9 glusterd
+sleep 5
+TEST killall -9 glusterfsd
+sleep 5
+
+# Restart glusterd.  This is where the brick daemon supposedly dumps core,
+# though I (jdarcy) have yet to see that.  Again, give it a while to settle,
+# just to be sure.
+TEST glusterd
+
+cleanup_func
+trap - EXIT
+cleanup
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index 98ae7b6..dae6c1e 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -5935,6 +5935,15 @@ glusterd_op_stage_validate (glusterd_op_t op, dict_t *dict, char **op_errstr,
         return ret;
 }
 
+static void
+glusterd_wait_for_blockers (glusterd_conf_t *priv)
+{
+        while (priv->blockers) {
+                synclock_unlock (&priv->big_lock);
+                sleep (1);
+                synclock_lock (&priv->big_lock);
+        }
+}
 
 int32_t
 glusterd_op_commit_perform (glusterd_op_t op, dict_t *dict, char **op_errstr,
@@ -5954,18 +5963,22 @@ glusterd_op_commit_perform (glusterd_op_t op, dict_t *dict, char **op_errstr,
                         break;
 
                 case GD_OP_STOP_VOLUME:
+                        glusterd_wait_for_blockers (this->private);
                         ret = glusterd_op_stop_volume (dict);
                         break;
 
                 case GD_OP_DELETE_VOLUME:
+                        glusterd_wait_for_blockers (this->private);
                         ret = glusterd_op_delete_volume (dict);
                         break;
 
                 case GD_OP_ADD_BRICK:
+                        glusterd_wait_for_blockers (this->private);
                         ret = glusterd_op_add_brick (dict, op_errstr);
                         break;
 
                 case GD_OP_REPLACE_BRICK:
+                        glusterd_wait_for_blockers (this->private);
                         ret = glusterd_op_replace_brick (dict, rsp_dict);
                         break;
 
@@ -5976,11 +5989,13 @@ glusterd_op_commit_perform (glusterd_op_t op, dict_t *dict, char **op_errstr,
                         ret = glusterd_op_set_ganesha (dict, op_errstr);
                         break;
 
+
                 case GD_OP_RESET_VOLUME:
                         ret = glusterd_op_reset_volume (dict, op_errstr);
                         break;
 
                 case GD_OP_REMOVE_BRICK:
+                        glusterd_wait_for_blockers (this->private);
                         ret = glusterd_op_remove_brick (dict, op_errstr);
                         break;
 
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 77f97a5..2adfb47 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -3123,8 +3123,8 @@ out:
 int
 glusterd_spawn_daemons (void *opaque)
 {
-        glusterd_conf_t *conf = THIS->private;
-        int             ret             = -1;
+        glusterd_conf_t *conf   = THIS->private;
+        int             ret     = -1;
 
         synclock_lock (&conf->big_lock);
         glusterd_restart_bricks (conf);
@@ -4891,9 +4891,13 @@ static int32_t
 my_callback (struct rpc_req *req, struct iovec *iov, int count, void *v_frame)
 {
         call_frame_t    *frame  = v_frame;
+        glusterd_conf_t *conf   = frame->this->private;
 
-        STACK_DESTROY (frame->root);
+        synclock_lock (&conf->big_lock);
+        --(conf->blockers);
+        synclock_unlock (&conf->big_lock);
 
+        STACK_DESTROY (frame->root);
         return 0;
 }
 
@@ -4910,6 +4914,7 @@ send_attach_req (xlator_t *this, struct rpc_clnt *rpc, char *path, int op)
         void                            *req = &brick_req;
         void                            *errlbl   = &&err;
         struct rpc_clnt_connection      *conn;
+        glusterd_conf_t                 *conf     = this->private;
         extern struct rpc_clnt_program  gd_brick_prog;
 
         if (!rpc) {
@@ -4969,9 +4974,13 @@ send_attach_req (xlator_t *this, struct rpc_clnt *rpc, char *path, int op)
         iov.iov_len = ret;
 
         /* Send the msg */
+        ++(conf->blockers);
         ret = rpc_clnt_submit (rpc, &gd_brick_prog, op,
-                               my_callback, &iov, 1, NULL, 0, iobref, frame,
-                               NULL, 0, NULL, 0, NULL);
+                               my_callback, &iov, 1, NULL, 0, iobref,
+                               frame, NULL, 0, NULL, 0, NULL);
+        if (ret) {
+                --(conf->blockers);
+        }
         return ret;
 
 free_iobref:
@@ -5003,6 +5012,8 @@ attach_brick (xlator_t *this,
         char            full_id[PATH_MAX]       = {'\0',};
         char            path[PATH_MAX]          = {'\0',};
         int             ret;
+        int             tries;
+        rpc_clnt_t      *rpc;
 
         gf_log (this->name, GF_LOG_INFO,
                 "add brick %s to existing process for %s",
@@ -5039,12 +5050,15 @@ attach_brick (xlator_t *this,
         }
         (void) build_volfile_path (full_id, path, sizeof(path), NULL);
 
-        int tries = 0;
-        while (tries++ <= 15) {
-                ret = send_attach_req (this, other_brick->rpc, path,
-                                       GLUSTERD_BRICK_ATTACH);
-                if (!ret) {
-                        return 0;
+        for (tries = 15; tries > 0; --tries) {
+                rpc = rpc_clnt_ref (other_brick->rpc);
+                if (rpc) {
+                        ret = send_attach_req (this, rpc, path,
+                                               GLUSTERD_BRICK_ATTACH);
+                        rpc_clnt_unref (rpc);
+                        if (!ret) {
+                                return 0;
+                        }
                 }
                 /*
                  * It might not actually be safe to manipulate the lock like
@@ -5410,6 +5424,8 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
         conf = this->private;
         GF_VALIDATE_OR_GOTO (this->name, conf, out);
 
+        ++(conf->blockers);
+
         ret = glusterd_get_quorum_cluster_counts (this, &active_count,
                                                   &quorum_count);
         if (ret)
@@ -5489,6 +5505,7 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
         ret = 0;
 
 out:
+        --(conf->blockers);
         conf->restart_done = _gf_true;
         return ret;
 }
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
index 3941b06..08906ba 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
@@ -2788,14 +2788,11 @@ glusterd_op_delete_volume (dict_t *dict)
 {
         int                                     ret = 0;
         char                                    *volname = NULL;
-        glusterd_conf_t                         *priv = NULL;
         glusterd_volinfo_t                      *volinfo = NULL;
         xlator_t                                *this = NULL;
 
         this = THIS;
         GF_ASSERT (this);
-        priv = this->private;
-        GF_ASSERT (priv);
 
         ret = dict_get_str (dict, "volname", &volname);
         if (ret) {
diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c
index ae81de6..7d66718 100644
--- a/xlators/mgmt/glusterd/src/glusterd.c
+++ b/xlators/mgmt/glusterd/src/glusterd.c
@@ -1869,10 +1869,10 @@ init (xlator_t *this)
         if (ret < 0)
                 goto out;
 
+        conf->blockers = 0;
         /* If there are no 'friends', this would be the best time to
          * spawn process/bricks that may need (re)starting since last
          * time (this) glusterd was up.*/
-
         if (cds_list_empty (&conf->peers)) {
                 glusterd_launch_synctask (glusterd_spawn_daemons, NULL);
         }
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index d80ad20..671ec58 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -184,6 +184,7 @@ typedef struct {
         int                        ping_timeout;
         uint32_t                   generation;
         int32_t                    workers;
+        uint32_t                   blockers;
 } glusterd_conf_t;
 
 
-- 
1.8.3.1