From aed804b0f10c93ade88e109dd89a5b593ff1b1e5 Mon Sep 17 00:00:00 2001 From: Ravishankar N Date: Fri, 18 May 2018 15:38:29 +0530 Subject: [PATCH 275/282] afr: fix bug-1363721.t failure Backport of https://review.gluster.org/#/c/20036/ Problem: In the .t, when the only good brick was brought down, writes on the fd were still succeeding on the bad bricks. The inflight split-brain check was marking the write as a failure but since the write succeeded on all the bad bricks, afr_txn_nothing_failed() was set to true and we were unwinding writev with success to DHT and then catching the failure in post-op in the background. Fix: Don't wind the FOP phase if the write_subvol (which is populated with readable subvols obtained in pre-op cbk) does not have at least 1 good brick which was up when the transaction started. Note: This fix is not related to brick multiplexing. I ran the .t 10 times with this fix and brick-mux enabled without any failures. Change-Id: I915c9c366aa32cd342b1565827ca2d83cb02ae85 BUG: 1581057 Signed-off-by: Ravishankar N Reviewed-on: https://code.engineering.redhat.com/gerrit/139440 Tested-by: RHGS Build Bot Reviewed-by: Pranith Kumar Karampuri --- tests/bugs/replicate/bug-1363721.t | 12 +++++++--- xlators/cluster/afr/src/afr-common.c | 14 ++++++++++++ xlators/cluster/afr/src/afr-transaction.c | 38 +++++++++++++++++++++++++++++++ xlators/cluster/afr/src/afr.h | 3 +++ 4 files changed, 64 insertions(+), 3 deletions(-) diff --git a/tests/bugs/replicate/bug-1363721.t b/tests/bugs/replicate/bug-1363721.t index ec39889..0ed34d8 100644 --- a/tests/bugs/replicate/bug-1363721.t +++ b/tests/bugs/replicate/bug-1363721.t @@ -18,6 +18,10 @@ function size_increased { fi } +function has_write_failed { + local pid=$1 + if [ -d /proc/$pid ]; then echo "N"; else echo "Y"; fi +} TEST glusterd TEST pidof glusterd TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} @@ -27,7 +31,7 @@ TEST $CLI volume set $V0 cluster.data-self-heal off TEST $CLI volume set $V0 
cluster.metadata-self-heal off TEST $CLI volume set $V0 cluster.entry-self-heal off TEST $CLI volume start $V0 -TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 --direct-io-mode=enable +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --direct-io-mode=enable $M0 cd $M0 @@ -67,8 +71,10 @@ sleep 3 # Now kill the second brick kill_brick $V0 $H0 $B0/${V0}2 -# At this point the write should have been failed. But make sure that the second -# brick is never an accused. +# At this point the write should have been failed. +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "Y" has_write_failed $dd_pid + +# Also make sure that the second brick is never an accused. md5sum_2=$(md5sum $B0/${V0}2/file1 | awk '{print $1}') diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 6025a60..a85549b 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -6270,6 +6270,20 @@ out: return ret; } +uint64_t +afr_write_subvol_get (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + uint64_t write_subvol = 0; + + local = frame->local; + LOCK(&local->inode->lock); + write_subvol = local->inode_ctx->write_subvol; + UNLOCK (&local->inode->lock); + + return write_subvol; +} + int afr_write_subvol_set (call_frame_t *frame, xlator_t *this) { diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 0506a78..ff07319 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -167,6 +167,34 @@ afr_changelog_has_quorum (afr_local_t *local, xlator_t *this) return _gf_false; } + +gf_boolean_t +afr_is_write_subvol_valid (call_frame_t *frame, xlator_t *this) +{ + int i = 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + uint64_t write_subvol = 0; + unsigned char *writable = NULL; + uint16_t datamap = 0; + + local = frame->local; + priv = this->private; + writable = alloca0 (priv->child_count); + + write_subvol = 
afr_write_subvol_get (frame, this); + datamap = (write_subvol & 0x00000000ffff0000) >> 16; + for (i = 0; i < priv->child_count; i++) { + if (datamap & (1 << i)) + writable[i] = 1; + + if (writable[i] && !local->transaction.failed_subvols[i]) + return _gf_true; + } + + return _gf_false; +} + int afr_transaction_fop (call_frame_t *frame, xlator_t *this) { @@ -189,6 +217,16 @@ afr_transaction_fop (call_frame_t *frame, xlator_t *this) afr_transaction_resume (frame, this); return 0; } + + /* Fail with EIO unless at least one writable brick has not failed. */ + if (local->transaction.type == AFR_DATA_TRANSACTION && + !afr_is_write_subvol_valid (frame, this)) { + local->op_ret = -1; + local->op_errno = EIO; + afr_transaction_resume (frame, this); + return 0; + } + local->call_count = call_count; for (i = 0; i < priv->child_count; i++) { if (local->transaction.pre_op[i] && !failed_subvols[i]) { diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 6be59dc..35928a9 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -1199,6 +1199,9 @@ afr_serialize_xattrs_with_delimiter (call_frame_t *frame, xlator_t *this, int __afr_inode_ctx_get (xlator_t *this, inode_t *inode, afr_inode_ctx_t **ctx); +uint64_t +afr_write_subvol_get (call_frame_t *frame, xlator_t *this); + int afr_write_subvol_set (call_frame_t *frame, xlator_t *this); -- 1.8.3.1