|
|
d1681e |
From aed804b0f10c93ade88e109dd89a5b593ff1b1e5 Mon Sep 17 00:00:00 2001
|
|
|
d1681e |
From: Ravishankar N <ravishankar@redhat.com>
|
|
|
d1681e |
Date: Fri, 18 May 2018 15:38:29 +0530
|
|
|
d1681e |
Subject: [PATCH 275/282] afr: fix bug-1363721.t failure
|
|
|
d1681e |
|
|
|
d1681e |
Backport of https://review.gluster.org/#/c/20036/
|
|
|
d1681e |
|
|
|
d1681e |
Problem:
|
|
|
d1681e |
In the .t, when the only good brick was brought down, writes on the fd were
|
|
|
d1681e |
still succeeding on the bad bricks. The inflight split-brain check was
|
|
|
d1681e |
marking the write as failure but since the write succeeded on all the
|
|
|
d1681e |
bad bricks, afr_txn_nothing_failed() was set to true and we were
|
|
|
d1681e |
unwinding writev with success to DHT and then catching the failure in
|
|
|
d1681e |
post-op in the background.
|
|
|
d1681e |
|
|
|
d1681e |
Fix:
|
|
|
d1681e |
Don't wind the FOP phase if the write_subvol (which is populated with readable
|
|
|
d1681e |
subvols obtained in pre-op cbk) does not have at least 1 good brick which was up
|
|
|
d1681e |
when the transaction started.
|
|
|
d1681e |
|
|
|
d1681e |
Note: This fix is not related to brick multiplexing. I ran the .t
|
|
|
d1681e |
10 times with this fix and brick-mux enabled without any failures.
|
|
|
d1681e |
|
|
|
d1681e |
Change-Id: I915c9c366aa32cd342b1565827ca2d83cb02ae85
|
|
|
d1681e |
BUG: 1581057
|
|
|
d1681e |
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
|
|
|
d1681e |
Reviewed-on: https://code.engineering.redhat.com/gerrit/139440
|
|
|
d1681e |
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
|
d1681e |
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
|
|
|
d1681e |
---
|
|
|
d1681e |
tests/bugs/replicate/bug-1363721.t | 12 +++++++---
|
|
|
d1681e |
xlators/cluster/afr/src/afr-common.c | 14 ++++++++++++
|
|
|
d1681e |
xlators/cluster/afr/src/afr-transaction.c | 38 +++++++++++++++++++++++++++++++
|
|
|
d1681e |
xlators/cluster/afr/src/afr.h | 3 +++
|
|
|
d1681e |
4 files changed, 64 insertions(+), 3 deletions(-)
|
|
|
d1681e |
|
|
|
d1681e |
diff --git a/tests/bugs/replicate/bug-1363721.t b/tests/bugs/replicate/bug-1363721.t
|
|
|
d1681e |
index ec39889..0ed34d8 100644
|
|
|
d1681e |
--- a/tests/bugs/replicate/bug-1363721.t
|
|
|
d1681e |
+++ b/tests/bugs/replicate/bug-1363721.t
|
|
|
d1681e |
@@ -18,6 +18,10 @@ function size_increased {
|
|
|
d1681e |
fi
|
|
|
d1681e |
}
|
|
|
d1681e |
|
|
|
d1681e |
+function has_write_failed {
|
|
|
d1681e |
+ local pid=$1
|
|
|
d1681e |
+ if [ -d /proc/$pid ]; then echo "N"; else echo "Y"; fi
|
|
|
d1681e |
+}
|
|
|
d1681e |
TEST glusterd
|
|
|
d1681e |
TEST pidof glusterd
|
|
|
d1681e |
TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
|
|
|
d1681e |
@@ -27,7 +31,7 @@ TEST $CLI volume set $V0 cluster.data-self-heal off
|
|
|
d1681e |
TEST $CLI volume set $V0 cluster.metadata-self-heal off
|
|
|
d1681e |
TEST $CLI volume set $V0 cluster.entry-self-heal off
|
|
|
d1681e |
TEST $CLI volume start $V0
|
|
|
d1681e |
-TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 --direct-io-mode=enable
|
|
|
d1681e |
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --direct-io-mode=enable $M0
|
|
|
d1681e |
|
|
|
d1681e |
cd $M0
|
|
|
d1681e |
|
|
|
d1681e |
@@ -67,8 +71,10 @@ sleep 3
|
|
|
d1681e |
# Now kill the second brick
|
|
|
d1681e |
kill_brick $V0 $H0 $B0/${V0}2
|
|
|
d1681e |
|
|
|
d1681e |
-# At this point the write should have been failed. But make sure that the second
|
|
|
d1681e |
-# brick is never an accused.
|
|
|
d1681e |
+# At this point the write should have failed.
|
|
|
d1681e |
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "Y" has_write_failed $dd_pid
|
|
|
d1681e |
+
|
|
|
d1681e |
+# Also make sure that the second brick is never an accused.
|
|
|
d1681e |
|
|
|
d1681e |
md5sum_2=$(md5sum $B0/${V0}2/file1 | awk '{print $1}')
|
|
|
d1681e |
|
|
|
d1681e |
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
|
|
|
d1681e |
index 6025a60..a85549b 100644
|
|
|
d1681e |
--- a/xlators/cluster/afr/src/afr-common.c
|
|
|
d1681e |
+++ b/xlators/cluster/afr/src/afr-common.c
|
|
|
d1681e |
@@ -6270,6 +6270,20 @@ out:
|
|
|
d1681e |
return ret;
|
|
|
d1681e |
}
|
|
|
d1681e |
|
|
|
d1681e |
+uint64_t
|
|
|
d1681e |
+afr_write_subvol_get (call_frame_t *frame, xlator_t *this)
|
|
|
d1681e |
+{
|
|
|
d1681e |
+ afr_local_t *local = NULL;
|
|
|
d1681e |
+ uint64_t write_subvol = 0;
|
|
|
d1681e |
+
|
|
|
d1681e |
+ local = frame->local;
|
|
|
d1681e |
+ LOCK(&local->inode->lock);
|
|
|
d1681e |
+ write_subvol = local->inode_ctx->write_subvol;
|
|
|
d1681e |
+ UNLOCK (&local->inode->lock);
|
|
|
d1681e |
+
|
|
|
d1681e |
+ return write_subvol;
|
|
|
d1681e |
+}
|
|
|
d1681e |
+
|
|
|
d1681e |
int
|
|
|
d1681e |
afr_write_subvol_set (call_frame_t *frame, xlator_t *this)
|
|
|
d1681e |
{
|
|
|
d1681e |
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
|
|
|
d1681e |
index 0506a78..ff07319 100644
|
|
|
d1681e |
--- a/xlators/cluster/afr/src/afr-transaction.c
|
|
|
d1681e |
+++ b/xlators/cluster/afr/src/afr-transaction.c
|
|
|
d1681e |
@@ -167,6 +167,34 @@ afr_changelog_has_quorum (afr_local_t *local, xlator_t *this)
|
|
|
d1681e |
return _gf_false;
|
|
|
d1681e |
}
|
|
|
d1681e |
|
|
|
d1681e |
+
|
|
|
d1681e |
+gf_boolean_t
|
|
|
d1681e |
+afr_is_write_subvol_valid (call_frame_t *frame, xlator_t *this)
|
|
|
d1681e |
+{
|
|
|
d1681e |
+ int i = 0;
|
|
|
d1681e |
+ afr_local_t *local = NULL;
|
|
|
d1681e |
+ afr_private_t *priv = NULL;
|
|
|
d1681e |
+ uint64_t write_subvol = 0;
|
|
|
d1681e |
+ unsigned char *writable = NULL;
|
|
|
d1681e |
+ uint16_t datamap = 0;
|
|
|
d1681e |
+
|
|
|
d1681e |
+ local = frame->local;
|
|
|
d1681e |
+ priv = this->private;
|
|
|
d1681e |
+ writable = alloca0 (priv->child_count);
|
|
|
d1681e |
+
|
|
|
d1681e |
+ write_subvol = afr_write_subvol_get (frame, this);
|
|
|
d1681e |
+ datamap = (write_subvol & 0x00000000ffff0000) >> 16;
|
|
|
d1681e |
+ for (i = 0; i < priv->child_count; i++) {
|
|
|
d1681e |
+ if (datamap & (1 << i))
|
|
|
d1681e |
+ writable[i] = 1;
|
|
|
d1681e |
+
|
|
|
d1681e |
+ if (writable[i] && !local->transaction.failed_subvols[i])
|
|
|
d1681e |
+ return _gf_true;
|
|
|
d1681e |
+ }
|
|
|
d1681e |
+
|
|
|
d1681e |
+ return _gf_false;
|
|
|
d1681e |
+}
|
|
|
d1681e |
+
|
|
|
d1681e |
int
|
|
|
d1681e |
afr_transaction_fop (call_frame_t *frame, xlator_t *this)
|
|
|
d1681e |
{
|
|
|
d1681e |
@@ -189,6 +217,16 @@ afr_transaction_fop (call_frame_t *frame, xlator_t *this)
|
|
|
d1681e |
afr_transaction_resume (frame, this);
|
|
|
d1681e |
return 0;
|
|
|
d1681e |
}
|
|
|
d1681e |
+
|
|
|
d1681e |
+ /* Fail unless at least one writeable brick is up. */
|
|
|
d1681e |
+ if (local->transaction.type == AFR_DATA_TRANSACTION &&
|
|
|
d1681e |
+ !afr_is_write_subvol_valid (frame, this)) {
|
|
|
d1681e |
+ local->op_ret = -1;
|
|
|
d1681e |
+ local->op_errno = EIO;
|
|
|
d1681e |
+ afr_transaction_resume (frame, this);
|
|
|
d1681e |
+ return 0;
|
|
|
d1681e |
+ }
|
|
|
d1681e |
+
|
|
|
d1681e |
local->call_count = call_count;
|
|
|
d1681e |
for (i = 0; i < priv->child_count; i++) {
|
|
|
d1681e |
if (local->transaction.pre_op[i] && !failed_subvols[i]) {
|
|
|
d1681e |
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
|
|
|
d1681e |
index 6be59dc..35928a9 100644
|
|
|
d1681e |
--- a/xlators/cluster/afr/src/afr.h
|
|
|
d1681e |
+++ b/xlators/cluster/afr/src/afr.h
|
|
|
d1681e |
@@ -1199,6 +1199,9 @@ afr_serialize_xattrs_with_delimiter (call_frame_t *frame, xlator_t *this,
|
|
|
d1681e |
int
|
|
|
d1681e |
__afr_inode_ctx_get (xlator_t *this, inode_t *inode, afr_inode_ctx_t **ctx);
|
|
|
d1681e |
|
|
|
d1681e |
+uint64_t
|
|
|
d1681e |
+afr_write_subvol_get (call_frame_t *frame, xlator_t *this);
|
|
|
d1681e |
+
|
|
|
d1681e |
int
|
|
|
d1681e |
afr_write_subvol_set (call_frame_t *frame, xlator_t *this);
|
|
|
d1681e |
|
|
|
d1681e |
--
|
|
|
d1681e |
1.8.3.1
|
|
|
d1681e |
|