|
|
12a457 |
From ebcd815d4ae36228cf79c393e8d58aa49d5300bb Mon Sep 17 00:00:00 2001
|
|
|
12a457 |
From: Anuradha Talur <atalur@redhat.com>
|
|
|
12a457 |
Date: Fri, 13 May 2016 15:34:06 +0530
|
|
|
12a457 |
Subject: [PATCH 150/158] cluster/afr : Do post-op in case of symmetric errors
|
|
|
12a457 |
|
|
|
12a457 |
Backport of: http://review.gluster.org/#/c/14310/
|
|
|
12a457 |
|
|
|
12a457 |
In afr_changelog_post_op_now(), if there was any error,
|
|
|
12a457 |
meaning op_ret < 0, post-op was not being done even when
|
|
|
12a457 |
the errors were symmetric and there were no "failed
|
|
|
12a457 |
subvols".
|
|
|
12a457 |
|
|
|
12a457 |
Fix:
|
|
|
12a457 |
When the errors are symmetric, perform post-op.
|
|
|
12a457 |
|
|
|
12a457 |
How the bug was found:
|
|
|
12a457 |
In a 1 x 3 volume with shard and write-behind enabled,
|
|
|
12a457 |
when writes were done into a file with one brick down,
|
|
|
12a457 |
the trusted.afr.dirty xattr's value for .shard directory
|
|
|
12a457 |
would keep increasing, as post-op was not done but pre-op was.
|
|
|
12a457 |
This incorrectly showed .shard to be in split-brain.
|
|
|
12a457 |
|
|
|
12a457 |
RCA:
|
|
|
12a457 |
When WB is on, due to multiple writes being sent on
|
|
|
12a457 |
offsets lying in the same shard, chances are that
|
|
|
12a457 |
the same shard file will be created more than once,
|
|
|
12a457 |
with the second one failing with op_ret < 0
|
|
|
12a457 |
and op_errno = EEXIST.
|
|
|
12a457 |
|
|
|
12a457 |
As op_ret was negative, afr wouldn't do post-op,
|
|
|
12a457 |
leading to no decrement of trusted.afr.dirty xattr.
|
|
|
12a457 |
This showed the .shard directory to be in split-brain.
|
|
|
12a457 |
|
|
|
12a457 |
>Change-Id: I711bdeaa1397244e6a7790e96f0c84501798fc59
|
|
|
12a457 |
>BUG: 1335652
|
|
|
12a457 |
>Signed-off-by: Anuradha Talur <atalur@redhat.com>
|
|
|
12a457 |
|
|
|
12a457 |
Change-Id: I711bdeaa1397244e6a7790e96f0c84501798fc59
|
|
|
12a457 |
BUG: 1332949
|
|
|
12a457 |
Signed-off-by: Anuradha Talur <atalur@redhat.com>
|
|
|
12a457 |
Reviewed-on: https://code.engineering.redhat.com/gerrit/74280
|
|
|
12a457 |
Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
|
|
|
12a457 |
Tested-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
|
|
|
12a457 |
---
|
|
|
12a457 |
tests/bugs/replicate/bug-1335652.t | 29 +++++++++++++++++++++++++++++
|
|
|
12a457 |
xlators/cluster/afr/src/afr-transaction.c | 8 ++++++--
|
|
|
12a457 |
2 files changed, 35 insertions(+), 2 deletions(-)
|
|
|
12a457 |
create mode 100644 tests/bugs/replicate/bug-1335652.t
|
|
|
12a457 |
|
|
|
12a457 |
diff --git a/tests/bugs/replicate/bug-1335652.t b/tests/bugs/replicate/bug-1335652.t
|
|
|
12a457 |
new file mode 100644
|
|
|
12a457 |
index 0000000..653a1b0
|
|
|
12a457 |
--- /dev/null
|
|
|
12a457 |
+++ b/tests/bugs/replicate/bug-1335652.t
|
|
|
12a457 |
@@ -0,0 +1,29 @@
|
|
|
12a457 |
+#!/bin/bash
|
|
|
12a457 |
+. $(dirname $0)/../../include.rc
|
|
|
12a457 |
+. $(dirname $0)/../../volume.rc
|
|
|
12a457 |
+cleanup;
|
|
|
12a457 |
+
|
|
|
12a457 |
+TEST glusterd
|
|
|
12a457 |
+TEST pidof glusterd
|
|
|
12a457 |
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
|
|
|
12a457 |
+TEST $CLI volume set $V0 shard on
|
|
|
12a457 |
+TEST $CLI volume set $V0 self-heal-daemon off
|
|
|
12a457 |
+TEST $CLI volume set $V0 data-self-heal off
|
|
|
12a457 |
+TEST $CLI volume set $V0 entry-self-heal off
|
|
|
12a457 |
+TEST $CLI volume set $V0 metadata-self-heal off
|
|
|
12a457 |
+TEST $CLI volume start $V0
|
|
|
12a457 |
+
|
|
|
12a457 |
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
|
|
|
12a457 |
+
|
|
|
12a457 |
+#Kill the zero'th brick so that 1st and 2nd get marked dirty
|
|
|
12a457 |
+TEST kill_brick $V0 $H0 $B0/${V0}0
|
|
|
12a457 |
+
|
|
|
12a457 |
+TEST dd if=/dev/urandom of=$M0/file bs=10MB count=20
|
|
|
12a457 |
+
|
|
|
12a457 |
+#At no point should the value of dirty be greater than 0 on source bricks
|
|
|
12a457 |
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.dirty $B0/${V0}1/.shard
|
|
|
12a457 |
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.dirty $B0/${V0}2/.shard
|
|
|
12a457 |
+
|
|
|
12a457 |
+rm -rf $M0/file;
|
|
|
12a457 |
+
|
|
|
12a457 |
+cleanup;
|
|
|
12a457 |
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
|
|
|
12a457 |
index 73de030..d83a45c 100644
|
|
|
12a457 |
--- a/xlators/cluster/afr/src/afr-transaction.c
|
|
|
12a457 |
+++ b/xlators/cluster/afr/src/afr-transaction.c
|
|
|
12a457 |
@@ -580,10 +580,14 @@ afr_handle_symmetric_errors (call_frame_t *frame, xlator_t *this)
|
|
|
12a457 |
}
|
|
|
12a457 |
i_errno = local->replies[i].op_errno;
|
|
|
12a457 |
|
|
|
12a457 |
- if (i_errno == ENOTCONN) {
|
|
|
12a457 |
+ if (i_errno == ENOTCONN || i_errno == EDQUOT ||
|
|
|
12a457 |
+ i_errno == ENOSPC) {
|
|
|
12a457 |
/* ENOTCONN is not a symmetric error. We do not
|
|
|
12a457 |
know if the operation was performed on the
|
|
|
12a457 |
backend or not.
|
|
|
12a457 |
+ * Before reaching EDQUOT and ENOSPC, each brick would
|
|
|
12a457 |
+ * have written some amount of data, hence this is not
|
|
|
12a457 |
+ * symmetric error.
|
|
|
12a457 |
*/
|
|
|
12a457 |
matching_errors = _gf_false;
|
|
|
12a457 |
break;
|
|
|
12a457 |
@@ -760,7 +764,7 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
|
|
|
12a457 |
else
|
|
|
12a457 |
need_undirty = _gf_true;
|
|
|
12a457 |
|
|
|
12a457 |
- if (local->op_ret < 0) {
|
|
|
12a457 |
+ if (local->op_ret < 0 && !nothing_failed) {
|
|
|
12a457 |
afr_changelog_post_op_done (frame, this);
|
|
|
12a457 |
goto out;
|
|
|
12a457 |
}
|
|
|
12a457 |
--
|
|
|
12a457 |
1.7.1
|
|
|
12a457 |
|