12a457
From ebcd815d4ae36228cf79c393e8d58aa49d5300bb Mon Sep 17 00:00:00 2001
12a457
From: Anuradha Talur <atalur@redhat.com>
12a457
Date: Fri, 13 May 2016 15:34:06 +0530
12a457
Subject: [PATCH 150/158] cluster/afr : Do post-op in case of symmetric errors
12a457
12a457
        Backport of: http://review.gluster.org/#/c/14310/
12a457
12a457
In afr_changelog_post_op_now(), if there was any error,
12a457
meaning op_ret < 0, post-op was not being done even when
12a457
the errors were symmetric and there were no "failed
12a457
subvols".
12a457
12a457
Fix:
12a457
When the errors are symmetric, perform post-op.
12a457
12a457
How the bug was found:
12a457
In a 1 X 3 volume with shard and write-behind enabled,
12a457
when writes were done into a file with one brick down,
12a457
the trusted.afr.dirty xattr's value for .shard directory
12a457
would keep increasing as post op was not done but pre-op was.
12a457
This incorrectly showed .shard to be in split-brain.
12a457
12a457
RCA:
12a457
When WB is on, due to multiple writes being sent on
12a457
offsets lying in the same shard, chances are that the
12a457
same shard file will be created more than once
12a457
with the second one failing with op_ret < 0
12a457
and op_errno = EEXIST.
12a457
12a457
As op_ret was negative, afr wouldn't do post-op,
12a457
leading to no decrement of trusted.afr.dirty xattr.
12a457
This made the .shard directory appear to be in split-brain.
12a457
12a457
        >Change-Id: I711bdeaa1397244e6a7790e96f0c84501798fc59
12a457
        >BUG: 1335652
12a457
        >Signed-off-by: Anuradha Talur <atalur@redhat.com>
12a457
12a457
Change-Id: I711bdeaa1397244e6a7790e96f0c84501798fc59
12a457
BUG: 1332949
12a457
Signed-off-by: Anuradha Talur <atalur@redhat.com>
12a457
Reviewed-on: https://code.engineering.redhat.com/gerrit/74280
12a457
Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
12a457
Tested-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
12a457
---
12a457
 tests/bugs/replicate/bug-1335652.t        |   29 +++++++++++++++++++++++++++++
12a457
 xlators/cluster/afr/src/afr-transaction.c |    8 ++++++--
12a457
 2 files changed, 35 insertions(+), 2 deletions(-)
12a457
 create mode 100644 tests/bugs/replicate/bug-1335652.t
12a457
12a457
diff --git a/tests/bugs/replicate/bug-1335652.t b/tests/bugs/replicate/bug-1335652.t
12a457
new file mode 100644
12a457
index 0000000..653a1b0
12a457
--- /dev/null
12a457
+++ b/tests/bugs/replicate/bug-1335652.t
12a457
@@ -0,0 +1,29 @@
12a457
+#!/bin/bash
12a457
+. $(dirname $0)/../../include.rc
12a457
+. $(dirname $0)/../../volume.rc
12a457
+cleanup;
12a457
+
12a457
+TEST glusterd
12a457
+TEST pidof glusterd
12a457
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
12a457
+TEST $CLI volume set $V0 shard on
12a457
+TEST $CLI volume set $V0 self-heal-daemon off
12a457
+TEST $CLI volume set $V0 data-self-heal off
12a457
+TEST $CLI volume set $V0 entry-self-heal off
12a457
+TEST $CLI volume set $V0 metadata-self-heal off
12a457
+TEST $CLI volume start $V0
12a457
+
12a457
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
12a457
+
12a457
+#Kill the zero'th brick so that 1st and 2nd get marked dirty
12a457
+TEST kill_brick $V0 $H0 $B0/${V0}0
12a457
+
12a457
+TEST dd if=/dev/urandom of=$M0/file bs=10MB count=20
12a457
+
12a457
+#At any point value of dirty should not be greater than 0 on source bricks
12a457
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.dirty $B0/${V0}1/.shard
12a457
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.dirty $B0/${V0}2/.shard
12a457
+
12a457
+rm -rf $M0/file;
12a457
+
12a457
+cleanup;
12a457
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
12a457
index 73de030..d83a45c 100644
12a457
--- a/xlators/cluster/afr/src/afr-transaction.c
12a457
+++ b/xlators/cluster/afr/src/afr-transaction.c
12a457
@@ -580,10 +580,14 @@ afr_handle_symmetric_errors (call_frame_t *frame, xlator_t *this)
12a457
 		}
12a457
 		i_errno = local->replies[i].op_errno;
12a457
 
12a457
-		if (i_errno == ENOTCONN) {
12a457
+		if (i_errno == ENOTCONN || i_errno == EDQUOT ||
12a457
+                    i_errno == ENOSPC) {
12a457
 			/* ENOTCONN is not a symmetric error. We do not
12a457
 			   know if the operation was performed on the
12a457
 			   backend or not.
12a457
+			*  Before reaching EDQUOT and ENOSPC, each brick would
12a457
+			*  have written some amount of data, hence this is not
12a457
+			*  symmetric error.
12a457
 			*/
12a457
 			matching_errors = _gf_false;
12a457
 			break;
12a457
@@ -760,7 +764,7 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
12a457
 	else
12a457
 		need_undirty = _gf_true;
12a457
 
12a457
-        if (local->op_ret < 0) {
12a457
+        if (local->op_ret < 0 && !nothing_failed) {
12a457
                 afr_changelog_post_op_done (frame, this);
12a457
                 goto out;
12a457
         }
12a457
-- 
12a457
1.7.1
12a457