21ab4e
From 8c6bf26a112521284a62082f5796df8f9b504f54 Mon Sep 17 00:00:00 2001
21ab4e
From: karthik-us <ksubrahm@redhat.com>
21ab4e
Date: Fri, 31 Mar 2017 15:17:42 +0530
21ab4e
Subject: [PATCH 369/369] cluster/afr: Undo pending xattrs only on the up
21ab4e
 bricks
21ab4e
21ab4e
Problem:
21ab4e
While doing conservative merge, even if a brick is down, it will reset
21ab4e
the pending xattr on that brick. When that brick comes up, as part of the
21ab4e
heal, it will consider this brick as the source and remove the entries
21ab4e
on the other bricks, which leads to data loss.
21ab4e
21ab4e
Fix: Undo pending only for the bricks which are up.
21ab4e
21ab4e
> Reviewed-on: https://review.gluster.org/16913
21ab4e
> Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
21ab4e
> Smoke: Gluster Build System <jenkins@build.gluster.org>
21ab4e
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
21ab4e
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
21ab4e
> Reviewed-by: Ravishankar N <ravishankar@redhat.com>
21ab4e
21ab4e
Change-Id: Ie166ac2e738f0b231b42082855f3c1c57797e7a7
21ab4e
BUG: 1437773
21ab4e
Signed-off-by: karthik-us <ksubrahm@redhat.com>
21ab4e
Reviewed-on: https://code.engineering.redhat.com/gerrit/102101
21ab4e
Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
21ab4e
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
21ab4e
---
21ab4e
 .../bug-1433571-undo-pending-only-on-up-bricks.t   | 89 ++++++++++++++++++++++
21ab4e
 xlators/cluster/afr/src/afr-self-heal-common.c     |  2 +-
21ab4e
 2 files changed, 90 insertions(+), 1 deletion(-)
21ab4e
 create mode 100644 tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t
21ab4e
21ab4e
diff --git a/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t b/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t
21ab4e
new file mode 100644
21ab4e
index 0000000..271abb4
21ab4e
--- /dev/null
21ab4e
+++ b/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t
21ab4e
@@ -0,0 +1,89 @@
21ab4e
+#!/bin/bash
21ab4e
+. $(dirname $0)/../../include.rc
21ab4e
+. $(dirname $0)/../../volume.rc
21ab4e
+cleanup;
21ab4e
+
21ab4e
+TEST glusterd
21ab4e
+TEST pidof glusterd
21ab4e
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
21ab4e
+TEST $CLI volume start $V0
21ab4e
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
21ab4e
+
21ab4e
+# Disable self-heal-daemon, client-side-heal and set quorum-type to none
21ab4e
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
21ab4e
+TEST $CLI volume set $V0 cluster.data-self-heal off
21ab4e
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
21ab4e
+TEST $CLI volume set $V0 cluster.entry-self-heal off
21ab4e
+TEST $CLI volume set $V0 cluster.quorum-type none
21ab4e
+
21ab4e
+#Kill bricks 0 & 1 and create a file to have pending entry for 0 & 1 on brick 2
21ab4e
+TEST kill_brick $V0 $H0 $B0/${V0}0
21ab4e
+TEST kill_brick $V0 $H0 $B0/${V0}1
21ab4e
+echo "file 1" >> $M0/f1
21ab4e
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2
21ab4e
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2
21ab4e
+
21ab4e
+TEST $CLI volume start $V0 force
21ab4e
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
21ab4e
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
21ab4e
+
21ab4e
+#Kill bricks 1 & 2 and create a file to have pending entry for 1 & 2 on brick 0
21ab4e
+TEST kill_brick $V0 $H0 $B0/${V0}1
21ab4e
+TEST kill_brick $V0 $H0 $B0/${V0}2
21ab4e
+echo "file 2" >> $M0/f2
21ab4e
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0
21ab4e
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0
21ab4e
+
21ab4e
+TEST $CLI volume start $V0 force
21ab4e
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
21ab4e
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
21ab4e
+
21ab4e
+#Kill bricks 2 & 0 and create a file to have pending entry for 2 & 0 on brick 1
21ab4e
+TEST kill_brick $V0 $H0 $B0/${V0}2
21ab4e
+TEST kill_brick $V0 $H0 $B0/${V0}0
21ab4e
+echo "file 3" >> $M0/f3
21ab4e
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1
21ab4e
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1
21ab4e
+
21ab4e
+TEST $CLI volume start $V0 force
21ab4e
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
21ab4e
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
21ab4e
+
21ab4e
+#Kill brick 0 and turn on the client side heal and do ls to trigger the heal.
21ab4e
+#The pending xattrs on bricks 1 & 2 should show a pending entry for brick 0.
21ab4e
+TEST kill_brick $V0 $H0 $B0/${V0}0
21ab4e
+TEST $CLI volume set $V0 cluster.data-self-heal on
21ab4e
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
21ab4e
+TEST $CLI volume set $V0 cluster.entry-self-heal on
21ab4e
+
21ab4e
+TEST ls $M0
21ab4e
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1
21ab4e
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1
21ab4e
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2
21ab4e
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2
21ab4e
+
21ab4e
+#Bring back all the bricks and trigger the heal again by doing ls. Now the
21ab4e
+#pending xattrs on all the bricks should be 0.
21ab4e
+TEST $CLI volume start $V0 force
21ab4e
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
21ab4e
+TEST ls $M0
21ab4e
+
21ab4e
+TEST cat $M0/f1
21ab4e
+TEST cat $M0/f2
21ab4e
+TEST cat $M0/f3
21ab4e
+
21ab4e
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
21ab4e
+
21ab4e
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0
21ab4e
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0
21ab4e
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1
21ab4e
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1
21ab4e
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2
21ab4e
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2
21ab4e
+
21ab4e
+#Check whether all the bricks contain all 3 files.
21ab4e
+EXPECT "3" echo $(ls $B0/${V0}0 | wc -l)
21ab4e
+EXPECT "3" echo $(ls $B0/${V0}1 | wc -l)
21ab4e
+EXPECT "3" echo $(ls $B0/${V0}2 | wc -l)
21ab4e
+
21ab4e
+cleanup;
21ab4e
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
21ab4e
index 8b2c3fa..9f6cbcd 100644
21ab4e
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
21ab4e
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
21ab4e
@@ -205,7 +205,7 @@ afr_selfheal_undo_pending (call_frame_t *frame, xlator_t *this, inode_t *inode,
21ab4e
 				output_matrix[i][j] = 1;
21ab4e
                                 if (type == AFR_ENTRY_TRANSACTION)
21ab4e
                                         full_heal_mtx_out[i][j] = 1;
21ab4e
-			} else {
21ab4e
+			} else if (locked_on[j]) {
21ab4e
 				output_matrix[i][j] = -input_matrix[i][j];
21ab4e
                                 if (type == AFR_ENTRY_TRANSACTION)
21ab4e
                                         full_heal_mtx_out[i][j] = -full_heal_mtx_in[i][j];
21ab4e
-- 
21ab4e
1.8.3.1
21ab4e