|
|
21ab4e |
From 58978b9bc2e82a39d42b529705946463297de8a0 Mon Sep 17 00:00:00 2001
|
|
|
21ab4e |
From: Sunil Kumar Acharya <sheggodu@redhat.com>
|
|
|
21ab4e |
Date: Wed, 5 Jul 2017 16:41:38 +0530
|
|
|
21ab4e |
Subject: [PATCH 564/566] cluster/ec: Non-disruptive upgrade on EC volume fails
|
|
|
21ab4e |
|
|
|
21ab4e |
Problem:
|
|
|
21ab4e |
Enabling optimistic changelog on EC volume was not
|
|
|
21ab4e |
handling node-down scenarios appropriately, resulting
|
|
|
21ab4e |
in volume data inaccessibility.
|
|
|
21ab4e |
|
|
|
21ab4e |
Solution:
|
|
|
21ab4e |
Update dirty xattr appropriately on good bricks whenever
|
|
|
21ab4e |
nodes are down. This would fix the metadata information
|
|
|
21ab4e |
as part of heal and thus ensure data accessibility.
|
|
|
21ab4e |
|
|
|
21ab4e |
>BUG: 1468261
|
|
|
21ab4e |
>Change-Id: I08b0d28df386d9b2b49c3de84b4aac1c729ac057
|
|
|
21ab4e |
>Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com>
|
|
|
21ab4e |
>Reviewed-on: https://review.gluster.org/17703
|
|
|
21ab4e |
>Smoke: Gluster Build System <jenkins@build.gluster.org>
|
|
|
21ab4e |
>CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
|
|
|
21ab4e |
>Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
|
|
|
21ab4e |
|
|
|
21ab4e |
BUG: 1465289
|
|
|
21ab4e |
Change-Id: I08b0d28df386d9b2b49c3de84b4aac1c729ac057
|
|
|
21ab4e |
Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com>
|
|
|
21ab4e |
Reviewed-on: https://code.engineering.redhat.com/gerrit/112278
|
|
|
21ab4e |
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
|
|
|
21ab4e |
---
|
|
|
21ab4e |
tests/basic/ec/ec-1468261.t | 96 ++++++++++++++++++++++++++++++++++++
|
|
|
21ab4e |
tests/basic/ec/ec-background-heals.t | 1 +
|
|
|
21ab4e |
tests/bugs/cli/bug-1320388.t | 1 +
|
|
|
21ab4e |
tests/include.rc | 2 +-
|
|
|
21ab4e |
xlators/cluster/ec/src/ec-common.c | 5 +-
|
|
|
21ab4e |
5 files changed, 103 insertions(+), 2 deletions(-)
|
|
|
21ab4e |
create mode 100644 tests/basic/ec/ec-1468261.t
|
|
|
21ab4e |
|
|
|
21ab4e |
diff --git a/tests/basic/ec/ec-1468261.t b/tests/basic/ec/ec-1468261.t
|
|
|
21ab4e |
new file mode 100644
|
|
|
21ab4e |
index 0000000..9c4f981
|
|
|
21ab4e |
--- /dev/null
|
|
|
21ab4e |
+++ b/tests/basic/ec/ec-1468261.t
|
|
|
21ab4e |
@@ -0,0 +1,96 @@
|
|
|
21ab4e |
+#!/bin/bash
|
|
|
21ab4e |
+#
|
|
|
21ab4e |
+# This test case verifies handling of node-down scenarios with optimistic
|
|
|
21ab4e |
+# changelog enabled on EC volume.
|
|
|
21ab4e |
+###
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+. $(dirname $0)/../../include.rc
|
|
|
21ab4e |
+. $(dirname $0)/../../volume.rc
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+cleanup
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+#Create and start volume
|
|
|
21ab4e |
+TEST glusterd
|
|
|
21ab4e |
+TEST pidof glusterd
|
|
|
21ab4e |
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
|
|
|
21ab4e |
+TEST $CLI volume set $V0 disperse.optimistic-change-log on
|
|
|
21ab4e |
+TEST $CLI volume start $V0
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+#Mount the volume
|
|
|
21ab4e |
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
|
|
|
21ab4e |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+#Verify that all is good
|
|
|
21ab4e |
+TEST mkdir $M0/test_dir
|
|
|
21ab4e |
+TEST touch $M0/test_dir/file
|
|
|
21ab4e |
+sleep 2
|
|
|
21ab4e |
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}0/test_dir
|
|
|
21ab4e |
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}1/test_dir
|
|
|
21ab4e |
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}2/test_dir
|
|
|
21ab4e |
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}3/test_dir
|
|
|
21ab4e |
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}4/test_dir
|
|
|
21ab4e |
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}5/test_dir
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+#Touch a file and kill two bricks
|
|
|
21ab4e |
+pid0=`get_brick_pid $V0 $H0 $B0/${V0}0`
|
|
|
21ab4e |
+pid1=`get_brick_pid $V0 $H0 $B0/${V0}1`
|
|
|
21ab4e |
+TEST touch $M0/test_dir/new_file
|
|
|
21ab4e |
+kill $pid0
|
|
|
21ab4e |
+kill $pid1
|
|
|
21ab4e |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+#Dirty should be set on up bricks
|
|
|
21ab4e |
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}0/test_dir
|
|
|
21ab4e |
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}1/test_dir
|
|
|
21ab4e |
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "^00000000000000010000000000000001$" get_hex_xattr trusted.ec.dirty $B0/${V0}2/test_dir
|
|
|
21ab4e |
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "^00000000000000010000000000000001$" get_hex_xattr trusted.ec.dirty $B0/${V0}3/test_dir
|
|
|
21ab4e |
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "^00000000000000010000000000000001$" get_hex_xattr trusted.ec.dirty $B0/${V0}4/test_dir
|
|
|
21ab4e |
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "^00000000000000010000000000000001$" get_hex_xattr trusted.ec.dirty $B0/${V0}5/test_dir
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+#Bring up the down bricks
|
|
|
21ab4e |
+TEST $CLI volume start $V0 force
|
|
|
21ab4e |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+#remove mount point contents
|
|
|
21ab4e |
+TEST rm -rf $M0"/*" 2>/dev/null
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+# unmount and remount the volume
|
|
|
21ab4e |
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
|
|
|
21ab4e |
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+#Create a tar file
|
|
|
21ab4e |
+TEST mkdir $M0/test_dir
|
|
|
21ab4e |
+for i in {1..3000};do
|
|
|
21ab4e |
+dd if=/dev/urandom of=$M0/test_dir/file-$i bs=1k count=10;
|
|
|
21ab4e |
+done
|
|
|
21ab4e |
+tar -cf $M0/test_dir.tar $M0/test_dir/ 2>/dev/null
|
|
|
21ab4e |
+rm -rf $M0/test_dir/
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+#Untar the tar file
|
|
|
21ab4e |
+tar -C $M0 -xf $M0/test_dir.tar 2>/dev/null&
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+#Kill 1st and 2nd brick
|
|
|
21ab4e |
+TEST kill_brick $V0 $H0 $B0/${V0}0
|
|
|
21ab4e |
+TEST kill_brick $V0 $H0 $B0/${V0}1
|
|
|
21ab4e |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+#Stop untarring
|
|
|
21ab4e |
+TEST kill %1
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+#Bring up the down bricks
|
|
|
21ab4e |
+TEST $CLI volume start $V0 force
|
|
|
21ab4e |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+#Wait for heal to complete
|
|
|
21ab4e |
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+#Kill 3rd and 4th brick
|
|
|
21ab4e |
+TEST kill_brick $V0 $H0 $B0/${V0}3
|
|
|
21ab4e |
+TEST kill_brick $V0 $H0 $B0/${V0}4
|
|
|
21ab4e |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+#remove mount point contents
|
|
|
21ab4e |
+#this will fail if things are wrong
|
|
|
21ab4e |
+TEST rm -rf $M0"/*" 2>/dev/null
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+cleanup
|
|
|
21ab4e |
diff --git a/tests/basic/ec/ec-background-heals.t b/tests/basic/ec/ec-background-heals.t
|
|
|
21ab4e |
index 28efc83..3bc6df7 100644
|
|
|
21ab4e |
--- a/tests/basic/ec/ec-background-heals.t
|
|
|
21ab4e |
+++ b/tests/basic/ec/ec-background-heals.t
|
|
|
21ab4e |
@@ -16,6 +16,7 @@ TEST $CLI volume set $V0 performance.quick-read off
|
|
|
21ab4e |
TEST $CLI volume set $V0 performance.read-ahead off
|
|
|
21ab4e |
TEST $CLI volume set $V0 performance.io-cache off
|
|
|
21ab4e |
TEST $CLI volume set $V0 disperse.background-heals 0
|
|
|
21ab4e |
+TEST $CLI volume set $V0 disperse.eager-lock off
|
|
|
21ab4e |
TEST $CLI volume start $V0
|
|
|
21ab4e |
|
|
|
21ab4e |
TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
|
|
|
21ab4e |
diff --git a/tests/bugs/cli/bug-1320388.t b/tests/bugs/cli/bug-1320388.t
|
|
|
21ab4e |
index f6ea3d6..ca23ab8 100755
|
|
|
21ab4e |
--- a/tests/bugs/cli/bug-1320388.t
|
|
|
21ab4e |
+++ b/tests/bugs/cli/bug-1320388.t
|
|
|
21ab4e |
@@ -28,6 +28,7 @@ ln $SSL_CERT $SSL_CA
|
|
|
21ab4e |
TEST glusterd
|
|
|
21ab4e |
TEST pidof glusterd
|
|
|
21ab4e |
TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
|
|
|
21ab4e |
+TEST $CLI volume set $V0 disperse.eager-lock off
|
|
|
21ab4e |
TEST $CLI volume start $V0
|
|
|
21ab4e |
TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
|
|
|
21ab4e |
EXPECT_WITHIN $CHILD_UP_TIMEOUT "^6$" ec_child_up_count $V0 0
|
|
|
21ab4e |
diff --git a/tests/include.rc b/tests/include.rc
|
|
|
21ab4e |
index 21aabf5..3e61ce7 100644
|
|
|
21ab4e |
--- a/tests/include.rc
|
|
|
21ab4e |
+++ b/tests/include.rc
|
|
|
21ab4e |
@@ -79,6 +79,7 @@ AUTH_REFRESH_INTERVAL=10
|
|
|
21ab4e |
GRAPH_SWITCH_TIMEOUT=10
|
|
|
21ab4e |
UNLINK_TIMEOUT=5
|
|
|
21ab4e |
MDC_TIMEOUT=5
|
|
|
21ab4e |
+IO_WAIT_TIMEOUT=5
|
|
|
21ab4e |
|
|
|
21ab4e |
LOGDIR=$(gluster --print-logdir)
|
|
|
21ab4e |
|
|
|
21ab4e |
@@ -332,7 +333,6 @@ function _EXPECT_WITHIN()
|
|
|
21ab4e |
if [ $? -ne 0 ]; then
|
|
|
21ab4e |
break;
|
|
|
21ab4e |
fi
|
|
|
21ab4e |
-
|
|
|
21ab4e |
## Check match success
|
|
|
21ab4e |
if [[ "$a" =~ "$e" ]]; then
|
|
|
21ab4e |
break;
|
|
|
21ab4e |
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
|
|
|
21ab4e |
index 9a977d7..7da88f3 100644
|
|
|
21ab4e |
--- a/xlators/cluster/ec/src/ec-common.c
|
|
|
21ab4e |
+++ b/xlators/cluster/ec/src/ec-common.c
|
|
|
21ab4e |
@@ -2068,7 +2068,10 @@ ec_update_info(ec_lock_link_t *link)
|
|
|
21ab4e |
/* If we set the dirty flag for update fop, we have to unset it.
|
|
|
21ab4e |
* If fop has failed on some bricks, leave the dirty as marked. */
|
|
|
21ab4e |
if (lock->unlock_now) {
|
|
|
21ab4e |
- if (!(ec->node_mask & ~lock->good_mask)) {
|
|
|
21ab4e |
+ /* Ensure that nodes are up while doing final
|
|
|
21ab4e |
+ * metadata update.*/
|
|
|
21ab4e |
+ if (!(ec->node_mask & ~lock->good_mask) &&
|
|
|
21ab4e |
+ !(ec->node_mask & ~ec->xl_up)) {
|
|
|
21ab4e |
if (ctx->dirty[0] != 0) {
|
|
|
21ab4e |
dirty[0] = -1;
|
|
|
21ab4e |
}
|
|
|
21ab4e |
--
|
|
|
21ab4e |
1.8.3.1
|
|
|
21ab4e |
|