From 58978b9bc2e82a39d42b529705946463297de8a0 Mon Sep 17 00:00:00 2001 From: Sunil Kumar Acharya Date: Wed, 5 Jul 2017 16:41:38 +0530 Subject: [PATCH 564/566] cluster/ec: Non-disruptive upgrade on EC volume fails Problem: Enabling optimistic changelog on EC volume was not handling node down scenarios appropriately resulting in volume data inaccessibility. Solution: Update dirty xattr appropriately on good bricks whenever nodes are down. This would fix the metadata information as part of heal and thus ensures data accessibility. >BUG: 1468261 >Change-Id: I08b0d28df386d9b2b49c3de84b4aac1c729ac057 >Signed-off-by: Sunil Kumar Acharya >Reviewed-on: https://review.gluster.org/17703 >Smoke: Gluster Build System >CentOS-regression: Gluster Build System >Reviewed-by: Pranith Kumar Karampuri BUG: 1465289 Change-Id: I08b0d28df386d9b2b49c3de84b4aac1c729ac057 Signed-off-by: Sunil Kumar Acharya Reviewed-on: https://code.engineering.redhat.com/gerrit/112278 Reviewed-by: Atin Mukherjee --- tests/basic/ec/ec-1468261.t | 96 ++++++++++++++++++++++++++++++++++++ tests/basic/ec/ec-background-heals.t | 1 + tests/bugs/cli/bug-1320388.t | 1 + tests/include.rc | 2 +- xlators/cluster/ec/src/ec-common.c | 5 +- 5 files changed, 103 insertions(+), 2 deletions(-) create mode 100644 tests/basic/ec/ec-1468261.t diff --git a/tests/basic/ec/ec-1468261.t b/tests/basic/ec/ec-1468261.t new file mode 100644 index 0000000..9c4f981 --- /dev/null +++ b/tests/basic/ec/ec-1468261.t @@ -0,0 +1,96 @@ +#!/bin/bash +# +# This test case verifies handling node down scenario with optimistic +# changelog enabled on EC volume. +### + +. $(dirname $0)/../../include.rc +. 
$(dirname $0)/../../volume.rc + +cleanup + +#create and start volume +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5} +TEST $CLI volume set $V0 disperse.optimistic-change-log on +TEST $CLI volume start $V0 + +#Mount the volume +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0; +EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 + +#Verify that all is good +TEST mkdir $M0/test_dir +TEST touch $M0/test_dir/file +sleep 2 +EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}0/test_dir +EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}1/test_dir +EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}2/test_dir +EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}3/test_dir +EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}4/test_dir +EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}5/test_dir + +#Touch a file and kill two bricks +pid0=`get_brick_pid $V0 $H0 $B0/${V0}0` +pid1=`get_brick_pid $V0 $H0 $B0/${V0}1` +TEST touch $M0/test_dir/new_file +kill $pid0 +kill $pid1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0 + +#Dirty should be set on up bricks +EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}0/test_dir +EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}1/test_dir +EXPECT_WITHIN $IO_WAIT_TIMEOUT "^00000000000000010000000000000001$" get_hex_xattr trusted.ec.dirty $B0/${V0}2/test_dir +EXPECT_WITHIN $IO_WAIT_TIMEOUT "^00000000000000010000000000000001$" get_hex_xattr trusted.ec.dirty $B0/${V0}3/test_dir +EXPECT_WITHIN $IO_WAIT_TIMEOUT "^00000000000000010000000000000001$" get_hex_xattr trusted.ec.dirty $B0/${V0}4/test_dir +EXPECT_WITHIN $IO_WAIT_TIMEOUT "^00000000000000010000000000000001$" get_hex_xattr trusted.ec.dirty $B0/${V0}5/test_dir + +#Bring up the down bricks +TEST $CLI 
volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 + +#remove mount point contents +TEST rm -rf $M0"/*" 2>/dev/null + +# unmount and remount the volume +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST glusterfs -s $H0 --volfile-id $V0 $M0; + +#Create a tar file +TEST mkdir $M0/test_dir +for i in {1..3000};do +dd if=/dev/urandom of=$M0/test_dir/file-$i bs=1k count=10; +done +tar -cf $M0/test_dir.tar $M0/test_dir/ 2>/dev/null +rm -rf $M0/test_dir/ + +#Untar the tar file +tar -C $M0 -xf $M0/test_dir.tar 2>/dev/null& + +#Kill 1st and 2nd brick +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST kill_brick $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0 + +#Stop untarring +TEST kill %1 + +#Bring up the down bricks +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 + +#Wait for heal to complete +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 + +#Kill 4th and 5th brick +TEST kill_brick $V0 $H0 $B0/${V0}3 +TEST kill_brick $V0 $H0 $B0/${V0}4 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0 + +#remove mount point contents +#this will fail if things are wrong +TEST rm -rf $M0"/*" 2>/dev/null + +cleanup diff --git a/tests/basic/ec/ec-background-heals.t b/tests/basic/ec/ec-background-heals.t index 28efc83..3bc6df7 100644 --- a/tests/basic/ec/ec-background-heals.t +++ b/tests/basic/ec/ec-background-heals.t @@ -16,6 +16,7 @@ TEST $CLI volume set $V0 performance.quick-read off TEST $CLI volume set $V0 performance.read-ahead off TEST $CLI volume set $V0 performance.io-cache off TEST $CLI volume set $V0 disperse.background-heals 0 +TEST $CLI volume set $V0 disperse.eager-lock off TEST $CLI volume start $V0 TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0; diff --git a/tests/bugs/cli/bug-1320388.t b/tests/bugs/cli/bug-1320388.t index f6ea3d6..ca23ab8 100755 --- a/tests/bugs/cli/bug-1320388.t +++ b/tests/bugs/cli/bug-1320388.t @@ -28,6 +28,7 @@ ln 
$SSL_CERT $SSL_CA TEST glusterd TEST pidof glusterd TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5} +TEST $CLI volume set $V0 disperse.eager-lock off TEST $CLI volume start $V0 TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0 EXPECT_WITHIN $CHILD_UP_TIMEOUT "^6$" ec_child_up_count $V0 0 diff --git a/tests/include.rc b/tests/include.rc index 21aabf5..3e61ce7 100644 --- a/tests/include.rc +++ b/tests/include.rc @@ -79,6 +79,7 @@ AUTH_REFRESH_INTERVAL=10 GRAPH_SWITCH_TIMEOUT=10 UNLINK_TIMEOUT=5 MDC_TIMEOUT=5 +IO_WAIT_TIMEOUT=5 LOGDIR=$(gluster --print-logdir) @@ -332,7 +333,6 @@ function _EXPECT_WITHIN() if [ $? -ne 0 ]; then break; fi - ## Check match success if [[ "$a" =~ "$e" ]]; then break; diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c index 9a977d7..7da88f3 100644 --- a/xlators/cluster/ec/src/ec-common.c +++ b/xlators/cluster/ec/src/ec-common.c @@ -2068,7 +2068,10 @@ ec_update_info(ec_lock_link_t *link) /* If we set the dirty flag for update fop, we have to unset it. * If fop has failed on some bricks, leave the dirty as marked. */ if (lock->unlock_now) { - if (!(ec->node_mask & ~lock->good_mask)) { + /* Ensure that nodes are up while doing final + * metadata update.*/ + if (!(ec->node_mask & ~lock->good_mask) && + !(ec->node_mask & ~ec->xl_up)) { if (ctx->dirty[0] != 0) { dirty[0] = -1; } -- 1.8.3.1