From b7b17674507090e9b161a64c3d5a6f77224fa608 Mon Sep 17 00:00:00 2001 From: Anuradha Talur Date: Fri, 3 Jul 2015 11:35:46 +0530 Subject: [PATCH 203/212] cluster/afr : expunge first, impunge next in entry selfheal Backport of: http://review.gluster.org/11544/ When entry self-heals are performed, the files/directories that are to be expunged should be removed first, and only then should the impunge be done. Consider the following scenario: A volume with 2 bricks: b0 and b1. 1) With the following hierarchy on both bricks: olddir |__ oldfile 2) Bring down b1 and do 'mv olddir newdir'. 3) Bring up b1 and self-heal. 4) Without this patch, during self-heal the events occur in the following order: a) Creation of newdir on the sink brick (impunge). Notice that the gfids of olddir and newdir are the same. As a result, the gfid-link file in the .glusterfs directory still points to olddir and not to newdir. b) Deletion of olddir on the sink brick (expunge). As a part of this deletion, the gfid-link file is also deleted. Now there is no link file pointing to newdir. 5) As a result, files under newdir will not get listed as part of readdir. To tackle this kind of scenario, the expunge should be done first and the impunge later, which is the purpose of this patch. 
Change-Id: Idc8546f652adf11a13784ff989077cf79986bbd5 BUG: 1231732 Reviewed-on: http://review.gluster.org/11498 Reviewed-by: Ravishankar N Tested-by: Gluster Build System Reviewed-by: Krutika Dhananjay Reviewed-by: Pranith Kumar Karampuri Signed-off-by: Anuradha Talur Reviewed-on: https://code.engineering.redhat.com/gerrit/52357 Reviewed-by: Ravishankar Narayanankutty Tested-by: Ravishankar Narayanankutty --- tests/bugs/replicate/bug-1130892.t | 2 + tests/bugs/replicate/bug-1238508-self-heal.t | 51 +++++++++++++++++++++++++ xlators/cluster/afr/src/afr-self-heal-entry.c | 7 ++- 3 files changed, 58 insertions(+), 2 deletions(-) create mode 100644 tests/bugs/replicate/bug-1238508-self-heal.t diff --git a/tests/bugs/replicate/bug-1130892.t b/tests/bugs/replicate/bug-1130892.t index 945ee49..7442ab8 100644 --- a/tests/bugs/replicate/bug-1130892.t +++ b/tests/bugs/replicate/bug-1130892.t @@ -54,6 +54,8 @@ EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$ TEST gluster volume set $V0 self-heal-daemon on EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 TEST $CLI volume heal $V0 EXPECT_WITHIN $HEAL_TIMEOUT "Y" is_dir_heal_done $B0/${V0}-0 $B0/${V0}-1 one EXPECT_WITHIN $HEAL_TIMEOUT "Y" is_dir_heal_done $B0/${V0}-0 $B0/${V0}-1 one/two diff --git a/tests/bugs/replicate/bug-1238508-self-heal.t b/tests/bugs/replicate/bug-1238508-self-heal.t new file mode 100644 index 0000000..24fb07d --- /dev/null +++ b/tests/bugs/replicate/bug-1238508-self-heal.t @@ -0,0 +1,51 @@ +#!/bin/bash +. $(dirname $0)/../../include.rc +. 
$(dirname $0)/../../volume.rc +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume start $V0 + +# Disable self-heal-daemon +TEST $CLI volume set $V0 cluster.self-heal-daemon off + +TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0; + +TEST mkdir $M0/olddir; +TEST `echo "some-data" > $M0/olddir/oldfile` + +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST mv $M0/olddir/oldfile $M0/olddir/newfile; +TEST mv $M0/olddir $M0/newdir; + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 + +TEST $CLI volume set $V0 cluster.self-heal-daemon on +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 + +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0 + +# Test if the files are present on both the bricks +EXPECT "newdir" ls $B0/${V0}0/ +EXPECT "newdir" ls $B0/${V0}1/ +EXPECT "newfile" ls $B0/${V0}0/newdir/ +EXPECT "newfile" ls $B0/${V0}1/newdir/ + +# Test if gfid-link files in .glusterfs also provide correct info +brick0gfid=$(gf_get_gfid_backend_file_path $B0/${V0}0 newdir) +brick1gfid=$(gf_get_gfid_backend_file_path $B0/${V0}1 newdir) +EXPECT "newfile" ls $brick0gfid +EXPECT "newfile" ls $brick1gfid + +# Test if the files are accessible from the mount +EXPECT "newdir" ls $M0/ +EXPECT "newfile" ls $M0/newdir + +cleanup; diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index 13a280f..f192931 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -589,17 +589,20 @@ afr_selfheal_entry_do (call_frame_t *frame, xlator_t *this, fd_t *fd, uuid_utoa (fd->inode->gfid)); for (i = 0; i < priv->child_count; i++) { - 
if (i != source && !healed_sinks[i]) + if (!healed_sinks[i]) continue; ret = afr_selfheal_entry_do_subvol (frame, this, fd, i); if (ret == -1) { /* gfid or type mismatch. */ mismatch = _gf_true; - continue; + ret = 0; } if (ret) break; } + if (!ret && source != -1) + ret = afr_selfheal_entry_do_subvol (frame, this, fd, source); + if (mismatch == _gf_true) /* undo pending will be skipped */ ret = -1; -- 1.7.1