From 6fb1804d113ae996e085ef0f23fa8908d167f006 Mon Sep 17 00:00:00 2001 From: Ravishankar N Date: Thu, 14 Jun 2018 12:59:06 +0530 Subject: [PATCH 299/305] afr: heal gfids when file is not present on all bricks Backport of: https://review.gluster.org/#/c/20271/ commit f73814ad08d552d94d0139b2592175d206e7a166 (rhgs) introduced a regression wherein if a file is present in only 1 brick of replica *and* doesn't have a gfid associated with it, it doesn't get healed upon the next lookup from the client. Fix it. Change-Id: I7d1111dcb45b1b8b8340a7d02558f05df70aa599 BUG: 1592666 Signed-off-by: Ravishankar N Reviewed-on: https://code.engineering.redhat.com/gerrit/141899 Tested-by: RHGS Build Bot Reviewed-by: Karthik Subrahmanya --- .../replicate/bug-1591193-assign-gfid-and-heal.t | 128 +++++++++++++++++++++ xlators/cluster/afr/src/afr-self-heal-common.c | 39 ++++++- xlators/cluster/afr/src/afr-self-heal-data.c | 8 +- xlators/cluster/afr/src/afr-self-heal-entry.c | 4 +- xlators/cluster/afr/src/afr-self-heal-name.c | 6 +- xlators/cluster/afr/src/afr-self-heal.h | 6 +- 6 files changed, 179 insertions(+), 12 deletions(-) create mode 100644 tests/bugs/replicate/bug-1591193-assign-gfid-and-heal.t diff --git a/tests/bugs/replicate/bug-1591193-assign-gfid-and-heal.t b/tests/bugs/replicate/bug-1591193-assign-gfid-and-heal.t new file mode 100644 index 0000000..d3b5f9a --- /dev/null +++ b/tests/bugs/replicate/bug-1591193-assign-gfid-and-heal.t @@ -0,0 +1,128 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../afr.rc + +cleanup; + +function check_gfid_and_link_count +{ + local file=$1 + + file_gfid_b0=$(gf_get_gfid_xattr $B0/${V0}0/$file) + TEST [ ! 
-z $file_gfid_b0 ] + file_gfid_b1=$(gf_get_gfid_xattr $B0/${V0}1/$file) + file_gfid_b2=$(gf_get_gfid_xattr $B0/${V0}2/$file) + EXPECT $file_gfid_b0 echo $file_gfid_b1 + EXPECT $file_gfid_b0 echo $file_gfid_b2 + + EXPECT "2" stat -c %h $B0/${V0}0/$file + EXPECT "2" stat -c %h $B0/${V0}1/$file + EXPECT "2" stat -c %h $B0/${V0}2/$file +} +TESTS_EXPECTED_IN_LOOP=30 + +############################################################################## +# Test on 1x3 volume +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}; +TEST $CLI volume start $V0; + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 + + +# Create files directly in the backend on different bricks +echo $RANDOM >> $B0/${V0}0/file1 +echo $RANDOM >> $B0/${V0}1/file2 +echo $RANDOM >> $B0/${V0}2/file3 + +# To prevent is_fresh_file code path +sleep 2 + +# Access them from mount to trigger name + gfid heal. +TEST stat $M0/file1 +TEST stat $M0/file2 +TEST stat $M0/file3 + +# Launch index heal to complete any pending data/metadata heals. 
+TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 + +# Check each file has a gfid and the .glusterfs hardlink +check_gfid_and_link_count file1 +check_gfid_and_link_count file2 +check_gfid_and_link_count file3 + +TEST rm $M0/file1 +TEST rm $M0/file2 +TEST rm $M0/file3 +cleanup; + +############################################################################## +# Test on 1x (2+1) volume +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0,1,2}; +TEST $CLI volume start $V0; + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 + + +# Create files directly in the backend on different bricks +echo $RANDOM >> $B0/${V0}0/file1 +echo $RANDOM >> $B0/${V0}1/file2 +touch $B0/${V0}2/file3 + +# To prevent is_fresh_file code path +sleep 2 + +# Access them from mount to trigger name + gfid heal. +TEST stat $M0/file1 +TEST stat $M0/file2 + +# Though file is created on all 3 bricks, lookup will fail as arbiter blames the +# other 2 bricks and arbiter is not 'readable'. +# TEST ! stat $M0/file3 +# But the checks for failing lookups when quorum is not met are not yet there in +# rhgs-3.4.0, so stat will succeed. 
+TEST stat $M0/file3 + +# Launch index heal to complete any pending data/metadata heals. +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 + +# Check each file has a gfid and the .glusterfs hardlink +check_gfid_and_link_count file1 +check_gfid_and_link_count file2 +check_gfid_and_link_count file3 + +TEST rm $M0/file1 +TEST rm $M0/file2 +TEST rm $M0/file3 +cleanup; diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 32fd24a..50989d6 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -22,7 +22,7 @@ afr_heal_synctask (xlator_t *this, afr_local_t *local); int afr_lookup_and_heal_gfid (xlator_t *this, inode_t *parent, const char *name, inode_t *inode, struct afr_reply *replies, - int source, void *gfid) + int source, unsigned char *sources, void *gfid) { afr_private_t *priv = NULL; call_frame_t *frame = NULL; @@ -37,6 +37,23 @@ afr_lookup_and_heal_gfid (xlator_t *this, inode_t *parent, const char *name, priv = this->private; wind_on = alloca0 (priv->child_count); ia_type = replies[source].poststat.ia_type; + if ((ia_type == IA_INVAL) && + (AFR_COUNT(sources, priv->child_count) == priv->child_count)) { + /* If a file is present on some bricks of the replica but parent + * dir does not have pending xattrs, all bricks are sources and + * the 'source' we selected earlier might be one where the file + * is not actually present. Hence check if file is present in + * any of the sources.*/ + for (i = 0; i < priv->child_count; i++) { + if (i == source) + continue; + if (sources[i] && replies[i].valid && + replies[i].op_ret == 0) { + ia_type = replies[i].poststat.ia_type; + break; + } + } + } /* gfid heal on those subvolumes that do not have gfid associated * with the inode and update those replies. 
@@ -1250,6 +1267,21 @@ afr_mark_split_brain_source_sinks_by_policy (call_frame_t *frame, return fav_child; } +gf_boolean_t +afr_is_file_empty_on_all_children (afr_private_t *priv, + struct afr_reply *replies) +{ + int i = 0; + + for (i = 0; i < priv->child_count; i++) { + if ((!replies[i].valid) || (replies[i].op_ret != 0) || + (replies[i].poststat.ia_size != 0)) + return _gf_false; + } + + return _gf_true; +} + int afr_mark_source_sinks_if_file_empty (xlator_t *this, unsigned char *sources, unsigned char *sinks, @@ -1268,11 +1300,8 @@ afr_mark_source_sinks_if_file_empty (xlator_t *this, unsigned char *sources, return -1; if (type == AFR_DATA_TRANSACTION) { - for (i = 0; i < priv->child_count; i++) { - if (replies[i].poststat.ia_size != 0) + if (!afr_is_file_empty_on_all_children(priv, replies)) return -1; - } - goto mark; } diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index f872a98..3ef7376 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -670,6 +670,7 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd, int source = -1; gf_boolean_t did_sh = _gf_true; gf_boolean_t is_arbiter_the_only_sink = _gf_false; + gf_boolean_t empty_file = _gf_false; priv = this->private; @@ -710,6 +711,11 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd, source = ret; if (AFR_IS_ARBITER_BRICK(priv, source)) { + empty_file = afr_is_file_empty_on_all_children (priv, + locked_replies); + if (empty_file) + goto restore_time; + did_sh = _gf_false; goto unlock; } @@ -746,7 +752,7 @@ restore_time: afr_selfheal_restore_time (frame, this, fd->inode, source, healed_sinks, locked_replies); - if (!is_arbiter_the_only_sink) { + if (!is_arbiter_the_only_sink || !empty_file) { ret = afr_selfheal_inodelk (frame, this, fd->inode, this->name, 0, 0, data_lock); if (ret < priv->child_count) { diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c 
b/xlators/cluster/afr/src/afr-self-heal-entry.c index 647dd71..f6d3a8a 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -187,7 +187,7 @@ __afr_selfheal_heal_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd, if (replies[source].op_ret == 0) { ret = afr_lookup_and_heal_gfid (this, fd->inode, name, - inode, replies, source, + inode, replies, source, sources, &replies[source].poststat.ia_gfid); if (ret) return ret; @@ -320,7 +320,7 @@ __afr_selfheal_merge_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd, } ret = afr_lookup_and_heal_gfid (this, fd->inode, name, inode, replies, - source, + source, sources, &replies[source].poststat.ia_gfid); if (ret) return ret; diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c index 556d14b..bcd0e60 100644 --- a/xlators/cluster/afr/src/afr-self-heal-name.c +++ b/xlators/cluster/afr/src/afr-self-heal-name.c @@ -19,7 +19,7 @@ __afr_selfheal_assign_gfid (xlator_t *this, inode_t *parent, uuid_t pargfid, const char *bname, inode_t *inode, struct afr_reply *replies, void *gfid, unsigned char *locked_on, int source, - gf_boolean_t is_gfid_absent) + unsigned char *sources, gf_boolean_t is_gfid_absent) { int ret = 0; int up_count = 0; @@ -48,7 +48,7 @@ __afr_selfheal_assign_gfid (xlator_t *this, inode_t *parent, uuid_t pargfid, } afr_lookup_and_heal_gfid (this, parent, bname, inode, replies, source, - gfid); + sources, gfid); out: return ret; @@ -426,7 +426,7 @@ __afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent, is_gfid_absent = (gfid_idx == -1) ? 
_gf_true : _gf_false; ret = __afr_selfheal_assign_gfid (this, parent, pargfid, bname, inode, replies, gfid, locked_on, source, - is_gfid_absent); + sources, is_gfid_absent); if (ret) return ret; diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index b015976..cc99d9e 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ b/xlators/cluster/afr/src/afr-self-heal.h @@ -113,7 +113,7 @@ afr_selfheal_entry (call_frame_t *frame, xlator_t *this, inode_t *inode); int afr_lookup_and_heal_gfid (xlator_t *this, inode_t *parent, const char *name, inode_t *inode, struct afr_reply *replies, int source, - void *gfid); + unsigned char *sources, void *gfid); int afr_selfheal_inodelk (call_frame_t *frame, xlator_t *this, inode_t *inode, @@ -354,4 +354,8 @@ afr_mark_source_sinks_if_file_empty (xlator_t *this, unsigned char *sources, struct afr_reply *replies, afr_transaction_type type); +gf_boolean_t +afr_is_file_empty_on_all_children (afr_private_t *priv, + struct afr_reply *replies); + #endif /* !_AFR_SELFHEAL_H */ -- 1.8.3.1