From 8c366f34a279a5ab2a6301bfd93534fe746a23e8 Mon Sep 17 00:00:00 2001 From: Ravishankar N Date: Mon, 7 Dec 2020 09:53:27 +0530 Subject: [PATCH 483/511] afr: more quorum checks in lookup and new entry marking Problem: See upstream github issue for details. Fix: -In lookup if the entry exists in 2 out of 3 bricks, don't fail the lookup with ENOENT just because there is an entrylk on the parent. Consider quorum before deciding. -If entry FOP does not succeed on quorum no. of bricks, do not perform new entry mark. Upstream patch details: > Reviewed-on: https://review.gluster.org/#/c/glusterfs/+/24499/ > Fixes: #1303 > Change-Id: I56df8c89ad53b29fa450c7930a7b7ccec9f4a6c5 > Signed-off-by: Ravishankar N BUG: 1821599 Change-Id: If513e8a7d6088a676288927630d8e616269bf5d5 Signed-off-by: Ravishankar N Reviewed-on: https://code.engineering.redhat.com/gerrit/220363 Tested-by: RHGS Build Bot Reviewed-by: Sunil Kumar Heggodu Gopala Acharya --- ...20-mark-dirty-for-entry-txn-on-quorum-failure.t | 2 -- xlators/cluster/afr/src/afr-common.c | 24 ++++++++++++---------- xlators/cluster/afr/src/afr-dir-write.c | 8 ++++++++ xlators/cluster/afr/src/afr.h | 4 ++++ 4 files changed, 25 insertions(+), 13 deletions(-) diff --git a/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t b/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t index 26f9049..49c4dea 100644 --- a/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t +++ b/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t @@ -53,8 +53,6 @@ TEST ! ls $B0/${V0}1/file$i TEST ls $B0/${V0}2/file$i dirty=$(get_hex_xattr trusted.afr.dirty $B0/${V0}2) TEST [ "$dirty" != "000000000000000000000000" ] -EXPECT "000000010000000100000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2/file$i -EXPECT "000000010000000100000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2/file$i TEST $CLI volume set $V0 self-heal-daemon on EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 89e2483..851ccad 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -1236,7 +1236,7 @@ refresh_done: return 0; } -static void +void afr_fill_success_replies(afr_local_t *local, afr_private_t *priv, unsigned char *replies) { @@ -2290,6 +2290,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this) 0, }; gf_boolean_t locked_entry = _gf_false; + gf_boolean_t in_flight_create = _gf_false; gf_boolean_t can_interpret = _gf_true; inode_t *parent = NULL; ia_type_t ia_type = IA_INVAL; @@ -2333,17 +2334,12 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this) if (!replies[i].valid) continue; - if (locked_entry && replies[i].op_ret == -1 && - replies[i].op_errno == ENOENT) { - /* Second, check entry is still - "underway" in creation */ - local->op_ret = -1; - local->op_errno = ENOENT; - goto error; - } - - if (replies[i].op_ret == -1) + if (replies[i].op_ret == -1) { + if (locked_entry && replies[i].op_errno == ENOENT) { + in_flight_create = _gf_true; + } continue; + } if (read_subvol == -1 || !readable[read_subvol]) { read_subvol = i; @@ -2353,6 +2349,12 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this) } } + if (in_flight_create && !afr_has_quorum(success_replies, this, NULL)) { + local->op_ret = -1; + local->op_errno = ENOENT; + goto error; + } + if (read_subvol == -1) goto error; /* We now have a read_subvol, which is readable[] (if there diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c index 84e2a34..416c19d 100644 --- a/xlators/cluster/afr/src/afr-dir-write.c +++ b/xlators/cluster/afr/src/afr-dir-write.c @@ -349,6 +349,7 @@ afr_mark_entry_pending_changelog(call_frame_t *frame, xlator_t *this) afr_private_t *priv = NULL; int pre_op_count = 0; int failed_count = 0; + unsigned char *success_replies = NULL; local = frame->local; priv = this->private; @@ -364,9 +365,16 @@ afr_mark_entry_pending_changelog(call_frame_t *frame, xlator_t *this) failed_count = AFR_COUNT(local->transaction.failed_subvols, priv->child_count); + /* FOP succeeded on all bricks. */ if (pre_op_count == priv->child_count && !failed_count) return; + /* FOP did not suceed on quorum no. of bricks. */ + success_replies = alloca0(priv->child_count); + afr_fill_success_replies(local, priv, success_replies); + if (!afr_has_quorum(success_replies, this, NULL)) + return; + if (priv->thin_arbiter_count) { /*Mark new entry using ta file*/ local->is_new_entry = _gf_true; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index ff96246..ed5096e 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -1334,4 +1334,8 @@ afr_mark_new_entry_changelog(call_frame_t *frame, xlator_t *this); void afr_selfheal_childup(xlator_t *this, afr_private_t *priv); + +void +afr_fill_success_replies(afr_local_t *local, afr_private_t *priv, + unsigned char *replies); #endif /* __AFR_H__ */ -- 1.8.3.1