From 77c33f6c257928576d328e6e735f7e7a086202a3 Mon Sep 17 00:00:00 2001 From: karthik-us Date: Tue, 17 Jul 2018 11:56:10 +0530 Subject: [PATCH 323/325] cluster/afr: Mark dirty for entry transactions for quorum failures Backport of:https://review.gluster.org/#/c/20153/ Problem: If an entry creation transaction fails on quprum number of bricks it might end up setting the pending changelogs on the file itself on the brick where it got created. But the parent does not have any entry pending marker set. This will lead to the entry not getting healed by the self heal daemon automatically. Fix: For entry transactions mark dirty on the parent if it fails on quorum number of bricks, so that the heal can do conservative merge and entry gets healed by shd. Change-Id: I8bbd02da7c4c9edd9c3f947e9a4ed3d37c9bec1c BUG: 1566336 Signed-off-by: karthik-us Reviewed-on: https://code.engineering.redhat.com/gerrit/144145 Tested-by: RHGS Build Bot Reviewed-by: Sunil Kumar Heggodu Gopala Acharya --- ...20-mark-dirty-for-entry-txn-on-quorum-failure.t | 73 ++++++++++++++++++++++ xlators/cluster/afr/src/afr-transaction.c | 62 ++++++++++++++---- 2 files changed, 124 insertions(+), 11 deletions(-) create mode 100644 tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t diff --git a/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t b/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t new file mode 100644 index 0000000..7fec3b4 --- /dev/null +++ b/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t @@ -0,0 +1,73 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +cleanup; + +function create_files { + local i=1 + while (true) + do + dd if=/dev/zero of=$M0/file$i bs=1M count=10 + if [ -e $B0/${V0}0/file$i ] && [ -e $B0/${V0}1/file$i ]; then + ((i++)) + else + break + fi + done + echo $i +} + +TEST glusterd + +#Create brick partitions +TEST truncate -s 100M $B0/brick0 +TEST truncate -s 100M $B0/brick1 +#Have the 3rd brick of a higher size to test the scenario of entry transaction +#passing on only one brick and not on other bricks. +TEST truncate -s 110M $B0/brick2 +LO1=`SETUP_LOOP $B0/brick0` +TEST [ $? -eq 0 ] +TEST MKFS_LOOP $LO1 +LO2=`SETUP_LOOP $B0/brick1` +TEST [ $? -eq 0 ] +TEST MKFS_LOOP $LO2 +LO3=`SETUP_LOOP $B0/brick2` +TEST [ $? -eq 0 ] +TEST MKFS_LOOP $LO3 +TEST mkdir -p $B0/${V0}0 $B0/${V0}1 $B0/${V0}2 +TEST MOUNT_LOOP $LO1 $B0/${V0}0 +TEST MOUNT_LOOP $LO2 $B0/${V0}1 +TEST MOUNT_LOOP $LO3 $B0/${V0}2 + +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} +TEST $CLI volume start $V0 +TEST $CLI volume set $V0 performance.write-behind off +TEST $CLI volume set $V0 self-heal-daemon off +TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0 + +i=$(create_files) +TEST ! ls $B0/${V0}0/file$i +TEST ! ls $B0/${V0}1/file$i +TEST ls $B0/${V0}2/file$i +EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}2 +EXPECT "000000010000000100000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2/file$i +EXPECT "000000010000000100000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2/file$i + +TEST $CLI volume set $V0 self-heal-daemon on +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 +TEST rm -f $M0/file1 + +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0 +TEST force_umount $M0 +TEST $CLI volume stop $V0 +EXPECT 'Stopped' volinfo_field $V0 'Status'; +TEST $CLI volume delete $V0; +UMOUNT_LOOP ${B0}/${V0}{0,1,2} +rm -f ${B0}/brick{0,1,2} +cleanup; diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 5b18f63..321b6f1 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -774,8 +774,38 @@ afr_has_fop_cbk_quorum (call_frame_t *frame) return afr_has_quorum (success, this); } +gf_boolean_t +afr_need_dirty_marking (call_frame_t *frame, xlator_t *this) +{ + afr_private_t *priv = this->private; + afr_local_t *local = NULL; + gf_boolean_t need_dirty = _gf_false; + + local = frame->local; + + if (!priv->quorum_count || !local->optimistic_change_log) + return _gf_false; + + if (local->transaction.type == AFR_DATA_TRANSACTION || + local->transaction.type == AFR_METADATA_TRANSACTION) + return _gf_false; + + if (AFR_COUNT (local->transaction.failed_subvols, priv->child_count) == + priv->child_count) + return _gf_false; + + if (priv->arbiter_count) { + if (!afr_has_arbiter_fop_cbk_quorum (frame)) + need_dirty = _gf_true; + } else if (!afr_has_fop_cbk_quorum (frame)) { + need_dirty = _gf_true; + } + + return need_dirty; +} + void -afr_handle_quorum (call_frame_t *frame) +afr_handle_quorum (call_frame_t *frame, xlator_t *this) { afr_local_t *local = NULL; afr_private_t *priv = NULL; @@ -826,11 +856,15 @@ afr_handle_quorum (call_frame_t *frame) return; } + if (afr_need_dirty_marking (frame, this)) + goto set_response; + for (i = 0; i < priv->child_count; i++) { if (local->transaction.pre_op[i]) afr_transaction_fop_failed (frame, frame->this, i); } +set_response: local->op_ret = -1; local->op_errno = afr_final_errno (local, priv); if (local->op_errno == 0) @@ -874,9 +908,17 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this) int nothing_failed = 1; gf_boolean_t need_undirty = _gf_false; - afr_handle_quorum (frame); + afr_handle_quorum (frame, this); local = frame->local; - idx = afr_index_for_transaction_type (local->transaction.type); + idx = afr_index_for_transaction_type (local->transaction.type); + + xattr = dict_new (); + if (!xattr) { + local->op_ret = -1; + local->op_errno = ENOMEM; + afr_changelog_post_op_done (frame, this); + goto out; + } nothing_failed = afr_txn_nothing_failed (frame, this); @@ -886,6 +928,11 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this) need_undirty = _gf_true; if (local->op_ret < 0 && !nothing_failed) { + if (afr_need_dirty_marking (frame, this)) { + local->dirty[idx] = hton32(1); + goto set_dirty; + } + afr_changelog_post_op_done (frame, this); goto out; } @@ -902,14 +949,6 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this) goto out; } - xattr = dict_new (); - if (!xattr) { - local->op_ret = -1; - local->op_errno = ENOMEM; - afr_changelog_post_op_done (frame, this); - goto out; - } - for (i = 0; i < priv->child_count; i++) { if (local->transaction.failed_subvols[i]) local->pending[i][idx] = hton32(1); @@ -928,6 +967,7 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this) else local->dirty[idx] = hton32(0); +set_dirty: ret = dict_set_static_bin (xattr, AFR_DIRTY, local->dirty, sizeof(int) * AFR_NUM_CHANGE_LOGS); if (ret) { -- 1.8.3.1