|
|
d1681e |
From 77c33f6c257928576d328e6e735f7e7a086202a3 Mon Sep 17 00:00:00 2001
|
|
|
d1681e |
From: karthik-us <ksubrahm@redhat.com>
|
|
|
d1681e |
Date: Tue, 17 Jul 2018 11:56:10 +0530
|
|
|
d1681e |
Subject: [PATCH 323/325] cluster/afr: Mark dirty for entry transactions for
|
|
|
d1681e |
quorum failures
|
|
|
d1681e |
|
|
|
d1681e |
Backport of: https://review.gluster.org/#/c/20153/
|
|
|
d1681e |
Problem:
|
|
|
d1681e |
If an entry creation transaction fails on quorum number of bricks
|
|
|
d1681e |
it might end up setting the pending changelogs on the file itself
|
|
|
d1681e |
on the brick where it got created. But the parent does not have
|
|
|
d1681e |
any entry pending marker set. This will lead to the entry not
|
|
|
d1681e |
getting healed by the self heal daemon automatically.
|
|
|
d1681e |
|
|
|
d1681e |
Fix:
|
|
|
d1681e |
For entry transactions mark dirty on the parent if it fails on
|
|
|
d1681e |
quorum number of bricks, so that the heal can do conservative
|
|
|
d1681e |
merge and entry gets healed by shd.
|
|
|
d1681e |
|
|
|
d1681e |
Change-Id: I8bbd02da7c4c9edd9c3f947e9a4ed3d37c9bec1c
|
|
|
d1681e |
BUG: 1566336
|
|
|
d1681e |
Signed-off-by: karthik-us <ksubrahm@redhat.com>
|
|
|
d1681e |
Reviewed-on: https://code.engineering.redhat.com/gerrit/144145
|
|
|
d1681e |
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
|
d1681e |
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
|
|
|
d1681e |
---
|
|
|
d1681e |
...20-mark-dirty-for-entry-txn-on-quorum-failure.t | 73 ++++++++++++++++++++++
|
|
|
d1681e |
xlators/cluster/afr/src/afr-transaction.c | 62 ++++++++++++++----
|
|
|
d1681e |
2 files changed, 124 insertions(+), 11 deletions(-)
|
|
|
d1681e |
create mode 100644 tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t
|
|
|
d1681e |
|
|
|
d1681e |
diff --git a/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t b/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t
|
|
|
d1681e |
new file mode 100644
|
|
|
d1681e |
index 0000000..7fec3b4
|
|
|
d1681e |
--- /dev/null
|
|
|
d1681e |
+++ b/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t
|
|
|
d1681e |
@@ -0,0 +1,73 @@
|
|
|
d1681e |
+#!/bin/bash
|
|
|
d1681e |
+
|
|
|
d1681e |
+. $(dirname $0)/../../include.rc
|
|
|
d1681e |
+. $(dirname $0)/../../volume.rc
|
|
|
d1681e |
+
|
|
|
d1681e |
+cleanup;
|
|
|
d1681e |
+
|
|
|
d1681e |
+function create_files {
|
|
|
d1681e |
+ local i=1
|
|
|
d1681e |
+ while (true)
|
|
|
d1681e |
+ do
|
|
|
d1681e |
+ dd if=/dev/zero of=$M0/file$i bs=1M count=10
|
|
|
d1681e |
+ if [ -e $B0/${V0}0/file$i ] && [ -e $B0/${V0}1/file$i ]; then
|
|
|
d1681e |
+ ((i++))
|
|
|
d1681e |
+ else
|
|
|
d1681e |
+ break
|
|
|
d1681e |
+ fi
|
|
|
d1681e |
+ done
|
|
|
d1681e |
+ echo $i
|
|
|
d1681e |
+}
|
|
|
d1681e |
+
|
|
|
d1681e |
+TEST glusterd
|
|
|
d1681e |
+
|
|
|
d1681e |
+#Create brick partitions
|
|
|
d1681e |
+TEST truncate -s 100M $B0/brick0
|
|
|
d1681e |
+TEST truncate -s 100M $B0/brick1
|
|
|
d1681e |
+#Have the 3rd brick of a higher size to test the scenario of entry transaction
|
|
|
d1681e |
+#passing on only one brick and not on other bricks.
|
|
|
d1681e |
+TEST truncate -s 110M $B0/brick2
|
|
|
d1681e |
+LO1=`SETUP_LOOP $B0/brick0`
|
|
|
d1681e |
+TEST [ $? -eq 0 ]
|
|
|
d1681e |
+TEST MKFS_LOOP $LO1
|
|
|
d1681e |
+LO2=`SETUP_LOOP $B0/brick1`
|
|
|
d1681e |
+TEST [ $? -eq 0 ]
|
|
|
d1681e |
+TEST MKFS_LOOP $LO2
|
|
|
d1681e |
+LO3=`SETUP_LOOP $B0/brick2`
|
|
|
d1681e |
+TEST [ $? -eq 0 ]
|
|
|
d1681e |
+TEST MKFS_LOOP $LO3
|
|
|
d1681e |
+TEST mkdir -p $B0/${V0}0 $B0/${V0}1 $B0/${V0}2
|
|
|
d1681e |
+TEST MOUNT_LOOP $LO1 $B0/${V0}0
|
|
|
d1681e |
+TEST MOUNT_LOOP $LO2 $B0/${V0}1
|
|
|
d1681e |
+TEST MOUNT_LOOP $LO3 $B0/${V0}2
|
|
|
d1681e |
+
|
|
|
d1681e |
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
|
|
|
d1681e |
+TEST $CLI volume start $V0
|
|
|
d1681e |
+TEST $CLI volume set $V0 performance.write-behind off
|
|
|
d1681e |
+TEST $CLI volume set $V0 self-heal-daemon off
|
|
|
d1681e |
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
|
|
|
d1681e |
+
|
|
|
d1681e |
+i=$(create_files)
|
|
|
d1681e |
+TEST ! ls $B0/${V0}0/file$i
|
|
|
d1681e |
+TEST ! ls $B0/${V0}1/file$i
|
|
|
d1681e |
+TEST ls $B0/${V0}2/file$i
|
|
|
d1681e |
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}2
|
|
|
d1681e |
+EXPECT "000000010000000100000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2/file$i
|
|
|
d1681e |
+EXPECT "000000010000000100000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2/file$i
|
|
|
d1681e |
+
|
|
|
d1681e |
+TEST $CLI volume set $V0 self-heal-daemon on
|
|
|
d1681e |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
|
|
|
d1681e |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
|
|
|
d1681e |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
|
|
|
d1681e |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
|
|
|
d1681e |
+TEST rm -f $M0/file1
|
|
|
d1681e |
+
|
|
|
d1681e |
+TEST $CLI volume heal $V0
|
|
|
d1681e |
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
|
|
|
d1681e |
+TEST force_umount $M0
|
|
|
d1681e |
+TEST $CLI volume stop $V0
|
|
|
d1681e |
+EXPECT 'Stopped' volinfo_field $V0 'Status';
|
|
|
d1681e |
+TEST $CLI volume delete $V0;
|
|
|
d1681e |
+UMOUNT_LOOP ${B0}/${V0}{0,1,2}
|
|
|
d1681e |
+rm -f ${B0}/brick{0,1,2}
|
|
|
d1681e |
+cleanup;
|
|
|
d1681e |
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
|
|
|
d1681e |
index 5b18f63..321b6f1 100644
|
|
|
d1681e |
--- a/xlators/cluster/afr/src/afr-transaction.c
|
|
|
d1681e |
+++ b/xlators/cluster/afr/src/afr-transaction.c
|
|
|
d1681e |
@@ -774,8 +774,38 @@ afr_has_fop_cbk_quorum (call_frame_t *frame)
|
|
|
d1681e |
return afr_has_quorum (success, this);
|
|
|
d1681e |
}
|
|
|
d1681e |
|
|
|
d1681e |
+gf_boolean_t
|
|
|
d1681e |
+afr_need_dirty_marking (call_frame_t *frame, xlator_t *this)
|
|
|
d1681e |
+{
|
|
|
d1681e |
+ afr_private_t *priv = this->private;
|
|
|
d1681e |
+ afr_local_t *local = NULL;
|
|
|
d1681e |
+ gf_boolean_t need_dirty = _gf_false;
|
|
|
d1681e |
+
|
|
|
d1681e |
+ local = frame->local;
|
|
|
d1681e |
+
|
|
|
d1681e |
+ if (!priv->quorum_count || !local->optimistic_change_log)
|
|
|
d1681e |
+ return _gf_false;
|
|
|
d1681e |
+
|
|
|
d1681e |
+ if (local->transaction.type == AFR_DATA_TRANSACTION ||
|
|
|
d1681e |
+ local->transaction.type == AFR_METADATA_TRANSACTION)
|
|
|
d1681e |
+ return _gf_false;
|
|
|
d1681e |
+
|
|
|
d1681e |
+ if (AFR_COUNT (local->transaction.failed_subvols, priv->child_count) ==
|
|
|
d1681e |
+ priv->child_count)
|
|
|
d1681e |
+ return _gf_false;
|
|
|
d1681e |
+
|
|
|
d1681e |
+ if (priv->arbiter_count) {
|
|
|
d1681e |
+ if (!afr_has_arbiter_fop_cbk_quorum (frame))
|
|
|
d1681e |
+ need_dirty = _gf_true;
|
|
|
d1681e |
+ } else if (!afr_has_fop_cbk_quorum (frame)) {
|
|
|
d1681e |
+ need_dirty = _gf_true;
|
|
|
d1681e |
+ }
|
|
|
d1681e |
+
|
|
|
d1681e |
+ return need_dirty;
|
|
|
d1681e |
+}
|
|
|
d1681e |
+
|
|
|
d1681e |
void
|
|
|
d1681e |
-afr_handle_quorum (call_frame_t *frame)
|
|
|
d1681e |
+afr_handle_quorum (call_frame_t *frame, xlator_t *this)
|
|
|
d1681e |
{
|
|
|
d1681e |
afr_local_t *local = NULL;
|
|
|
d1681e |
afr_private_t *priv = NULL;
|
|
|
d1681e |
@@ -826,11 +856,15 @@ afr_handle_quorum (call_frame_t *frame)
|
|
|
d1681e |
return;
|
|
|
d1681e |
}
|
|
|
d1681e |
|
|
|
d1681e |
+ if (afr_need_dirty_marking (frame, this))
|
|
|
d1681e |
+ goto set_response;
|
|
|
d1681e |
+
|
|
|
d1681e |
for (i = 0; i < priv->child_count; i++) {
|
|
|
d1681e |
if (local->transaction.pre_op[i])
|
|
|
d1681e |
afr_transaction_fop_failed (frame, frame->this, i);
|
|
|
d1681e |
}
|
|
|
d1681e |
|
|
|
d1681e |
+set_response:
|
|
|
d1681e |
local->op_ret = -1;
|
|
|
d1681e |
local->op_errno = afr_final_errno (local, priv);
|
|
|
d1681e |
if (local->op_errno == 0)
|
|
|
d1681e |
@@ -874,9 +908,17 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
|
|
|
d1681e |
int nothing_failed = 1;
|
|
|
d1681e |
gf_boolean_t need_undirty = _gf_false;
|
|
|
d1681e |
|
|
|
d1681e |
- afr_handle_quorum (frame);
|
|
|
d1681e |
+ afr_handle_quorum (frame, this);
|
|
|
d1681e |
local = frame->local;
|
|
|
d1681e |
- idx = afr_index_for_transaction_type (local->transaction.type);
|
|
|
d1681e |
+ idx = afr_index_for_transaction_type (local->transaction.type);
|
|
|
d1681e |
+
|
|
|
d1681e |
+ xattr = dict_new ();
|
|
|
d1681e |
+ if (!xattr) {
|
|
|
d1681e |
+ local->op_ret = -1;
|
|
|
d1681e |
+ local->op_errno = ENOMEM;
|
|
|
d1681e |
+ afr_changelog_post_op_done (frame, this);
|
|
|
d1681e |
+ goto out;
|
|
|
d1681e |
+ }
|
|
|
d1681e |
|
|
|
d1681e |
nothing_failed = afr_txn_nothing_failed (frame, this);
|
|
|
d1681e |
|
|
|
d1681e |
@@ -886,6 +928,11 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
|
|
|
d1681e |
need_undirty = _gf_true;
|
|
|
d1681e |
|
|
|
d1681e |
if (local->op_ret < 0 && !nothing_failed) {
|
|
|
d1681e |
+ if (afr_need_dirty_marking (frame, this)) {
|
|
|
d1681e |
+ local->dirty[idx] = hton32(1);
|
|
|
d1681e |
+ goto set_dirty;
|
|
|
d1681e |
+ }
|
|
|
d1681e |
+
|
|
|
d1681e |
afr_changelog_post_op_done (frame, this);
|
|
|
d1681e |
goto out;
|
|
|
d1681e |
}
|
|
|
d1681e |
@@ -902,14 +949,6 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
|
|
|
d1681e |
goto out;
|
|
|
d1681e |
}
|
|
|
d1681e |
|
|
|
d1681e |
- xattr = dict_new ();
|
|
|
d1681e |
- if (!xattr) {
|
|
|
d1681e |
- local->op_ret = -1;
|
|
|
d1681e |
- local->op_errno = ENOMEM;
|
|
|
d1681e |
- afr_changelog_post_op_done (frame, this);
|
|
|
d1681e |
- goto out;
|
|
|
d1681e |
- }
|
|
|
d1681e |
-
|
|
|
d1681e |
for (i = 0; i < priv->child_count; i++) {
|
|
|
d1681e |
if (local->transaction.failed_subvols[i])
|
|
|
d1681e |
local->pending[i][idx] = hton32(1);
|
|
|
d1681e |
@@ -928,6 +967,7 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
|
|
|
d1681e |
else
|
|
|
d1681e |
local->dirty[idx] = hton32(0);
|
|
|
d1681e |
|
|
|
d1681e |
+set_dirty:
|
|
|
d1681e |
ret = dict_set_static_bin (xattr, AFR_DIRTY, local->dirty,
|
|
|
d1681e |
sizeof(int) * AFR_NUM_CHANGE_LOGS);
|
|
|
d1681e |
if (ret) {
|
|
|
d1681e |
--
|
|
|
d1681e |
1.8.3.1
|
|
|
d1681e |
|