a3470f
From 77c33f6c257928576d328e6e735f7e7a086202a3 Mon Sep 17 00:00:00 2001
a3470f
From: karthik-us <ksubrahm@redhat.com>
a3470f
Date: Tue, 17 Jul 2018 11:56:10 +0530
a3470f
Subject: [PATCH 323/325] cluster/afr: Mark dirty for entry transactions for
a3470f
 quorum failures
a3470f
a3470f
Backport of: https://review.gluster.org/#/c/20153/
a3470f
Problem:
a3470f
If an entry creation transaction fails on quorum number of bricks
a3470f
it might end up setting the pending changelogs on the file itself
a3470f
on the brick where it got created. But the parent does not have
a3470f
any entry pending marker set. This will lead to the entry not
a3470f
getting healed by the self heal daemon automatically.
a3470f
a3470f
Fix:
a3470f
For entry transactions, mark dirty on the parent if the transaction
a3470f
fails on a quorum number of bricks, so that heal can do a conservative
a3470f
merge and the entry gets healed by shd.
a3470f
a3470f
Change-Id: I8bbd02da7c4c9edd9c3f947e9a4ed3d37c9bec1c
a3470f
BUG: 1566336
a3470f
Signed-off-by: karthik-us <ksubrahm@redhat.com>
a3470f
Reviewed-on: https://code.engineering.redhat.com/gerrit/144145
a3470f
Tested-by: RHGS Build Bot <nigelb@redhat.com>
a3470f
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
a3470f
---
a3470f
 ...20-mark-dirty-for-entry-txn-on-quorum-failure.t | 73 ++++++++++++++++++++++
a3470f
 xlators/cluster/afr/src/afr-transaction.c          | 62 ++++++++++++++----
a3470f
 2 files changed, 124 insertions(+), 11 deletions(-)
a3470f
 create mode 100644 tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t
a3470f
a3470f
diff --git a/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t b/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t
a3470f
new file mode 100644
a3470f
index 0000000..7fec3b4
a3470f
--- /dev/null
a3470f
+++ b/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t
a3470f
@@ -0,0 +1,73 @@
a3470f
+#!/bin/bash
a3470f
+
a3470f
+. $(dirname $0)/../../include.rc
a3470f
+. $(dirname $0)/../../volume.rc
a3470f
+
a3470f
+cleanup;
a3470f
+
a3470f
+function create_files {
a3470f
+        local i=1
a3470f
+        while (true)
a3470f
+        do
a3470f
+                dd if=/dev/zero of=$M0/file$i bs=1M count=10
a3470f
+                if [ -e $B0/${V0}0/file$i ] && [ -e $B0/${V0}1/file$i ]; then
a3470f
+                        ((i++))
a3470f
+                else
a3470f
+                        break
a3470f
+                fi
a3470f
+        done
a3470f
+        echo $i
a3470f
+}
a3470f
+
a3470f
+TEST glusterd
a3470f
+
a3470f
+#Create brick partitions
a3470f
+TEST truncate -s 100M $B0/brick0
a3470f
+TEST truncate -s 100M $B0/brick1
a3470f
+#Have the 3rd brick of a higher size to test the scenario of entry transaction
a3470f
+#passing on only one brick and not on other bricks.
a3470f
+TEST truncate -s 110M $B0/brick2
a3470f
+LO1=`SETUP_LOOP $B0/brick0`
a3470f
+TEST [ $? -eq 0 ]
a3470f
+TEST MKFS_LOOP $LO1
a3470f
+LO2=`SETUP_LOOP $B0/brick1`
a3470f
+TEST [ $? -eq 0 ]
a3470f
+TEST MKFS_LOOP $LO2
a3470f
+LO3=`SETUP_LOOP $B0/brick2`
a3470f
+TEST [ $? -eq 0 ]
a3470f
+TEST MKFS_LOOP $LO3
a3470f
+TEST mkdir -p $B0/${V0}0 $B0/${V0}1 $B0/${V0}2
a3470f
+TEST MOUNT_LOOP $LO1 $B0/${V0}0
a3470f
+TEST MOUNT_LOOP $LO2 $B0/${V0}1
a3470f
+TEST MOUNT_LOOP $LO3 $B0/${V0}2
a3470f
+
a3470f
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
a3470f
+TEST $CLI volume start $V0
a3470f
+TEST $CLI volume set $V0 performance.write-behind off
a3470f
+TEST $CLI volume set $V0 self-heal-daemon off
a3470f
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
a3470f
+
a3470f
+i=$(create_files)
a3470f
+TEST ! ls $B0/${V0}0/file$i
a3470f
+TEST ! ls $B0/${V0}1/file$i
a3470f
+TEST ls $B0/${V0}2/file$i
a3470f
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}2
a3470f
+EXPECT "000000010000000100000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2/file$i
a3470f
+EXPECT "000000010000000100000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2/file$i
a3470f
+
a3470f
+TEST $CLI volume set $V0 self-heal-daemon on
a3470f
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
a3470f
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
a3470f
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
a3470f
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
a3470f
+TEST rm -f $M0/file1
a3470f
+
a3470f
+TEST $CLI volume heal $V0
a3470f
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
a3470f
+TEST force_umount $M0
a3470f
+TEST $CLI volume stop $V0
a3470f
+EXPECT 'Stopped' volinfo_field $V0 'Status';
a3470f
+TEST $CLI volume delete $V0;
a3470f
+UMOUNT_LOOP ${B0}/${V0}{0,1,2}
a3470f
+rm -f ${B0}/brick{0,1,2}
a3470f
+cleanup;
a3470f
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
a3470f
index 5b18f63..321b6f1 100644
a3470f
--- a/xlators/cluster/afr/src/afr-transaction.c
a3470f
+++ b/xlators/cluster/afr/src/afr-transaction.c
a3470f
@@ -774,8 +774,38 @@ afr_has_fop_cbk_quorum (call_frame_t *frame)
a3470f
         return afr_has_quorum (success, this);
a3470f
 }
a3470f
 
a3470f
+gf_boolean_t
a3470f
+afr_need_dirty_marking (call_frame_t *frame, xlator_t *this)
a3470f
+{
a3470f
+        afr_private_t           *priv           = this->private;
a3470f
+        afr_local_t             *local          = NULL;
a3470f
+        gf_boolean_t            need_dirty      = _gf_false;
a3470f
+
a3470f
+        local = frame->local;
a3470f
+
a3470f
+        if (!priv->quorum_count || !local->optimistic_change_log)
a3470f
+                return _gf_false;
a3470f
+
a3470f
+        if (local->transaction.type == AFR_DATA_TRANSACTION ||
a3470f
+            local->transaction.type == AFR_METADATA_TRANSACTION)
a3470f
+                return _gf_false;
a3470f
+
a3470f
+        if (AFR_COUNT (local->transaction.failed_subvols, priv->child_count) ==
a3470f
+            priv->child_count)
a3470f
+                return _gf_false;
a3470f
+
a3470f
+        if (priv->arbiter_count) {
a3470f
+                if (!afr_has_arbiter_fop_cbk_quorum (frame))
a3470f
+                        need_dirty = _gf_true;
a3470f
+        } else if (!afr_has_fop_cbk_quorum (frame)) {
a3470f
+                need_dirty = _gf_true;
a3470f
+        }
a3470f
+
a3470f
+        return need_dirty;
a3470f
+}
a3470f
+
a3470f
 void
a3470f
-afr_handle_quorum (call_frame_t *frame)
a3470f
+afr_handle_quorum (call_frame_t *frame, xlator_t *this)
a3470f
 {
a3470f
         afr_local_t   *local = NULL;
a3470f
         afr_private_t *priv  = NULL;
a3470f
@@ -826,11 +856,15 @@ afr_handle_quorum (call_frame_t *frame)
a3470f
                 return;
a3470f
         }
a3470f
 
a3470f
+        if (afr_need_dirty_marking (frame, this))
a3470f
+                goto set_response;
a3470f
+
a3470f
         for (i = 0; i < priv->child_count; i++) {
a3470f
                 if (local->transaction.pre_op[i])
a3470f
                         afr_transaction_fop_failed (frame, frame->this, i);
a3470f
         }
a3470f
 
a3470f
+set_response:
a3470f
         local->op_ret = -1;
a3470f
         local->op_errno = afr_final_errno (local, priv);
a3470f
         if (local->op_errno == 0)
a3470f
@@ -874,9 +908,17 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
a3470f
         int                     nothing_failed  = 1;
a3470f
         gf_boolean_t            need_undirty    = _gf_false;
a3470f
 
a3470f
-        afr_handle_quorum (frame);
a3470f
+        afr_handle_quorum (frame, this);
a3470f
         local = frame->local;
a3470f
-	idx = afr_index_for_transaction_type (local->transaction.type);
a3470f
+        idx = afr_index_for_transaction_type (local->transaction.type);
a3470f
+
a3470f
+        xattr = dict_new ();
a3470f
+        if (!xattr) {
a3470f
+                local->op_ret = -1;
a3470f
+                local->op_errno = ENOMEM;
a3470f
+                afr_changelog_post_op_done (frame, this);
a3470f
+                goto out;
a3470f
+        }
a3470f
 
a3470f
         nothing_failed = afr_txn_nothing_failed (frame, this);
a3470f
 
a3470f
@@ -886,6 +928,11 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
a3470f
 		need_undirty = _gf_true;
a3470f
 
a3470f
         if (local->op_ret < 0 && !nothing_failed) {
a3470f
+                if (afr_need_dirty_marking (frame, this)) {
a3470f
+                        local->dirty[idx] = hton32(1);
a3470f
+                        goto set_dirty;
a3470f
+                }
a3470f
+
a3470f
                 afr_changelog_post_op_done (frame, this);
a3470f
                 goto out;
a3470f
         }
a3470f
@@ -902,14 +949,6 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
a3470f
                 goto out;
a3470f
         }
a3470f
 
a3470f
-	xattr = dict_new ();
a3470f
-	if (!xattr) {
a3470f
-		local->op_ret = -1;
a3470f
-		local->op_errno = ENOMEM;
a3470f
-		afr_changelog_post_op_done (frame, this);
a3470f
-		goto out;
a3470f
-	}
a3470f
-
a3470f
 	for (i = 0; i < priv->child_count; i++) {
a3470f
 		if (local->transaction.failed_subvols[i])
a3470f
 			local->pending[i][idx] = hton32(1);
a3470f
@@ -928,6 +967,7 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
a3470f
 	else
a3470f
 		local->dirty[idx] = hton32(0);
a3470f
 
a3470f
+set_dirty:
a3470f
 	ret = dict_set_static_bin (xattr, AFR_DIRTY, local->dirty,
a3470f
 				   sizeof(int) * AFR_NUM_CHANGE_LOGS);
a3470f
 	if (ret) {
a3470f
-- 
a3470f
1.8.3.1
a3470f