e7a346
From 77c33f6c257928576d328e6e735f7e7a086202a3 Mon Sep 17 00:00:00 2001
e7a346
From: karthik-us <ksubrahm@redhat.com>
e7a346
Date: Tue, 17 Jul 2018 11:56:10 +0530
e7a346
Subject: [PATCH 323/325] cluster/afr: Mark dirty for entry transactions for
e7a346
 quorum failures
e7a346
e7a346
Backport of: https://review.gluster.org/#/c/20153/
e7a346
Problem:
e7a346
If an entry creation transaction fails on quorum number of bricks
e7a346
it might end up setting the pending changelogs on the file itself
e7a346
on the brick where it got created. But the parent does not have
e7a346
any entry pending marker set. This will lead to the entry not
e7a346
getting healed by the self heal daemon automatically.
e7a346
e7a346
Fix:
e7a346
For entry transactions mark dirty on the parent if it fails on
e7a346
quorum number of bricks, so that the heal can do a conservative
e7a346
merge and the entry gets healed by shd.
e7a346
e7a346
Change-Id: I8bbd02da7c4c9edd9c3f947e9a4ed3d37c9bec1c
e7a346
BUG: 1566336
e7a346
Signed-off-by: karthik-us <ksubrahm@redhat.com>
e7a346
Reviewed-on: https://code.engineering.redhat.com/gerrit/144145
e7a346
Tested-by: RHGS Build Bot <nigelb@redhat.com>
e7a346
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
e7a346
---
e7a346
 ...20-mark-dirty-for-entry-txn-on-quorum-failure.t | 73 ++++++++++++++++++++++
e7a346
 xlators/cluster/afr/src/afr-transaction.c          | 62 ++++++++++++++----
e7a346
 2 files changed, 124 insertions(+), 11 deletions(-)
e7a346
 create mode 100644 tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t
e7a346
e7a346
diff --git a/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t b/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t
e7a346
new file mode 100644
e7a346
index 0000000..7fec3b4
e7a346
--- /dev/null
e7a346
+++ b/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t
e7a346
@@ -0,0 +1,73 @@
e7a346
+#!/bin/bash
e7a346
+
e7a346
+. $(dirname $0)/../../include.rc
e7a346
+. $(dirname $0)/../../volume.rc
e7a346
+
e7a346
+cleanup;
e7a346
+
e7a346
+function create_files {
e7a346
+        local i=1
e7a346
+        while (true)
e7a346
+        do
e7a346
+                dd if=/dev/zero of=$M0/file$i bs=1M count=10
e7a346
+                if [ -e $B0/${V0}0/file$i ] && [ -e $B0/${V0}1/file$i ]; then
e7a346
+                        ((i++))
e7a346
+                else
e7a346
+                        break
e7a346
+                fi
e7a346
+        done
e7a346
+        echo $i
e7a346
+}
e7a346
+
e7a346
+TEST glusterd
e7a346
+
e7a346
+#Create brick partitions
e7a346
+TEST truncate -s 100M $B0/brick0
e7a346
+TEST truncate -s 100M $B0/brick1
e7a346
+#Have the 3rd brick of a higher size to test the scenario of entry transaction
e7a346
+#passing on only one brick and not on other bricks.
e7a346
+TEST truncate -s 110M $B0/brick2
e7a346
+LO1=`SETUP_LOOP $B0/brick0`
e7a346
+TEST [ $? -eq 0 ]
e7a346
+TEST MKFS_LOOP $LO1
e7a346
+LO2=`SETUP_LOOP $B0/brick1`
e7a346
+TEST [ $? -eq 0 ]
e7a346
+TEST MKFS_LOOP $LO2
e7a346
+LO3=`SETUP_LOOP $B0/brick2`
e7a346
+TEST [ $? -eq 0 ]
e7a346
+TEST MKFS_LOOP $LO3
e7a346
+TEST mkdir -p $B0/${V0}0 $B0/${V0}1 $B0/${V0}2
e7a346
+TEST MOUNT_LOOP $LO1 $B0/${V0}0
e7a346
+TEST MOUNT_LOOP $LO2 $B0/${V0}1
e7a346
+TEST MOUNT_LOOP $LO3 $B0/${V0}2
e7a346
+
e7a346
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
e7a346
+TEST $CLI volume start $V0
e7a346
+TEST $CLI volume set $V0 performance.write-behind off
e7a346
+TEST $CLI volume set $V0 self-heal-daemon off
e7a346
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
e7a346
+
e7a346
+i=$(create_files)
e7a346
+TEST ! ls $B0/${V0}0/file$i
e7a346
+TEST ! ls $B0/${V0}1/file$i
e7a346
+TEST ls $B0/${V0}2/file$i
e7a346
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}2
e7a346
+EXPECT "000000010000000100000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2/file$i
e7a346
+EXPECT "000000010000000100000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2/file$i
e7a346
+
e7a346
+TEST $CLI volume set $V0 self-heal-daemon on
e7a346
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
e7a346
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
e7a346
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
e7a346
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
e7a346
+TEST rm -f $M0/file1
e7a346
+
e7a346
+TEST $CLI volume heal $V0
e7a346
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
e7a346
+TEST force_umount $M0
e7a346
+TEST $CLI volume stop $V0
e7a346
+EXPECT 'Stopped' volinfo_field $V0 'Status';
e7a346
+TEST $CLI volume delete $V0;
e7a346
+UMOUNT_LOOP ${B0}/${V0}{0,1,2}
e7a346
+rm -f ${B0}/brick{0,1,2}
e7a346
+cleanup;
e7a346
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
e7a346
index 5b18f63..321b6f1 100644
e7a346
--- a/xlators/cluster/afr/src/afr-transaction.c
e7a346
+++ b/xlators/cluster/afr/src/afr-transaction.c
e7a346
@@ -774,8 +774,38 @@ afr_has_fop_cbk_quorum (call_frame_t *frame)
e7a346
         return afr_has_quorum (success, this);
e7a346
 }
e7a346
 
e7a346
+gf_boolean_t
e7a346
+afr_need_dirty_marking (call_frame_t *frame, xlator_t *this)
e7a346
+{
e7a346
+        afr_private_t           *priv           = this->private;
e7a346
+        afr_local_t             *local          = NULL;
e7a346
+        gf_boolean_t            need_dirty      = _gf_false;
e7a346
+
e7a346
+        local = frame->local;
e7a346
+
e7a346
+        if (!priv->quorum_count || !local->optimistic_change_log)
e7a346
+                return _gf_false;
e7a346
+
e7a346
+        if (local->transaction.type == AFR_DATA_TRANSACTION ||
e7a346
+            local->transaction.type == AFR_METADATA_TRANSACTION)
e7a346
+                return _gf_false;
e7a346
+
e7a346
+        if (AFR_COUNT (local->transaction.failed_subvols, priv->child_count) ==
e7a346
+            priv->child_count)
e7a346
+                return _gf_false;
e7a346
+
e7a346
+        if (priv->arbiter_count) {
e7a346
+                if (!afr_has_arbiter_fop_cbk_quorum (frame))
e7a346
+                        need_dirty = _gf_true;
e7a346
+        } else if (!afr_has_fop_cbk_quorum (frame)) {
e7a346
+                need_dirty = _gf_true;
e7a346
+        }
e7a346
+
e7a346
+        return need_dirty;
e7a346
+}
e7a346
+
e7a346
 void
e7a346
-afr_handle_quorum (call_frame_t *frame)
e7a346
+afr_handle_quorum (call_frame_t *frame, xlator_t *this)
e7a346
 {
e7a346
         afr_local_t   *local = NULL;
e7a346
         afr_private_t *priv  = NULL;
e7a346
@@ -826,11 +856,15 @@ afr_handle_quorum (call_frame_t *frame)
e7a346
                 return;
e7a346
         }
e7a346
 
e7a346
+        if (afr_need_dirty_marking (frame, this))
e7a346
+                goto set_response;
e7a346
+
e7a346
         for (i = 0; i < priv->child_count; i++) {
e7a346
                 if (local->transaction.pre_op[i])
e7a346
                         afr_transaction_fop_failed (frame, frame->this, i);
e7a346
         }
e7a346
 
e7a346
+set_response:
e7a346
         local->op_ret = -1;
e7a346
         local->op_errno = afr_final_errno (local, priv);
e7a346
         if (local->op_errno == 0)
e7a346
@@ -874,9 +908,17 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
e7a346
         int                     nothing_failed  = 1;
e7a346
         gf_boolean_t            need_undirty    = _gf_false;
e7a346
 
e7a346
-        afr_handle_quorum (frame);
e7a346
+        afr_handle_quorum (frame, this);
e7a346
         local = frame->local;
e7a346
-	idx = afr_index_for_transaction_type (local->transaction.type);
e7a346
+        idx = afr_index_for_transaction_type (local->transaction.type);
e7a346
+
e7a346
+        xattr = dict_new ();
e7a346
+        if (!xattr) {
e7a346
+                local->op_ret = -1;
e7a346
+                local->op_errno = ENOMEM;
e7a346
+                afr_changelog_post_op_done (frame, this);
e7a346
+                goto out;
e7a346
+        }
e7a346
 
e7a346
         nothing_failed = afr_txn_nothing_failed (frame, this);
e7a346
 
e7a346
@@ -886,6 +928,11 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
e7a346
 		need_undirty = _gf_true;
e7a346
 
e7a346
         if (local->op_ret < 0 && !nothing_failed) {
e7a346
+                if (afr_need_dirty_marking (frame, this)) {
e7a346
+                        local->dirty[idx] = hton32(1);
e7a346
+                        goto set_dirty;
e7a346
+                }
e7a346
+
e7a346
                 afr_changelog_post_op_done (frame, this);
e7a346
                 goto out;
e7a346
         }
e7a346
@@ -902,14 +949,6 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
e7a346
                 goto out;
e7a346
         }
e7a346
 
e7a346
-	xattr = dict_new ();
e7a346
-	if (!xattr) {
e7a346
-		local->op_ret = -1;
e7a346
-		local->op_errno = ENOMEM;
e7a346
-		afr_changelog_post_op_done (frame, this);
e7a346
-		goto out;
e7a346
-	}
e7a346
-
e7a346
 	for (i = 0; i < priv->child_count; i++) {
e7a346
 		if (local->transaction.failed_subvols[i])
e7a346
 			local->pending[i][idx] = hton32(1);
e7a346
@@ -928,6 +967,7 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
e7a346
 	else
e7a346
 		local->dirty[idx] = hton32(0);
e7a346
 
e7a346
+set_dirty:
e7a346
 	ret = dict_set_static_bin (xattr, AFR_DIRTY, local->dirty,
e7a346
 				   sizeof(int) * AFR_NUM_CHANGE_LOGS);
e7a346
 	if (ret) {
e7a346
-- 
e7a346
1.8.3.1
e7a346