Blob Blame History Raw
From 77c33f6c257928576d328e6e735f7e7a086202a3 Mon Sep 17 00:00:00 2001
From: karthik-us <ksubrahm@redhat.com>
Date: Tue, 17 Jul 2018 11:56:10 +0530
Subject: [PATCH 323/325] cluster/afr: Mark dirty for entry transactions for
 quorum failures

Backport of:https://review.gluster.org/#/c/20153/
Problem:
If an entry creation transaction fails on quprum number of bricks
it might end up setting the pending changelogs on the file itself
on the brick where it got created. But the parent does not have
any entry pending marker set. This will lead to the entry not
getting healed by the self heal daemon automatically.

Fix:
For entry transactions mark dirty on the parent if it fails on
quorum number of bricks, so that the heal can do conservative
merge and entry gets healed by shd.

Change-Id: I8bbd02da7c4c9edd9c3f947e9a4ed3d37c9bec1c
BUG: 1566336
Signed-off-by: karthik-us <ksubrahm@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/144145
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
 ...20-mark-dirty-for-entry-txn-on-quorum-failure.t | 73 ++++++++++++++++++++++
 xlators/cluster/afr/src/afr-transaction.c          | 62 ++++++++++++++----
 2 files changed, 124 insertions(+), 11 deletions(-)
 create mode 100644 tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t

diff --git a/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t b/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t
new file mode 100644
index 0000000..7fec3b4
--- /dev/null
+++ b/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+function create_files {
+        local i=1
+        while (true)
+        do
+                dd if=/dev/zero of=$M0/file$i bs=1M count=10
+                if [ -e $B0/${V0}0/file$i ] && [ -e $B0/${V0}1/file$i ]; then
+                        ((i++))
+                else
+                        break
+                fi
+        done
+        echo $i
+}
+
+TEST glusterd
+
+#Create brick partitions
+TEST truncate -s 100M $B0/brick0
+TEST truncate -s 100M $B0/brick1
+#Have the 3rd brick of a higher size to test the scenario of entry transaction
+#passing on only one brick and not on other bricks.
+TEST truncate -s 110M $B0/brick2
+LO1=`SETUP_LOOP $B0/brick0`
+TEST [ $? -eq 0 ]
+TEST MKFS_LOOP $LO1
+LO2=`SETUP_LOOP $B0/brick1`
+TEST [ $? -eq 0 ]
+TEST MKFS_LOOP $LO2
+LO3=`SETUP_LOOP $B0/brick2`
+TEST [ $? -eq 0 ]
+TEST MKFS_LOOP $LO3
+TEST mkdir -p $B0/${V0}0 $B0/${V0}1 $B0/${V0}2
+TEST MOUNT_LOOP $LO1 $B0/${V0}0
+TEST MOUNT_LOOP $LO2 $B0/${V0}1
+TEST MOUNT_LOOP $LO3 $B0/${V0}2
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
+
+i=$(create_files)
+TEST ! ls $B0/${V0}0/file$i
+TEST ! ls $B0/${V0}1/file$i
+TEST ls $B0/${V0}2/file$i
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}2
+EXPECT "000000010000000100000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2/file$i
+EXPECT "000000010000000100000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2/file$i
+
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST rm -f $M0/file1
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+TEST force_umount $M0
+TEST $CLI volume stop $V0
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+TEST $CLI volume delete $V0;
+UMOUNT_LOOP ${B0}/${V0}{0,1,2}
+rm -f ${B0}/brick{0,1,2}
+cleanup;
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 5b18f63..321b6f1 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -774,8 +774,38 @@ afr_has_fop_cbk_quorum (call_frame_t *frame)
         return afr_has_quorum (success, this);
 }
 
+gf_boolean_t
+afr_need_dirty_marking (call_frame_t *frame, xlator_t *this)
+{
+        afr_private_t           *priv           = this->private;
+        afr_local_t             *local          = NULL;
+        gf_boolean_t            need_dirty      = _gf_false;
+
+        local = frame->local;
+
+        if (!priv->quorum_count || !local->optimistic_change_log)
+                return _gf_false;
+
+        if (local->transaction.type == AFR_DATA_TRANSACTION ||
+            local->transaction.type == AFR_METADATA_TRANSACTION)
+                return _gf_false;
+
+        if (AFR_COUNT (local->transaction.failed_subvols, priv->child_count) ==
+            priv->child_count)
+                return _gf_false;
+
+        if (priv->arbiter_count) {
+                if (!afr_has_arbiter_fop_cbk_quorum (frame))
+                        need_dirty = _gf_true;
+        } else if (!afr_has_fop_cbk_quorum (frame)) {
+                need_dirty = _gf_true;
+        }
+
+        return need_dirty;
+}
+
 void
-afr_handle_quorum (call_frame_t *frame)
+afr_handle_quorum (call_frame_t *frame, xlator_t *this)
 {
         afr_local_t   *local = NULL;
         afr_private_t *priv  = NULL;
@@ -826,11 +856,15 @@ afr_handle_quorum (call_frame_t *frame)
                 return;
         }
 
+        if (afr_need_dirty_marking (frame, this))
+                goto set_response;
+
         for (i = 0; i < priv->child_count; i++) {
                 if (local->transaction.pre_op[i])
                         afr_transaction_fop_failed (frame, frame->this, i);
         }
 
+set_response:
         local->op_ret = -1;
         local->op_errno = afr_final_errno (local, priv);
         if (local->op_errno == 0)
@@ -874,9 +908,17 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
         int                     nothing_failed  = 1;
         gf_boolean_t            need_undirty    = _gf_false;
 
-        afr_handle_quorum (frame);
+        afr_handle_quorum (frame, this);
         local = frame->local;
-	idx = afr_index_for_transaction_type (local->transaction.type);
+        idx = afr_index_for_transaction_type (local->transaction.type);
+
+        xattr = dict_new ();
+        if (!xattr) {
+                local->op_ret = -1;
+                local->op_errno = ENOMEM;
+                afr_changelog_post_op_done (frame, this);
+                goto out;
+        }
 
         nothing_failed = afr_txn_nothing_failed (frame, this);
 
@@ -886,6 +928,11 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
 		need_undirty = _gf_true;
 
         if (local->op_ret < 0 && !nothing_failed) {
+                if (afr_need_dirty_marking (frame, this)) {
+                        local->dirty[idx] = hton32(1);
+                        goto set_dirty;
+                }
+
                 afr_changelog_post_op_done (frame, this);
                 goto out;
         }
@@ -902,14 +949,6 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
                 goto out;
         }
 
-	xattr = dict_new ();
-	if (!xattr) {
-		local->op_ret = -1;
-		local->op_errno = ENOMEM;
-		afr_changelog_post_op_done (frame, this);
-		goto out;
-	}
-
 	for (i = 0; i < priv->child_count; i++) {
 		if (local->transaction.failed_subvols[i])
 			local->pending[i][idx] = hton32(1);
@@ -928,6 +967,7 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
 	else
 		local->dirty[idx] = hton32(0);
 
+set_dirty:
 	ret = dict_set_static_bin (xattr, AFR_DIRTY, local->dirty,
 				   sizeof(int) * AFR_NUM_CHANGE_LOGS);
 	if (ret) {
-- 
1.8.3.1