Blob Blame History Raw
From 449250fa06c3a9ce9fb185216885b817005ed12a Mon Sep 17 00:00:00 2001
From: Krutika Dhananjay <kdhananj@redhat.com>
Date: Thu, 8 Dec 2016 22:49:48 +0530
Subject: [PATCH 237/246] cluster/afr: Fix per-txn optimistic changelog
 initialisation

        Backport of: http://review.gluster.org/16075

Incorrect initialisation of local->optimistic_change_log was leading
to skipped pre-op and post-op even when a brick didn't participate in
the txn because it was down.
The result - missing granular name index resulting in some entries
never getting healed.

FIX:
Initialise local->optimistic_change_log just before pre-op.

Also fixed granular entry heal to create the granular name index in
pre-op as opposed to post-op. This is to prevent loss of granular
information when during an entry txn, the good (src) brick goes
offline before the post-op is done. This would cause self-heal to
do conservative merge (since dirty xattr is the only information
available), which when granular-entry-heal is enabled, expects
granular indices, the lack of which can lead to loss of data in
the worst case.

Change-Id: I1b752fc92fdee389daca89dbeadec2012546d7a2
BUG: 1400057
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/92713
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
---
 tests/bugs/replicate/bug-1402730.t        | 42 ++++++++++++++
 xlators/cluster/afr/src/afr-common.c      |  5 --
 xlators/cluster/afr/src/afr-transaction.c | 93 +++++++++++++++++++++++--------
 3 files changed, 111 insertions(+), 29 deletions(-)
 create mode 100644 tests/bugs/replicate/bug-1402730.t

diff --git a/tests/bugs/replicate/bug-1402730.t b/tests/bugs/replicate/bug-1402730.t
new file mode 100644
index 0000000..c6768a0
--- /dev/null
+++ b/tests/bugs/replicate/bug-1402730.t
@@ -0,0 +1,42 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 granular-entry-heal on
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0
+
+TEST mkdir -p $M0/a/b/c -p
+cd $M0/a/b/c
+
+TEST kill_brick $V0 $H0 $B0/${V0}2
+rm -rf $B0/${V0}2/*
+rm -rf $B0/${V0}2/.glusterfs
+TEST $CLI volume start $V0 force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+TEST touch file
+
+GFID_C=$(get_gfid_string $M0/a/b/c)
+TEST stat $B0/${V0}0/.glusterfs/indices/entry-changes/$GFID_C/file
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$GFID_C/file
+
+EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}0/a/b/c trusted.afr.$V0-client-2 entry
+EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}1/a/b/c trusted.afr.$V0-client-2 entry
+
+cd ~
+
+cleanup
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index b825b19..60bae18 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -4949,7 +4949,6 @@ out:
 int
 afr_transaction_local_init (afr_local_t *local, xlator_t *this)
 {
-        int            child_up_count = 0;
         int            ret = -ENOMEM;
         afr_private_t *priv = NULL;
 
@@ -4968,10 +4967,6 @@ afr_transaction_local_init (afr_local_t *local, xlator_t *this)
         }
 
         ret = -ENOMEM;
-        child_up_count = AFR_COUNT (local->child_up, priv->child_count);
-        if (priv->optimistic_change_log && child_up_count == priv->child_count)
-                local->optimistic_change_log = 1;
-
 	local->pre_op_compat = priv->pre_op_compat;
 
         local->transaction.eager_lock =
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index f397ae7..9a3b2e9 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -1163,19 +1163,21 @@ void
 afr_changelog_populate_xdata (call_frame_t *frame, afr_xattrop_type_t op,
                               dict_t **xdata, dict_t **newloc_xdata)
 {
-        dict_t      *xdata1 = NULL;
-        dict_t      *xdata2 = NULL;
-        afr_local_t *local  = NULL;
-        afr_private_t *priv = NULL;
-        int         ret     = 0;
-        const char  *name   = NULL;
+        int              i                    = 0;
+        int              ret                  = 0;
+        char            *key                  = NULL;
+        const char      *name                 = NULL;
+        dict_t          *xdata1               = NULL;
+        dict_t          *xdata2               = NULL;
+        xlator_t        *this                 = NULL;
+        afr_local_t     *local                = NULL;
+        afr_private_t   *priv                 = NULL;
+        gf_boolean_t     need_entry_key_set   = _gf_true;
 
         local = frame->local;
-        priv = THIS->private;
+        this = THIS;
+        priv = this->private;
 
-        /*Populate xdata for POST_OP only.*/
-        if (op == AFR_TRANSACTION_PRE_OP)
-                goto out;
         if (local->transaction.type == AFR_DATA_TRANSACTION ||
             local->transaction.type == AFR_METADATA_TRANSACTION)
                 goto out;
@@ -1186,26 +1188,53 @@ afr_changelog_populate_xdata (call_frame_t *frame, afr_xattrop_type_t op,
         xdata1 = dict_new();
         if (!xdata1)
                 goto out;
+
         name = local->loc.name;
         if (local->op == GF_FOP_LINK)
                 name = local->newloc.name;
-        ret = dict_set_str (xdata1, GF_XATTROP_ENTRY_IN_KEY, (char *)name);
-        if (ret)
-                gf_msg (THIS->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_SET_FAILED,
-                        "%s/%s: Could not set xattrop-entry key during post-op",
-                        uuid_utoa (local->loc.pargfid), local->loc.name);
-        if (local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
-                xdata2 = dict_new();
-                if (!xdata2)
-                        goto out;
-                ret = dict_set_str (xdata2, GF_XATTROP_ENTRY_IN_KEY,
-                                    (char *)local->newloc.name);
+
+        switch (op) {
+        case AFR_TRANSACTION_PRE_OP:
+                key = GF_XATTROP_ENTRY_IN_KEY;
+                break;
+        case AFR_TRANSACTION_POST_OP:
+                if (afr_txn_nothing_failed (frame, this)) {
+                        key = GF_XATTROP_ENTRY_OUT_KEY;
+                        for (i = 0; i < priv->child_count; i++) {
+                                if (!local->transaction.failed_subvols[i])
+                                        continue;
+                                need_entry_key_set = _gf_false;
+                                break;
+                        }
+                } else {
+                        key = GF_XATTROP_ENTRY_IN_KEY;
+                }
+                break;
+        }
+
+        if (need_entry_key_set) {
+                ret = dict_set_str (xdata1, key, (char *)name);
                 if (ret)
                         gf_msg (THIS->name, GF_LOG_ERROR, 0,
                                 AFR_MSG_DICT_SET_FAILED,
-                                "%s/%s: Could not set xattrop-entry key during"
-                                " post-op", uuid_utoa (local->newloc.pargfid),
-                                local->newloc.name);
+                                "%s/%s: Could not set %s key during xattrop",
+                                uuid_utoa (local->loc.pargfid), local->loc.name,
+                                key);
+                if (local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
+                        xdata2 = dict_new ();
+                        if (!xdata2)
+                                goto out;
+
+                        ret = dict_set_str (xdata2, key,
+                                            (char *)local->newloc.name);
+                        if (ret)
+                                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                                        AFR_MSG_DICT_SET_FAILED,
+                                        "%s/%s: Could not set %s key during "
+                                        "xattrop",
+                                        uuid_utoa (local->newloc.pargfid),
+                                        local->newloc.name, key);
+                }
         }
 
         *xdata = xdata1;
@@ -1622,6 +1651,20 @@ afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
 	return 0;
 }
 
+static void
+afr_init_optimistic_changelog_for_txn (xlator_t *this, afr_local_t *local)
+{
+        int                locked_count   = 0;
+        afr_private_t     *priv           = NULL;
+
+        priv = this->private;
+
+        locked_count = AFR_COUNT (local->transaction.pre_op, priv->child_count);
+        if (priv->optimistic_change_log && locked_count == priv->child_count)
+                local->optimistic_change_log = 1;
+
+        return;
+}
 
 int
 afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
@@ -1653,6 +1696,8 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
                 }
 	}
 
+        afr_init_optimistic_changelog_for_txn (this, local);
+
         /* This condition should not be met with present code, as
          * transaction.done will be called if locks are not acquired on even a
          * single node.
-- 
2.9.3