From 039d3b0631336ba2197fdf203226151a488d60bb Mon Sep 17 00:00:00 2001
From: karthik-us <ksubrahm@redhat.com>
Date: Mon, 11 Mar 2019 17:03:28 +0530
Subject: [PATCH 538/538] cluster/afr: Send truncate on arbiter brick from SHD

Problem:
In an arbiter volume configuration, SHD will not send any writes to the
arbiter brick even if there is a data-pending marker for it. If the geo-rep
master has an arbiter setup and there are data-pending markers for files on
the arbiter brick, SHD will not record any data changelog on that brick while
healing. While syncing data from master to slave, if the arbiter brick is
picked as the ACTIVE one, the slave may miss some of the data; if the arbiter
brick was newly added or replaced, the slave may miss all of it during sync.

Fix:
If there is a data-pending marker for the arbiter brick, send a truncate to
the arbiter brick during heal, so that the truncate is recorded as a data
transaction in the changelog.

Backport of: https://review.gluster.org/#/c/glusterfs/+/22325/

Change-Id: I174d5d557f1ae55dbe758bc92368c133f1ad0929
BUG: 1683893
Signed-off-by: karthik-us <ksubrahm@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/164978
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
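Notes:
The crux of the fix is an ordering change in __afr_selfheal_data():
__afr_selfheal_truncate_sinks() is now called before the early return that
skips block-level healing when the arbiter is the only sink, and the arbiter
is no longer masked out of healed_sinks around the ftruncate. Below is a
minimal, self-contained C sketch of the resulting control flow; heal_data(),
count_sinks() and the plain int array are hypothetical stand-ins for the afr
internals, not the actual glusterfs API.

#include <stdio.h>

#define CHILD_COUNT         3
#define ARBITER_BRICK_INDEX (CHILD_COUNT - 1)

static int
count_sinks (const int *sinks)
{
        int n = 0;
        int i;

        for (i = 0; i < CHILD_COUNT; i++)
                n += sinks[i];
        return n;
}

static void
heal_data (int *healed_sinks, long source_size)
{
        int i;

        /* Step 1: send truncate to every marked sink, the arbiter included.
         * The truncate carries no file data, but with changelog enabled it
         * is still recorded as a data ('D') transaction on the arbiter. */
        for (i = 0; i < CHILD_COUNT; i++)
                if (healed_sinks[i])
                        printf ("ftruncate(brick %d, size %ld)\n", i,
                                source_size);

        /* Step 2: only after the truncate, short-circuit the block-level
         * copy when the arbiter is the lone sink; it stores no data blocks,
         * so there is nothing left to transfer. */
        if (count_sinks (healed_sinks) == 1 &&
            healed_sinks[ARBITER_BRICK_INDEX]) {
                printf ("arbiter is the only sink: skip block-level heal\n");
                return;
        }

        printf ("continue with full data heal on the remaining sinks\n");
}

int
main (void)
{
        /* Mirrors the test below: only the newly added arbiter needs heal. */
        int healed_sinks[CHILD_COUNT] = {0, 0, 1};

        heal_data (healed_sinks, 5 * 128 * 1024L); /* 5 x 128K, as in the .t */
        return 0;
}

With the old ordering, the arbiter-only-sink check returned before any
truncate was wound, so no data FOP reached the arbiter and no 'D' record was
logged. The test below asserts exactly that: one TRUNCATE in the volume
profile output and one 'D' entry in the arbiter brick's changelog.
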
 ...bug-1686568-send-truncate-on-arbiter-from-shd.t | 38 ++++++++++++++++++++++
 tests/volume.rc                                    |  2 +-
 xlators/cluster/afr/src/afr-self-heal-data.c       | 25 +++++++-------
 3 files changed, 51 insertions(+), 14 deletions(-)
 create mode 100644 tests/bugs/replicate/bug-1686568-send-truncate-on-arbiter-from-shd.t

diff --git a/tests/bugs/replicate/bug-1686568-send-truncate-on-arbiter-from-shd.t b/tests/bugs/replicate/bug-1686568-send-truncate-on-arbiter-from-shd.t
new file mode 100644
index 0000000..78581e9
--- /dev/null
+++ b/tests/bugs/replicate/bug-1686568-send-truncate-on-arbiter-from-shd.t
@@ -0,0 +1,38 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+CHANGELOG_PATH_0="$B0/${V0}2/.glusterfs/changelogs"
+ROLLOVER_TIME=100
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 changelog.changelog on
+TEST $CLI volume set $V0 changelog.rollover-time $ROLLOVER_TIME
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST dd if=/dev/zero of=$M0/file1 bs=128K count=5
+
+TEST $CLI volume profile $V0 start
+TEST $CLI volume add-brick $V0 replica 3 arbiter 1 $H0:$B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+TEST $CLI volume profile $V0 info
+truncate_count=$($CLI volume profile $V0 info | grep TRUNCATE | awk '{count += $8} END {print count}')
+
+EXPECT "1" echo $truncate_count
+EXPECT "1" check_changelog_op ${CHANGELOG_PATH_0} "^ D "
+
+cleanup;
diff --git a/tests/volume.rc b/tests/volume.rc
index 6a983fd..3af663c 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -874,5 +874,5 @@ function check_changelog_op {
         local clog_path=$1
         local op=$2
 
-        $PYTHON $(dirname $0)/../../utils/changelogparser.py ${clog_path}/CHANGELOG | grep $op | wc -l
+        $PYTHON $(dirname $0)/../../utils/changelogparser.py ${clog_path}/CHANGELOG | grep "$op" | wc -l
 }
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 2ac6e47..8bdea2a 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -399,17 +399,18 @@ __afr_selfheal_truncate_sinks (call_frame_t *frame, xlator_t *this,
 {
 	afr_local_t *local = NULL;
 	afr_private_t *priv = NULL;
-        unsigned char arbiter_sink_status = 0;
 	int i = 0;
 
 	local = frame->local;
 	priv = this->private;
 
-        if (priv->arbiter_count) {
-                arbiter_sink_status = healed_sinks[ARBITER_BRICK_INDEX];
-                healed_sinks[ARBITER_BRICK_INDEX] = 0;
-        }
-
+        /* This sends the truncate on the arbiter brick as well, if it is
+         * marked as a sink. If changelog is enabled on the volume, it
+         * captures the truncate as a data transaction on the arbiter brick.
+         * This helps geo-rep sync the data properly from master to slave
+         * when the arbiter is the ACTIVE brick during syncing and some of
+         * its entries were healed for data as part of self-heal.
+         */
 	AFR_ONLIST (healed_sinks, frame, afr_sh_generic_fop_cbk, ftruncate, fd,
                     size, NULL);
 
@@ -420,8 +421,6 @@ __afr_selfheal_truncate_sinks (call_frame_t *frame, xlator_t *this,
 			*/
 			healed_sinks[i] = 0;
 
-        if (arbiter_sink_status)
-                healed_sinks[ARBITER_BRICK_INDEX] = arbiter_sink_status;
 	return 0;
 }
 
@@ -733,6 +732,11 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd,
                         goto unlock;
                 }
 
+		ret = __afr_selfheal_truncate_sinks (frame, this, fd, healed_sinks,
+						     locked_replies[source].poststat.ia_size);
+		if (ret < 0)
+			goto unlock;
+
                 if (priv->arbiter_count &&
                     AFR_COUNT (healed_sinks, priv->child_count) == 1 &&
                     healed_sinks[ARBITER_BRICK_INDEX]) {
@@ -740,11 +744,6 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd,
                         goto restore_time;
                 }
 
-		ret = __afr_selfheal_truncate_sinks (frame, this, fd, healed_sinks,
-						     locked_replies[source].poststat.ia_size);
-		if (ret < 0)
-			goto unlock;
-
 		ret = 0;
 
 	}
-- 
1.8.3.1