cb8e9e
From d2c8c165deb86082a13c15cc33d558336ff22d14 Mon Sep 17 00:00:00 2001
cb8e9e
From: Ravishankar N <ravishankar@redhat.com>
cb8e9e
Date: Tue, 4 Aug 2015 18:37:47 +0530
cb8e9e
Subject: [PATCH 294/304] afr: modify afr_txn_nothing_failed()
cb8e9e
cb8e9e
Patch in master: http://review.gluster.org/#/c/11827/
cb8e9e
Patch in release-3.7: http://review.gluster.org/#/c/11985/
cb8e9e
In an AFR transaction, we need to consider something as failed only if the
cb8e9e
failure (either in the pre-op or the FOP phase) occurs on the bricks on which a
cb8e9e
transaction lock was obtained.
cb8e9e
cb8e9e
Without this, we would end up considering the transaction as failure even on the
cb8e9e
bricks on which the lock was not obtained, resulting in unnecessary fsyncs
cb8e9e
during the post-op phase of every write transaction for non-appending writes.
cb8e9e
cb8e9e
Change-Id: I02efa64f6cf80fcc3cb8dd6a9543dbf480b866b9
cb8e9e
BUG: 1227759
cb8e9e
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
cb8e9e
Reviewed-on: https://code.engineering.redhat.com/gerrit/56256
cb8e9e
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
cb8e9e
Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
cb8e9e
---
cb8e9e
 tests/bugs/replicate/bug-1250170-fsync.c  |   56 +++++++++++++++++++++++++++++
cb8e9e
 tests/bugs/replicate/bug-1250170-fsync.t  |   35 ++++++++++++++++++
cb8e9e
 xlators/cluster/afr/src/afr-transaction.c |   15 ++------
cb8e9e
 3 files changed, 94 insertions(+), 12 deletions(-)
cb8e9e
 create mode 100644 tests/bugs/replicate/bug-1250170-fsync.c
cb8e9e
 create mode 100644 tests/bugs/replicate/bug-1250170-fsync.t
cb8e9e
cb8e9e
diff --git a/tests/bugs/replicate/bug-1250170-fsync.c b/tests/bugs/replicate/bug-1250170-fsync.c
cb8e9e
new file mode 100644
cb8e9e
index 0000000..1d3025b
cb8e9e
--- /dev/null
cb8e9e
+++ b/tests/bugs/replicate/bug-1250170-fsync.c
cb8e9e
@@ -0,0 +1,56 @@
cb8e9e
+#include <stdio.h>
cb8e9e
+#include <stdlib.h>
cb8e9e
+#include <inttypes.h>
cb8e9e
+#include <sys/types.h>
cb8e9e
+#include <sys/stat.h>
cb8e9e
+#include <fcntl.h>
cb8e9e
+#include <unistd.h>
cb8e9e
+#include <string.h>
cb8e9e
+
cb8e9e
+int main (int argc, char **argv)
cb8e9e
+{
cb8e9e
+        char *file = NULL;
cb8e9e
+        int fd = -1;
cb8e9e
+        char *buffer = NULL;
cb8e9e
+        size_t buf_size = 0;
cb8e9e
+        size_t written = 0;
cb8e9e
+        int ret = 0;
cb8e9e
+        off_t offset = 0;
cb8e9e
+        int i = 0;
cb8e9e
+        int loop_count = 5;
cb8e9e
+
cb8e9e
+        if (argc < 2) {
cb8e9e
+                printf ("Usage:%s <filename>\n", argv[0]);
cb8e9e
+                return -1;
cb8e9e
+        }
cb8e9e
+
cb8e9e
+        file = argv[1];
cb8e9e
+        buf_size = 1024;
cb8e9e
+        buffer = calloc(1, buf_size);
cb8e9e
+        if (!buffer) {
cb8e9e
+                perror("calloc");
cb8e9e
+                return -1;
cb8e9e
+        }
cb8e9e
+        memset (buffer, 'R', buf_size);
cb8e9e
+
cb8e9e
+        fd = open(file, O_WRONLY);
cb8e9e
+        if (fd == -1) {
cb8e9e
+                perror("open");
cb8e9e
+                return -1;
cb8e9e
+        }
cb8e9e
+
cb8e9e
+        for (i = 0; i < loop_count; i++) {
cb8e9e
+                ret =  write (fd, buffer, buf_size);
cb8e9e
+                if (ret == -1) {
cb8e9e
+                        perror("write");
cb8e9e
+                        return ret;
cb8e9e
+                } else {
cb8e9e
+                        written += ret;
cb8e9e
+                }
cb8e9e
+                offset = lseek (fd, 0 , SEEK_SET);
cb8e9e
+        }
cb8e9e
+
cb8e9e
+        free(buffer);
cb8e9e
+        return 0;
cb8e9e
+
cb8e9e
+}
cb8e9e
diff --git a/tests/bugs/replicate/bug-1250170-fsync.t b/tests/bugs/replicate/bug-1250170-fsync.t
cb8e9e
new file mode 100644
cb8e9e
index 0000000..7a3fdbf
cb8e9e
--- /dev/null
cb8e9e
+++ b/tests/bugs/replicate/bug-1250170-fsync.t
cb8e9e
@@ -0,0 +1,35 @@
cb8e9e
+#!/bin/bash
cb8e9e
+
cb8e9e
+. $(dirname $0)/../../include.rc
cb8e9e
+. $(dirname $0)/../../volume.rc
cb8e9e
+
cb8e9e
+cleanup;
cb8e9e
+TEST gcc  $(dirname $0)/bug-1250170-fsync.c -o  $(dirname $0)/bug-1250170-fsync
cb8e9e
+TEST glusterd
cb8e9e
+TEST pidof glusterd
cb8e9e
+
cb8e9e
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
cb8e9e
+TEST $CLI volume set $V0 performance.write-behind off
cb8e9e
+TEST $CLI volume start $V0
cb8e9e
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
cb8e9e
+
cb8e9e
+TEST touch $M0/file
cb8e9e
+TEST kill_brick $V0 $H0 $B0/${V0}0
cb8e9e
+TEST gluster volume profile $V0 start
cb8e9e
+#Perform 5 non-sequential writes.
cb8e9e
+TEST $(dirname $0)/bug-1250170-fsync $M0/file
cb8e9e
+
cb8e9e
+#Run profile info initially to filter out the interval statistics in the
cb8e9e
+#subsequent runs.
cb8e9e
+TEST $CLI volume profile $V0 info
cb8e9e
+#We get only cumulative statistics.
cb8e9e
+write_count=$($CLI volume profile $V0 info | grep WRITE |awk '{count += $8} END {print count}')
cb8e9e
+fsync_count=$($CLI volume profile $V0 info | grep FSYNC |awk '{count += $8} END {print count}')
cb8e9e
+
cb8e9e
+EXPECT "5" echo $write_count
cb8e9e
+TEST [ -z $fsync_count ]
cb8e9e
+
cb8e9e
+TEST $CLI volume profile $V0 stop
cb8e9e
+TEST umount $M0
cb8e9e
+rm -f $(dirname $0)/bug-1250170-fsync
cb8e9e
+cleanup
cb8e9e
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
cb8e9e
index ba50abd..d773b5a 100644
cb8e9e
--- a/xlators/cluster/afr/src/afr-transaction.c
cb8e9e
+++ b/xlators/cluster/afr/src/afr-transaction.c
cb8e9e
@@ -493,18 +493,14 @@ afr_txn_nothing_failed (call_frame_t *frame, xlator_t *this)
cb8e9e
 {
cb8e9e
         afr_private_t *priv = NULL;
cb8e9e
         afr_local_t *local = NULL;
cb8e9e
-        int pre_op_count = 0;
cb8e9e
         int i = 0;
cb8e9e
 
cb8e9e
         local = frame->local;
cb8e9e
 	priv = this->private;
cb8e9e
 
cb8e9e
-        pre_op_count = AFR_COUNT (local->transaction.pre_op, priv->child_count);
cb8e9e
-        if (pre_op_count < priv->child_count)
cb8e9e
-                return _gf_false;
cb8e9e
-
cb8e9e
         for (i = 0; i < priv->child_count; i++) {
cb8e9e
-                if (local->transaction.failed_subvols[i])
cb8e9e
+                if (local->transaction.pre_op[i] &&
cb8e9e
+                    local->transaction.failed_subvols[i])
cb8e9e
                         return _gf_false;
cb8e9e
         }
cb8e9e
 
cb8e9e
@@ -1156,14 +1152,9 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
cb8e9e
 		goto err;
cb8e9e
 	}
cb8e9e
 
cb8e9e
-	if (call_count > 1 &&
cb8e9e
-	    (local->transaction.type == AFR_DATA_TRANSACTION ||
cb8e9e
+	if ((local->transaction.type == AFR_DATA_TRANSACTION ||
cb8e9e
 	     !local->optimistic_change_log)) {
cb8e9e
 
cb8e9e
-		/* If we are performing change on only one subvol, no
cb8e9e
-		   need to mark dirty, because we are setting the pending
cb8e9e
-		   counts already anyways
cb8e9e
-		*/
cb8e9e
 		local->dirty[idx] = hton32(1);
cb8e9e
 
cb8e9e
 		ret = dict_set_static_bin (xdata_req, AFR_DIRTY, local->dirty,
cb8e9e
-- 
cb8e9e
1.7.1
cb8e9e