Blob Blame History Raw
From 212caab4f8db39845c4c391e97103be82c0e8d88 Mon Sep 17 00:00:00 2001
From: Anuradha <atalur@redhat.com>
Date: Tue, 16 Jun 2015 15:57:30 +0530
Subject: [PATCH 169/190] cluster/afr : truncate all sinks files

        Backport of: http://review.gluster.org/11252/

Problem : During data self-heal of sparse files,
sparseness of files is lost.

Cause : Earlier, only sink files whose ia_size was larger than
that of the source were truncated to the source's ia_size. When
the ia_size of a sink file was smaller than that of the source
file, the checksums of sparse blocks mismatched, which led to
unnecessary healing of those blocks. As a result, the sparseness
of the files was lost.

Solution : Truncate the files in all the sinks to the ia_size of
the source, irrespective of their current size relative to the
source file. After this change, the checksums of sparse blocks
no longer mismatch, so heal is not triggered for them. As a
result, the sparseness of the files is preserved.

Other fixes in this patch :
1) In afr_does_size_mismatch(), check for a size mismatch only
among the sources. Previously, the check was done across all
children of the replica.

2) In __afr_selfheal_data_checksums_match(), compare checksums
only for children with valid responses.

Upstream URLs:
1) master : http://review.gluster.org/11252/
2) 3.7    : http://review.gluster.org/11423/

Change-Id: Ifcdb1cdc9b16c4a8a7867aecf9fa94b66e5301c2
BUG: 1223677
Signed-off-by: Anuradha Talur <atalur@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/51673
Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
Tested-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
---
 tests/bugs/glusterfs/bug-853690.t            |    2 +-
 xlators/cluster/afr/src/afr-self-heal-data.c |   25 +++++++++++--------------
 2 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/tests/bugs/glusterfs/bug-853690.t b/tests/bugs/glusterfs/bug-853690.t
index d81be01..59facfc 100755
--- a/tests/bugs/glusterfs/bug-853690.t
+++ b/tests/bugs/glusterfs/bug-853690.t
@@ -65,7 +65,7 @@ TEST glusterfs --volfile=$B0/test.vol --attribute-timeout=0 --entry-timeout=0 $M
 # Send a single write, guaranteed to be short on one replica, and attempt to
 # read the data back. Failure to detect the short write results in different
 # file sizes and immediate split-brain (EIO).
-TEST dd if=/dev/zero of=$M0/file bs=128k count=1
+TEST dd if=/dev/urandom of=$M0/file bs=128k count=1
 TEST dd if=$M0/file of=/dev/null bs=128k count=1
 ########
 #
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 7567fe9..4ab6f00 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -102,10 +102,12 @@ __afr_selfheal_data_checksums_match (call_frame_t *frame, xlator_t *this,
 	for (i = 0; i < priv->child_count; i++) {
 		if (i == source)
 			continue;
-		if (memcmp (local->replies[source].checksum,
-			    local->replies[i].checksum,
-			    MD5_DIGEST_LENGTH))
-			return _gf_false;
+                if (local->replies[i].valid) {
+                        if (memcmp (local->replies[source].checksum,
+                                    local->replies[i].checksum,
+                                    MD5_DIGEST_LENGTH))
+                                return _gf_false;
+                }
 	}
 
 	return _gf_true;
@@ -383,23 +385,16 @@ out:
 static int
 __afr_selfheal_truncate_sinks (call_frame_t *frame, xlator_t *this,
 			       fd_t *fd, unsigned char *healed_sinks,
-			       struct afr_reply *replies, uint64_t size)
+			       uint64_t size)
 {
 	afr_local_t *local = NULL;
 	afr_private_t *priv = NULL;
-	unsigned char *larger_sinks = 0;
 	int i = 0;
 
 	local = frame->local;
 	priv = this->private;
 
-	larger_sinks = alloca0 (priv->child_count);
-	for (i = 0; i < priv->child_count; i++) {
-		if (healed_sinks[i] && replies[i].poststat.ia_size > size)
-			larger_sinks[i] = 1;
-	}
-
-	AFR_ONLIST (larger_sinks, frame, attr_cbk, ftruncate, fd, size, NULL);
+	AFR_ONLIST (healed_sinks, frame, attr_cbk, ftruncate, fd, size, NULL);
 
 	for (i = 0; i < priv->child_count; i++)
 		if (healed_sinks[i] && local->replies[i].op_ret == -1)
@@ -444,6 +439,9 @@ afr_does_size_mismatch (xlator_t *this, unsigned char *sources,
                 if (replies[i].op_ret < 0)
                         continue;
 
+                if (!sources[i])
+                        continue;
+
                 if (!min)
                         min = &replies[i].poststat;
 
@@ -691,7 +689,6 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd,
                 }
 
 		ret = __afr_selfheal_truncate_sinks (frame, this, fd, healed_sinks,
-						     locked_replies,
 						     locked_replies[source].poststat.ia_size);
 		if (ret < 0)
 			goto unlock;
-- 
1.7.1