Blob Blame History Raw
From 5c188a0f989c50985584272dc5cef39177832708 Mon Sep 17 00:00:00 2001
From: Pranith Kumar K <pkarampu@redhat.com>
Date: Thu, 17 Mar 2016 19:42:00 +0530
Subject: [PATCH 73/80] cluster/afr: Fix partial heals in 3-way replication

Problem:
When there are 2 sources and one sink and if two self-heal daemons
try to acquire locks at the same time, there is a chance that it
gets a lock on one source and sink leading partial to heal. This will
need one more heal from the remaining source to sink for the complete
self-heal. This is not optimal.

Fix:
Upgrade non-blocking locks to blocking lock on all the subvolumes, if
the number of locks acquired is majority and there were eagains.

 >BUG: 1318751
 >Change-Id: Iae10b8d3402756c4164b98cc49876056ff7a61e5
 >Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
 >Reviewed-on: http://review.gluster.org/13766
 >Smoke: Gluster Build System <jenkins@build.gluster.com>
 >NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
 >CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
 >Reviewed-by: Ravishankar N <ravishankar@redhat.com>
 >(cherry picked from commit 8deedef565df49def75083678f8d1558c7b1f7d3)

 >Change-Id: Ia164360dc1474a717f63633f5deb2c39cc15017c
 >BUG: 1327863
 >Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
 >Reviewed-on: http://review.gluster.org/14008
 >Smoke: Gluster Build System <jenkins@build.gluster.com>
 >NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
 >CentOS-regression: Gluster Build System <jenkins@build.gluster.com>

BUG: 1314724
Change-Id: I9ddea45d827a1cedba3170913b7190a9564449a0
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/72371
---
 xlators/cluster/afr/src/afr-self-heal-common.c   |  121 ++++++++++++++++++++-
 xlators/cluster/afr/src/afr-self-heal-data.c     |   10 +-
 xlators/cluster/afr/src/afr-self-heal-entry.c    |    7 +-
 xlators/cluster/afr/src/afr-self-heal-metadata.c |    2 +-
 xlators/cluster/afr/src/afr-self-heal.h          |   13 ++-
 5 files changed, 138 insertions(+), 15 deletions(-)

diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index b3dbc95..130388d 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -727,11 +727,14 @@ afr_selfheal_find_direction (call_frame_t *frame, xlator_t *this,
 
 void
 afr_log_selfheal (uuid_t gfid, xlator_t *this, int ret, char *type,
-                  int source, unsigned char *healed_sinks)
+                  int source, unsigned char *sources,
+                  unsigned char *healed_sinks)
 {
         char *status = NULL;
         char *sinks_str = NULL;
         char *p = NULL;
+        char *sources_str = NULL;
+        char *q = NULL;
         afr_private_t *priv = NULL;
         gf_loglevel_t loglevel = GF_LOG_NONE;
         int i = 0;
@@ -739,10 +742,18 @@ afr_log_selfheal (uuid_t gfid, xlator_t *this, int ret, char *type,
         priv = this->private;
         sinks_str = alloca0 (priv->child_count * 8);
         p = sinks_str;
+        sources_str = alloca0 (priv->child_count * 8);
+        q = sources_str;
         for (i = 0; i < priv->child_count; i++) {
-                if (!healed_sinks[i])
-                        continue;
-                p += sprintf (p, "%d ", i);
+                if (healed_sinks[i])
+                        p += sprintf (p, "%d ", i);
+                if (sources[i]) {
+                        if (source == i) {
+                                q += sprintf (q, "[%d] ", i);
+                        } else {
+                                q += sprintf (q, "%d ", i);
+                        }
+                }
         }
 
         if (ret < 0) {
@@ -755,8 +766,8 @@ afr_log_selfheal (uuid_t gfid, xlator_t *this, int ret, char *type,
 
         gf_msg (this->name, loglevel, 0,
                 AFR_MSG_SELF_HEAL_INFO, "%s %s selfheal on %s. "
-                "source=%d sinks=%s", status, type, uuid_utoa (gfid),
-                source, sinks_str);
+                "sources=%s sinks=%s", status, type, uuid_utoa (gfid),
+                sources_str, sinks_str);
 }
 
 int
@@ -1012,6 +1023,67 @@ afr_selfheal_inodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,
 	return afr_locked_fill (frame, this, locked_on);
 }
 
+static void
+afr_get_lock_and_eagain_counts (afr_private_t *priv, struct afr_reply *replies,
+                                int *lock_count, int *eagain_count)
+{
+	int i = 0;
+
+	for (i = 0; i < priv->child_count; i++) {
+	        if (!replies[i].valid)
+	                continue;
+	        if (replies[i].op_ret == 0) {
+	                (*lock_count)++;
+                } else if (replies[i].op_ret == -1 &&
+		         replies[i].op_errno == EAGAIN) {
+		        (*eagain_count)++;
+                }
+	}
+}
+
+/*Do blocking locks if number of locks acquired is majority and there were some
+ * EAGAINs. Useful for odd-way replication*/
+int
+afr_selfheal_tie_breaker_inodelk (call_frame_t *frame, xlator_t *this,
+                                  inode_t *inode, char *dom, off_t off,
+                                  size_t size, unsigned char *locked_on)
+{
+	loc_t loc = {0,};
+	struct gf_flock flock = {0, };
+	afr_local_t *local = NULL;
+	afr_private_t *priv = NULL;
+	int lock_count = 0;
+	int eagain_count = 0;
+
+	priv = this->private;
+	local = frame->local;
+
+	loc.inode = inode_ref (inode);
+	gf_uuid_copy (loc.gfid, inode->gfid);
+
+	flock.l_type = F_WRLCK;
+	flock.l_start = off;
+	flock.l_len = size;
+
+	AFR_ONALL (frame, afr_selfheal_lock_cbk, inodelk, dom,
+		   &loc, F_SETLK, &flock, NULL);
+
+        afr_get_lock_and_eagain_counts (priv, local->replies, &lock_count,
+                                        &eagain_count);
+
+	if (lock_count > priv->child_count/2 && eagain_count) {
+                afr_locked_fill (frame, this, locked_on);
+                afr_selfheal_uninodelk (frame, this, inode, dom, off,
+                                        size, locked_on);
+
+                AFR_SEQ (frame, afr_selfheal_lock_cbk, inodelk, dom,
+                         &loc, F_SETLKW, &flock, NULL);
+        }
+
+	loc_wipe (&loc);
+
+	return afr_locked_fill (frame, this, locked_on);
+}
 
 int
 afr_selfheal_uninodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,
@@ -1092,6 +1164,43 @@ afr_selfheal_entrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
 	return afr_locked_fill (frame, this, locked_on);
 }
 
+int
+afr_selfheal_tie_breaker_entrylk (call_frame_t *frame, xlator_t *this,
+                                  inode_t *inode, char *dom, const char *name,
+                                  unsigned char *locked_on)
+{
+	loc_t loc = {0,};
+	afr_local_t *local = NULL;
+	afr_private_t *priv = NULL;
+	int lock_count = 0;
+	int eagain_count = 0;
+
+	priv = this->private;
+	local = frame->local;
+
+	loc.inode = inode_ref (inode);
+	gf_uuid_copy (loc.gfid, inode->gfid);
+
+	AFR_ONALL (frame, afr_selfheal_lock_cbk, entrylk, dom, &loc,
+		   name, ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL);
+
+        afr_get_lock_and_eagain_counts (priv, local->replies, &lock_count,
+                                        &eagain_count);
+
+	if (lock_count > priv->child_count/2 && eagain_count) {
+                afr_locked_fill (frame, this, locked_on);
+                afr_selfheal_unentrylk (frame, this, inode, dom, name,
+                                        locked_on);
+
+                AFR_SEQ (frame, afr_selfheal_lock_cbk, entrylk, dom,
+                         &loc, name, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
+	}
+
+	loc_wipe (&loc);
+
+	return afr_locked_fill (frame, this, locked_on);
+}
+
 
 int
 afr_selfheal_unentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 332471c..67bb8e6 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -786,7 +786,7 @@ out:
 
         if (did_sh)
                 afr_log_selfheal (fd->inode->gfid, this, ret, "data", source,
-                                  healed_sinks);
+                                  sources, healed_sinks);
         else
                 ret = 1;
 
@@ -844,8 +844,9 @@ afr_selfheal_data (call_frame_t *frame, xlator_t *this, inode_t *inode)
 
 	locked_on = alloca0 (priv->child_count);
 
-	ret = afr_selfheal_tryinodelk (frame, this, inode, priv->sh_domain, 0, 0,
-				       locked_on);
+	ret = afr_selfheal_tie_breaker_inodelk (frame, this, inode,
+	                                        priv->sh_domain, 0, 0,
+				                locked_on);
 	{
 		if (ret < AFR_SH_MIN_PARTICIPANTS) {
                         gf_msg_debug (this->name, 0, "%s: Skipping "
@@ -864,7 +865,8 @@ afr_selfheal_data (call_frame_t *frame, xlator_t *this, inode_t *inode)
 		ret = __afr_selfheal_data (frame, this, fd, locked_on);
 	}
 unlock:
-	afr_selfheal_uninodelk (frame, this, inode, priv->sh_domain, 0, 0, locked_on);
+	afr_selfheal_uninodelk (frame, this, inode, priv->sh_domain, 0, 0,
+	                        locked_on);
 
 	if (fd)
 		fd_unref (fd);
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index 0837e5a..a93c640 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -704,7 +704,7 @@ postop_unlock:
 out:
         if (did_sh)
                 afr_log_selfheal (fd->inode->gfid, this, ret, "entry", source,
-                                  healed_sinks);
+                                  sources, healed_sinks);
         else
                 ret = 1;
 
@@ -759,8 +759,9 @@ afr_selfheal_entry (call_frame_t *frame, xlator_t *this, inode_t *inode)
 	locked_on = alloca0 (priv->child_count);
 	long_name_locked = alloca0 (priv->child_count);
 
-	ret = afr_selfheal_tryentrylk (frame, this, inode, priv->sh_domain, NULL,
-				       locked_on);
+	ret = afr_selfheal_tie_breaker_entrylk (frame, this, inode,
+	                                        priv->sh_domain, NULL,
+                                                locked_on);
 	{
 		if (ret < AFR_SH_MIN_PARTICIPANTS) {
                         gf_msg_debug (this->name, 0, "%s: Skipping "
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index 778f2a1..d6daadc 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -427,7 +427,7 @@ unlock:
 
         if (did_sh)
                 afr_log_selfheal (inode->gfid, this, ret, "metadata", source,
-                                  healed_sinks);
+                                  sources, healed_sinks);
         else
                 ret = 1;
 
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index b0f545f..e0a3341 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -113,6 +113,11 @@ afr_selfheal_tryinodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,
 			 unsigned char *locked_on);
 
 int
+afr_selfheal_tie_breaker_inodelk (call_frame_t *frame, xlator_t *this,
+                                  inode_t *inode, char *dom, off_t off,
+                                  size_t size, unsigned char *locked_on);
+
+int
 afr_selfheal_uninodelk (call_frame_t *frame, xlator_t *this, inode_t *inode,
 			char *dom, off_t off, size_t size,
 			const unsigned char *locked_on);
@@ -126,6 +131,11 @@ afr_selfheal_tryentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
 			 char *dom, const char *name, unsigned char *locked_on);
 
 int
+afr_selfheal_tie_breaker_entrylk (call_frame_t *frame, xlator_t *this,
+                                  inode_t *inode, char *dom, const char *name,
+                                  unsigned char *locked_on);
+
+int
 afr_selfheal_unentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
 			char *dom, const char *name, unsigned char *locked_on);
 
@@ -197,7 +207,8 @@ afr_success_count (struct afr_reply *replies, unsigned int count);
 
 void
 afr_log_selfheal (uuid_t gfid, xlator_t *this, int ret, char *type,
-                  int source, unsigned char *healed_sinks);
+                  int source, unsigned char *sources,
+                  unsigned char *healed_sinks);
 
 void
 afr_mark_largest_file_as_source (xlator_t *this, unsigned char *sources,
-- 
1.7.1