12a457
From bdb10b5f21dc668de5fa5cf2dccb46d64d08d6ec Mon Sep 17 00:00:00 2001
12a457
From: Pranith Kumar K <pkarampu@redhat.com>
12a457
Date: Wed, 2 Mar 2016 22:09:44 +0530
12a457
Subject: [PATCH 69/80] cluster/afr: Choose local child as source if possible
12a457
12a457
It is better to choose a local brick as source if possible, to avoid
12a457
over-the-wire reads and thus save bandwidth. Also changed the code to
12a457
not attempt data-heal if the arbiter is selected as 'source'.
12a457
12a457
 >Change-Id: I9a328d0198422280b13a30ab99545370a301dfea
12a457
 >BUG: 1314150
12a457
 >Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
12a457
 >Reviewed-on: http://review.gluster.org/13585
12a457
 >Smoke: Gluster Build System <jenkins@build.gluster.com>
12a457
 >NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
12a457
 >Reviewed-by: Krutika Dhananjay <kdhananj@redhat.com>
12a457
 >Tested-by: Krutika Dhananjay <kdhananj@redhat.com>
12a457
 >CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
12a457
 >Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
12a457
 >(cherry picked from commit 2807e3fc005630213ab7ad251fef13d61c07ac6b)
12a457
12a457
 >Change-Id: I24ea66683f81e238a6c1850664a49fe554011a0a
12a457
 >BUG: 1322521
12a457
 >Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
12a457
 >Reviewed-on: http://review.gluster.org/13860
12a457
 >Smoke: Gluster Build System <jenkins@build.gluster.com>
12a457
 >NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
12a457
 >CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
12a457
 >Reviewed-by: Ravishankar N <ravishankar@redhat.com>
12a457
12a457
BUG: 1314724
12a457
Change-Id: I742c4a95ff5f01a5be501cf868171346b0aedbb4
12a457
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
12a457
Reviewed-on: https://code.engineering.redhat.com/gerrit/72367
12a457
---
12a457
 xlators/cluster/afr/src/afr-common.c             |    1 +
12a457
 xlators/cluster/afr/src/afr-self-heal-common.c   |   29 ++++++++++++++++++++++
12a457
 xlators/cluster/afr/src/afr-self-heal-data.c     |   12 ++-------
12a457
 xlators/cluster/afr/src/afr-self-heal-entry.c    |   10 +------
12a457
 xlators/cluster/afr/src/afr-self-heal-metadata.c |   22 ++++++----------
12a457
 xlators/cluster/afr/src/afr-self-heal.h          |    3 ++
12a457
 xlators/cluster/afr/src/afr-self-heald.c         |    3 ++
12a457
 xlators/cluster/afr/src/afr.c                    |    7 +++++
12a457
 xlators/cluster/afr/src/afr.h                    |    1 +
12a457
 9 files changed, 57 insertions(+), 31 deletions(-)
12a457
12a457
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
12a457
index 308766c..dcbab23 100644
12a457
--- a/xlators/cluster/afr/src/afr-common.c
12a457
+++ b/xlators/cluster/afr/src/afr-common.c
12a457
@@ -1891,6 +1891,7 @@ afr_local_discovery_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
12a457
          * the slowest local subvolume is far preferable to a remote one.
12a457
          */
12a457
         if (is_local) {
12a457
+                priv->local[child_index] = 1;
12a457
                 /* Don't set arbiter as read child. */
12a457
                 if (AFR_IS_ARBITER_BRICK(priv, child_index))
12a457
                         goto out;
12a457
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
12a457
index cbe70f5..db9af05 100644
12a457
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
12a457
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
12a457
@@ -1594,3 +1594,32 @@ afr_throttled_selfheal (call_frame_t *frame, xlator_t *this)
12a457
                                       "pending, background self-heal rejected.");
12a457
         }
12a457
 }
12a457
+
12a457
+int
12a457
+afr_choose_source_by_policy (afr_private_t *priv, unsigned char *sources,
12a457
+                             afr_transaction_type type)
12a457
+{
12a457
+        int source = -1;
12a457
+        int i      = 0;
12a457
+
12a457
+        /* Give preference to local child to save on bandwidth */
12a457
+        for (i = 0; i < priv->child_count; i++) {
12a457
+                if (priv->local[i] && sources[i]) {
12a457
+                        if ((type == AFR_DATA_TRANSACTION) &&
12a457
+                            AFR_IS_ARBITER_BRICK (priv, i))
12a457
+                                continue;
12a457
+
12a457
+                        source = i;
12a457
+                        goto out;
12a457
+                }
12a457
+        }
12a457
+
12a457
+        for (i = 0; i < priv->child_count; i++) {
12a457
+                if (sources[i]) {
12a457
+                        source = i;
12a457
+                        goto out;
12a457
+                }
12a457
+        }
12a457
+out:
12a457
+        return source;
12a457
+}
12a457
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
12a457
index ebf262e..332471c 100644
12a457
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
12a457
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
12a457
@@ -577,7 +577,6 @@ __afr_selfheal_data_finalize_source (call_frame_t *frame, xlator_t *this,
12a457
 				     struct afr_reply *replies,
12a457
                                      uint64_t *witness)
12a457
 {
12a457
-	int i = 0;
12a457
 	afr_private_t *priv = NULL;
12a457
 	int source = -1;
12a457
 	int sources_count = 0;
12a457
@@ -614,13 +613,9 @@ __afr_selfheal_data_finalize_source (call_frame_t *frame, xlator_t *this,
12a457
 
12a457
 out:
12a457
         afr_mark_active_sinks (this, sources, locked_on, healed_sinks);
12a457
+        source = afr_choose_source_by_policy (priv, sources,
12a457
+                                              AFR_DATA_TRANSACTION);
12a457
 
12a457
-        for (i = 0; i < priv->child_count; i++) {
12a457
-                if (sources[i]) {
12a457
-                        source = i;
12a457
-                        break;
12a457
-                }
12a457
-        }
12a457
 	return source;
12a457
 }
12a457
 
12a457
@@ -734,8 +729,7 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd,
12a457
 
12a457
 		source = ret;
12a457
 
12a457
-                if (AFR_IS_ARBITER_BRICK(priv, source) &&
12a457
-                    AFR_COUNT (sources, priv->child_count) == 1) {
12a457
+                if (AFR_IS_ARBITER_BRICK(priv, source)) {
12a457
                         did_sh = _gf_false;
12a457
                         goto unlock;
12a457
                 }
12a457
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
12a457
index fe0596c..0837e5a 100644
12a457
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
12a457
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
12a457
@@ -361,7 +361,6 @@ __afr_selfheal_entry_finalize_source (xlator_t *this, unsigned char *sources,
12a457
                                       struct afr_reply *replies,
12a457
                                       uint64_t *witness)
12a457
 {
12a457
-	int i = 0;
12a457
 	afr_private_t *priv = NULL;
12a457
 	int source = -1;
12a457
 	int sources_count = 0;
12a457
@@ -378,13 +377,8 @@ __afr_selfheal_entry_finalize_source (xlator_t *this, unsigned char *sources,
12a457
 		return -1;
12a457
 	}
12a457
 
12a457
-	for (i = 0; i < priv->child_count; i++) {
12a457
-		if (sources[i]) {
12a457
-			source = i;
12a457
-			break;
12a457
-		}
12a457
-	}
12a457
-
12a457
+        source = afr_choose_source_by_policy (priv, sources,
12a457
+                                              AFR_ENTRY_TRANSACTION);
12a457
 	return source;
12a457
 }
12a457
 
12a457
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
12a457
index b58767c..778f2a1 100644
12a457
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
12a457
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
12a457
@@ -209,7 +209,7 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this,
12a457
 {
12a457
 	int i = 0;
12a457
 	afr_private_t *priv = NULL;
12a457
-	struct iatt first = {0, };
12a457
+	struct iatt srcstat = {0, };
12a457
 	int source = -1;
12a457
 	int sources_count = 0;
12a457
 
12a457
@@ -262,23 +262,17 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this,
12a457
         if (afr_dict_contains_heal_op(frame))
12a457
                 return -EIO;
12a457
 
12a457
-	for (i = 0; i < priv->child_count; i++) {
12a457
-		if (!sources[i])
12a457
-			continue;
12a457
-		if (source == -1) {
12a457
-			source = i;
12a457
-			first = replies[i].poststat;
12a457
-                        break;
12a457
-		}
12a457
-	}
12a457
+        source = afr_choose_source_by_policy (priv, sources,
12a457
+                                              AFR_METADATA_TRANSACTION);
12a457
+        srcstat = replies[source].poststat;
12a457
 
12a457
 	for (i = 0; i < priv->child_count; i++) {
12a457
 		if (!sources[i] || i == source)
12a457
 			continue;
12a457
-		if (!IA_EQUAL (first, replies[i].poststat, type) ||
12a457
-		    !IA_EQUAL (first, replies[i].poststat, uid) ||
12a457
-		    !IA_EQUAL (first, replies[i].poststat, gid) ||
12a457
-		    !IA_EQUAL (first, replies[i].poststat, prot)) {
12a457
+		if (!IA_EQUAL (srcstat, replies[i].poststat, type) ||
12a457
+		    !IA_EQUAL (srcstat, replies[i].poststat, uid) ||
12a457
+		    !IA_EQUAL (srcstat, replies[i].poststat, gid) ||
12a457
+		    !IA_EQUAL (srcstat, replies[i].poststat, prot)) {
12a457
                         gf_msg_debug (this->name, 0, "%s: iatt mismatch "
12a457
                                       "for source(%d) vs (%d)",
12a457
                                       uuid_utoa
12a457
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
12a457
index b298fa1..b0f545f 100644
12a457
--- a/xlators/cluster/afr/src/afr-self-heal.h
12a457
+++ b/xlators/cluster/afr/src/afr-self-heal.h
12a457
@@ -266,4 +266,7 @@ afr_selfheal_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
12a457
 int
12a457
 afr_locked_fill (call_frame_t *frame, xlator_t *this,
12a457
                  unsigned char *locked_on);
12a457
+int
12a457
+afr_choose_source_by_policy (afr_private_t *priv, unsigned char *sources,
12a457
+                             afr_transaction_type type);
12a457
 #endif /* !_AFR_SELFHEAL_H */
12a457
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
12a457
index d77a9ec..1da3cb9 100644
12a457
--- a/xlators/cluster/afr/src/afr-self-heald.c
12a457
+++ b/xlators/cluster/afr/src/afr-self-heald.c
12a457
@@ -521,14 +521,17 @@ afr_shd_index_healer (void *data)
12a457
 	struct subvol_healer *healer = NULL;
12a457
 	xlator_t *this = NULL;
12a457
 	int ret = 0;
12a457
+	afr_private_t *priv = NULL;
12a457
 
12a457
 	healer = data;
12a457
 	THIS = this = healer->this;
12a457
+	priv = this->private;
12a457
 
12a457
 	for (;;) {
12a457
 		afr_shd_healer_wait (healer);
12a457
 
12a457
 		ASSERT_LOCAL(this, healer);
12a457
+		priv->local[healer->subvol] = healer->local;
12a457
 
12a457
 		do {
12a457
 		        gf_msg_debug (this->name, 0,
12a457
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
12a457
index d65895a..49ce495 100644
12a457
--- a/xlators/cluster/afr/src/afr.c
12a457
+++ b/xlators/cluster/afr/src/afr.c
12a457
@@ -389,6 +389,13 @@ init (xlator_t *this)
12a457
 
12a457
         priv->wait_count = 1;
12a457
 
12a457
+        priv->local = GF_CALLOC (sizeof (unsigned char), child_count,
12a457
+                                 gf_afr_mt_char);
12a457
+        if (!priv->local) {
12a457
+                ret = -ENOMEM;
12a457
+                goto out;
12a457
+        }
12a457
+
12a457
         priv->child_up = GF_CALLOC (sizeof (unsigned char), child_count,
12a457
                                     gf_afr_mt_char);
12a457
         if (!priv->child_up) {
12a457
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
12a457
index dec4eaa..e507fd7 100644
12a457
--- a/xlators/cluster/afr/src/afr.h
12a457
+++ b/xlators/cluster/afr/src/afr.h
12a457
@@ -66,6 +66,7 @@ typedef struct _afr_private {
12a457
         inode_t *root_inode;
12a457
 
12a457
         unsigned char *child_up;
12a457
+        unsigned char *local;
12a457
 
12a457
         char **pending_key;
12a457
 
12a457
-- 
12a457
1.7.1
12a457