Blob Blame History Raw
From 96575a86f55568e5e0a4d8d13aad1cf8a4aeb2b7 Mon Sep 17 00:00:00 2001
From: Ravishankar N <ravishankar@redhat.com>
Date: Fri, 30 Dec 2016 14:57:17 +0530
Subject: [PATCH 269/270] afr: Avoid resetting event_gen when brick is always
 down

Backport of: http://review.gluster.org/16309

Problem:
__afr_set_in_flight_sb_status(), which resets event_gen to zero, is
called if failed_subvols[i] is non-zero for any brick. But failed_subvols[i]
is also non-zero when the brick was already down *before* the transaction
started. Hence, if one brick of a replica-3 volume is down, every writev
that comes in triggers an inode refresh because of this reset, as seen
from the number of FSTATs in the profile info in the BZ.

Fix:
Reset event_gen only if the brick was previously a valid read child and
the FOP has failed on it for the first time, i.e. only when the
data/metadata readable map actually changes in this transaction.

Also `s/afr_inode_read_subvol_reset/afr_inode_event_gen_reset/` because
the function only resets event_gen and not the data/metadata readable
maps.

Change-Id: I2f6ccb792ad04b3b0d5cca5d5cb1a1d60e92046d
BUG: 1410025
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/94930
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
 xlators/cluster/afr/src/afr-common.c    | 24 +++++++++++++-----------
 xlators/cluster/afr/src/afr-dir-write.c |  6 +++---
 xlators/cluster/afr/src/afr.h           |  6 +-----
 3 files changed, 17 insertions(+), 19 deletions(-)

diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index d16ddec..696e909 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -134,6 +134,7 @@ __afr_inode_ctx_get (xlator_t *this, inode_t *inode, afr_inode_ctx_t **ctx)
 out:
         return ret;
 }
+
 /*
  * INODE CTX 64-bit VALUE FORMAT FOR SMALL (<= 16) SUBVOL COUNTS:
  *
@@ -195,10 +196,7 @@ __afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local,
 
         metadatamap_old = metadatamap = (val & 0x000000000000ffff);
         datamap_old = datamap = (val & 0x00000000ffff0000) >> 16;
-        /* Hard-code event to 0 since there is a failure and the inode
-         * needs to be refreshed anyway.
-         */
-        event = 0;
+        event = (val & 0xffffffff00000000) >> 32;
 
         if (txn_type == AFR_DATA_TRANSACTION)
                 tmp_map = datamap;
@@ -231,6 +229,8 @@ __afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local,
                         local->transaction.in_flight_sb = _gf_true;
                         metadatamap |= (1 << index);
                 }
+                if (metadatamap_old != metadatamap)
+                        event = 0;
                 break;
 
         case AFR_DATA_TRANSACTION:
@@ -240,10 +240,12 @@ __afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local,
                         local->transaction.in_flight_sb = _gf_true;
                         datamap |= (1 << index);
                 }
+                if (datamap_old != datamap)
+                        event = 0;
                 break;
 
         default:
-        break;
+                break;
         }
 
         val = ((uint64_t) metadatamap) |
@@ -354,7 +356,7 @@ out:
 }
 
 int
-__afr_inode_read_subvol_reset_small (inode_t *inode, xlator_t *this)
+__afr_inode_event_gen_reset_small (inode_t *inode, xlator_t *this)
 {
 	int               ret         = -1;
 	uint16_t          datamap     = 0;
@@ -455,7 +457,7 @@ out:
 }
 
 int
-__afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this)
+__afr_inode_event_gen_reset (inode_t *inode, xlator_t *this)
 {
 	afr_private_t *priv = NULL;
 	int ret = -1;
@@ -463,7 +465,7 @@ __afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this)
 	priv = this->private;
 
 	if (priv->child_count <= 16)
-		ret = __afr_inode_read_subvol_reset_small (inode, this);
+		ret = __afr_inode_event_gen_reset_small (inode, this);
 	else
 		ret = -1;
 
@@ -652,7 +654,7 @@ out:
 }
 
 int
-afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this)
+afr_inode_event_gen_reset (inode_t *inode, xlator_t *this)
 {
 	int ret = -1;
 
@@ -660,7 +662,7 @@ afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this)
 
         LOCK(&inode->lock);
         {
-                ret = __afr_inode_read_subvol_reset (inode, this);
+                ret = __afr_inode_event_gen_reset (inode, this);
         }
         UNLOCK(&inode->lock);
 out:
@@ -2124,7 +2126,7 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
 		if (afr_replies_interpret (frame, this, local->inode, NULL)) {
                         read_subvol = afr_read_subvol_decide (local->inode,
                                                               this, &args);
-			afr_inode_read_subvol_reset (local->inode, this);
+			afr_inode_event_gen_reset (local->inode, this);
 			goto cant_interpret;
 		} else {
                         read_subvol = afr_data_subvol_get (local->inode, this,
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
index 286a539..8e483c3 100644
--- a/xlators/cluster/afr/src/afr-dir-write.c
+++ b/xlators/cluster/afr/src/afr-dir-write.c
@@ -122,12 +122,12 @@ __afr_dir_write_finalize (call_frame_t *frame, xlator_t *this)
 			continue;
 		if (local->replies[i].op_ret < 0) {
 			if (local->inode)
-				afr_inode_read_subvol_reset (local->inode, this);
+				afr_inode_event_gen_reset (local->inode, this);
 			if (local->parent)
-				afr_inode_read_subvol_reset (local->parent,
+				afr_inode_event_gen_reset (local->parent,
 							     this);
 			if (local->parent2)
-				afr_inode_read_subvol_reset (local->parent2,
+				afr_inode_event_gen_reset (local->parent2,
 							     this);
 			continue;
 		}
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 69d0584..c48507b 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -883,7 +883,7 @@ afr_inode_read_subvol_set (inode_t *inode, xlator_t *this,
 			   int event_generation);
 
 int
-afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this);
+afr_inode_event_gen_reset (inode_t *inode, xlator_t *this);
 
 int
 afr_read_subvol_select_by_policy (inode_t *inode, xlator_t *this,
@@ -907,10 +907,6 @@ afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p,
 	afr_read_subvol_get(i, t, s, r, e, AFR_METADATA_TRANSACTION, a)
 
 int
-afr_inode_ctx_reset_unreadable_subvol (inode_t *inode, xlator_t *this,
-                                       int subvol_idx, int txn_type);
-
-int
 afr_inode_refresh (call_frame_t *frame, xlator_t *this, inode_t *inode,
                    uuid_t gfid, afr_inode_refresh_cbk_t cbk);
 
-- 
2.9.3