3604df
From 96575a86f55568e5e0a4d8d13aad1cf8a4aeb2b7 Mon Sep 17 00:00:00 2001
3604df
From: Ravishankar N <ravishankar@redhat.com>
3604df
Date: Fri, 30 Dec 2016 14:57:17 +0530
3604df
Subject: [PATCH 269/270] afr: Avoid resetting event_gen when brick is always
3604df
 down
3604df
3604df
Backport of : http://review.gluster.org/16309
3604df
3604df
Problem:
3604df
__afr_set_in_flight_sb_status(), which resets event_gen to zero, is
3604df
called if failed_subvols[i] is non-zero for any brick. But failed_subvols[i]
3604df
is true even if the brick was down *before* the transaction started.
3604df
Hence, if one brick is down in a replica-3 volume, every incoming writev
3604df
triggers an inode refresh because of this reset, as seen from the
3604df
number of FSTATs in the profile info in the BZ.
3604df
3604df
Fix:
3604df
Reset event_gen only if the brick was previously a valid read child and
3604df
the FOP failed on it for the first time.
3604df
3604df
Also rename afr_inode_read_subvol_reset to afr_inode_event_gen_reset, since
3604df
the function only resets event_gen, not the data/metadata readable maps.
3604df
3604df
Change-Id: I2f6ccb792ad04b3b0d5cca5d5cb1a1d60e92046d
3604df
BUG: 1410025
3604df
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
3604df
Reviewed-on: https://code.engineering.redhat.com/gerrit/94930
3604df
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
3604df
---
3604df
 xlators/cluster/afr/src/afr-common.c    | 24 +++++++++++++-----------
3604df
 xlators/cluster/afr/src/afr-dir-write.c |  6 +++---
3604df
 xlators/cluster/afr/src/afr.h           |  6 +-----
3604df
 3 files changed, 17 insertions(+), 19 deletions(-)
3604df
3604df
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
3604df
index d16ddec..696e909 100644
3604df
--- a/xlators/cluster/afr/src/afr-common.c
3604df
+++ b/xlators/cluster/afr/src/afr-common.c
3604df
@@ -134,6 +134,7 @@ __afr_inode_ctx_get (xlator_t *this, inode_t *inode, afr_inode_ctx_t **ctx)
3604df
 out:
3604df
         return ret;
3604df
 }
3604df
+
3604df
 /*
3604df
  * INODE CTX 64-bit VALUE FORMAT FOR SMALL (<= 16) SUBVOL COUNTS:
3604df
  *
3604df
@@ -195,10 +196,7 @@ __afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local,
3604df
 
3604df
         metadatamap_old = metadatamap = (val & 0x000000000000ffff);
3604df
         datamap_old = datamap = (val & 0x00000000ffff0000) >> 16;
3604df
-        /* Hard-code event to 0 since there is a failure and the inode
3604df
-         * needs to be refreshed anyway.
3604df
-         */
3604df
-        event = 0;
3604df
+        event = (val & 0xffffffff00000000) >> 32;
3604df
 
3604df
         if (txn_type == AFR_DATA_TRANSACTION)
3604df
                 tmp_map = datamap;
3604df
@@ -231,6 +229,8 @@ __afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local,
3604df
                         local->transaction.in_flight_sb = _gf_true;
3604df
                         metadatamap |= (1 << index);
3604df
                 }
3604df
+                if (metadatamap_old != metadatamap)
3604df
+                        event = 0;
3604df
                 break;
3604df
 
3604df
         case AFR_DATA_TRANSACTION:
3604df
@@ -240,10 +240,12 @@ __afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local,
3604df
                         local->transaction.in_flight_sb = _gf_true;
3604df
                         datamap |= (1 << index);
3604df
                 }
3604df
+                if (datamap_old != datamap)
3604df
+                        event = 0;
3604df
                 break;
3604df
 
3604df
         default:
3604df
-        break;
3604df
+                break;
3604df
         }
3604df
 
3604df
         val = ((uint64_t) metadatamap) |
3604df
@@ -354,7 +356,7 @@ out:
3604df
 }
3604df
 
3604df
 int
3604df
-__afr_inode_read_subvol_reset_small (inode_t *inode, xlator_t *this)
3604df
+__afr_inode_event_gen_reset_small (inode_t *inode, xlator_t *this)
3604df
 {
3604df
 	int               ret         = -1;
3604df
 	uint16_t          datamap     = 0;
3604df
@@ -455,7 +457,7 @@ out:
3604df
 }
3604df
 
3604df
 int
3604df
-__afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this)
3604df
+__afr_inode_event_gen_reset (inode_t *inode, xlator_t *this)
3604df
 {
3604df
 	afr_private_t *priv = NULL;
3604df
 	int ret = -1;
3604df
@@ -463,7 +465,7 @@ __afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this)
3604df
 	priv = this->private;
3604df
 
3604df
 	if (priv->child_count <= 16)
3604df
-		ret = __afr_inode_read_subvol_reset_small (inode, this);
3604df
+		ret = __afr_inode_event_gen_reset_small (inode, this);
3604df
 	else
3604df
 		ret = -1;
3604df
 
3604df
@@ -652,7 +654,7 @@ out:
3604df
 }
3604df
 
3604df
 int
3604df
-afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this)
3604df
+afr_inode_event_gen_reset (inode_t *inode, xlator_t *this)
3604df
 {
3604df
 	int ret = -1;
3604df
 
3604df
@@ -660,7 +662,7 @@ afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this)
3604df
 
3604df
         LOCK(&inode->lock);
3604df
         {
3604df
-                ret = __afr_inode_read_subvol_reset (inode, this);
3604df
+                ret = __afr_inode_event_gen_reset (inode, this);
3604df
         }
3604df
         UNLOCK(&inode->lock);
3604df
 out:
3604df
@@ -2124,7 +2126,7 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
3604df
 		if (afr_replies_interpret (frame, this, local->inode, NULL)) {
3604df
                         read_subvol = afr_read_subvol_decide (local->inode,
3604df
                                                               this, &args);
3604df
-			afr_inode_read_subvol_reset (local->inode, this);
3604df
+			afr_inode_event_gen_reset (local->inode, this);
3604df
 			goto cant_interpret;
3604df
 		} else {
3604df
                         read_subvol = afr_data_subvol_get (local->inode, this,
3604df
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
3604df
index 286a539..8e483c3 100644
3604df
--- a/xlators/cluster/afr/src/afr-dir-write.c
3604df
+++ b/xlators/cluster/afr/src/afr-dir-write.c
3604df
@@ -122,12 +122,12 @@ __afr_dir_write_finalize (call_frame_t *frame, xlator_t *this)
3604df
 			continue;
3604df
 		if (local->replies[i].op_ret < 0) {
3604df
 			if (local->inode)
3604df
-				afr_inode_read_subvol_reset (local->inode, this);
3604df
+				afr_inode_event_gen_reset (local->inode, this);
3604df
 			if (local->parent)
3604df
-				afr_inode_read_subvol_reset (local->parent,
3604df
+				afr_inode_event_gen_reset (local->parent,
3604df
 							     this);
3604df
 			if (local->parent2)
3604df
-				afr_inode_read_subvol_reset (local->parent2,
3604df
+				afr_inode_event_gen_reset (local->parent2,
3604df
 							     this);
3604df
 			continue;
3604df
 		}
3604df
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
3604df
index 69d0584..c48507b 100644
3604df
--- a/xlators/cluster/afr/src/afr.h
3604df
+++ b/xlators/cluster/afr/src/afr.h
3604df
@@ -883,7 +883,7 @@ afr_inode_read_subvol_set (inode_t *inode, xlator_t *this,
3604df
 			   int event_generation);
3604df
 
3604df
 int
3604df
-afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this);
3604df
+afr_inode_event_gen_reset (inode_t *inode, xlator_t *this);
3604df
 
3604df
 int
3604df
 afr_read_subvol_select_by_policy (inode_t *inode, xlator_t *this,
3604df
@@ -907,10 +907,6 @@ afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p,
3604df
 	afr_read_subvol_get(i, t, s, r, e, AFR_METADATA_TRANSACTION, a)
3604df
 
3604df
 int
3604df
-afr_inode_ctx_reset_unreadable_subvol (inode_t *inode, xlator_t *this,
3604df
-                                       int subvol_idx, int txn_type);
3604df
-
3604df
-int
3604df
 afr_inode_refresh (call_frame_t *frame, xlator_t *this, inode_t *inode,
3604df
                    uuid_t gfid, afr_inode_refresh_cbk_t cbk);
3604df
 
3604df
-- 
3604df
2.9.3
3604df