7f4c2a
From 56092f9ab16a7606e549e0aba84a78ed6de0d397 Mon Sep 17 00:00:00 2001
7f4c2a
From: Krutika Dhananjay <kdhananj@redhat.com>
7f4c2a
Date: Wed, 24 Jun 2015 08:02:51 +0530
7f4c2a
Subject: [PATCH 140/190] cluster/afr: Pick gfid from poststat during fresh lookup for read child calculation
7f4c2a
7f4c2a
        Backport of: http://review.gluster.org/11373
7f4c2a
7f4c2a
Change-Id: I669f7c7d02e1dbd276f3d4c9c7bd8c575517e1b7
7f4c2a
BUG: 1223757
7f4c2a
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
7f4c2a
Reviewed-on: https://code.engineering.redhat.com/gerrit/51654
7f4c2a
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
7f4c2a
Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
7f4c2a
---
7f4c2a
 libglusterfs/src/inode.c                  |   22 +++++++++
7f4c2a
 libglusterfs/src/inode.h                  |    3 +
7f4c2a
 xlators/cluster/afr/src/afr-common.c      |   68 +++++++++++++++++-----------
7f4c2a
 xlators/cluster/afr/src/afr-dir-read.c    |    7 +++-
7f4c2a
 xlators/cluster/afr/src/afr-dir-write.c   |    6 +-
7f4c2a
 xlators/cluster/afr/src/afr-inode-write.c |    5 +-
7f4c2a
 xlators/cluster/afr/src/afr-read-txn.c    |    4 +-
7f4c2a
 xlators/cluster/afr/src/afr.h             |   19 ++++++---
7f4c2a
 8 files changed, 93 insertions(+), 41 deletions(-)
7f4c2a
7f4c2a
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
7f4c2a
index 7c54865..7d3215e 100644
7f4c2a
--- a/libglusterfs/src/inode.c
7f4c2a
+++ b/libglusterfs/src/inode.c
7f4c2a
@@ -2126,6 +2126,28 @@ inode_ctx_reset0 (inode_t *inode, xlator_t *xlator, uint64_t *value1_p)
7f4c2a
         return ret;
7f4c2a
 }
7f4c2a
 
7f4c2a
+int
7f4c2a
+inode_is_linked (inode_t *inode)
7f4c2a
+{
7f4c2a
+        int            ret   = 0;
7f4c2a
+        inode_table_t *table = NULL;
7f4c2a
+
7f4c2a
+        if (!inode) {
7f4c2a
+                gf_log_callingfn (THIS->name, GF_LOG_WARNING,
7f4c2a
+                                  "inode not found");
7f4c2a
+                return 0;
7f4c2a
+        }
7f4c2a
+
7f4c2a
+        table = inode->table;
7f4c2a
+
7f4c2a
+        pthread_mutex_lock (&table->lock);
7f4c2a
+        {
7f4c2a
+                ret = __is_inode_hashed (inode);
7f4c2a
+        }
7f4c2a
+        pthread_mutex_unlock (&table->lock);
7f4c2a
+
7f4c2a
+        return ret;
7f4c2a
+}
7f4c2a
 
7f4c2a
 void
7f4c2a
 inode_dump (inode_t *inode, char *prefix)
7f4c2a
diff --git a/libglusterfs/src/inode.h b/libglusterfs/src/inode.h
7f4c2a
index 5081559..474dc39 100644
7f4c2a
--- a/libglusterfs/src/inode.h
7f4c2a
+++ b/libglusterfs/src/inode.h
7f4c2a
@@ -269,4 +269,7 @@ inode_table_set_lru_limit (inode_table_t *table, uint32_t lru_limit);
7f4c2a
 void
7f4c2a
 inode_ctx_merge (fd_t *fd, inode_t *inode, inode_t *linked_inode);
7f4c2a
 
7f4c2a
+int
7f4c2a
+inode_is_linked (inode_t *inode);
7f4c2a
+
7f4c2a
 #endif /* _INODE_H */
7f4c2a
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
7f4c2a
index 410d31d..7150f0f 100644
7f4c2a
--- a/xlators/cluster/afr/src/afr-common.c
7f4c2a
+++ b/xlators/cluster/afr/src/afr-common.c
7f4c2a
@@ -987,7 +987,7 @@ out:
7f4c2a
 
7f4c2a
 
7f4c2a
 int
7f4c2a
-afr_hash_child (inode_t *inode, int32_t child_count, int hashmode)
7f4c2a
+afr_hash_child (afr_read_subvol_args_t *args, int32_t child_count, int hashmode)
7f4c2a
 {
7f4c2a
         uuid_t gfid_copy = {0,};
7f4c2a
         pid_t pid;
7f4c2a
@@ -996,11 +996,9 @@ afr_hash_child (inode_t *inode, int32_t child_count, int hashmode)
7f4c2a
                 return -1;
7f4c2a
         }
7f4c2a
 
7f4c2a
-        if (inode) {
7f4c2a
-               gf_uuid_copy (gfid_copy, inode->gfid);
7f4c2a
-        }
7f4c2a
+        gf_uuid_copy (gfid_copy, args->gfid);
7f4c2a
 
7f4c2a
-        if (hashmode > 1 && inode->ia_type != IA_IFDIR) {
7f4c2a
+        if ((hashmode > 1) && (args->ia_type != IA_IFDIR)) {
7f4c2a
                 /*
7f4c2a
                  * Why getpid?  Because it's one of the cheapest calls
7f4c2a
                  * available - faster than gethostname etc. - and returns a
7f4c2a
@@ -1021,32 +1019,41 @@ afr_hash_child (inode_t *inode, int32_t child_count, int hashmode)
7f4c2a
 
7f4c2a
 int
7f4c2a
 afr_read_subvol_select_by_policy (inode_t *inode, xlator_t *this,
7f4c2a
-				  unsigned char *readable)
7f4c2a
+				  unsigned char *readable,
7f4c2a
+                                  afr_read_subvol_args_t *args)
7f4c2a
 {
7f4c2a
-	afr_private_t *priv = NULL;
7f4c2a
-	int read_subvol = -1;
7f4c2a
-	int i = 0;
7f4c2a
+	int             i           = 0;
7f4c2a
+	int             read_subvol = -1;
7f4c2a
+	afr_private_t  *priv        = NULL;
7f4c2a
+        afr_read_subvol_args_t local_args = {0,};
7f4c2a
 
7f4c2a
 	priv = this->private;
7f4c2a
 
7f4c2a
 	/* first preference - explicitly specified or local subvolume */
7f4c2a
 	if (priv->read_child >= 0 && readable[priv->read_child])
7f4c2a
-		return priv->read_child;
7f4c2a
+                return priv->read_child;
7f4c2a
+
7f4c2a
+        if (inode_is_linked (inode)) {
7f4c2a
+                gf_uuid_copy (local_args.gfid, inode->gfid);
7f4c2a
+                local_args.ia_type = inode->ia_type;
7f4c2a
+        } else if (args) {
7f4c2a
+                local_args = *args;
7f4c2a
+        }
7f4c2a
 
7f4c2a
 	/* second preference - use hashed mode */
7f4c2a
-	read_subvol = afr_hash_child (inode, priv->child_count,
7f4c2a
-				      priv->hash_mode);
7f4c2a
+	read_subvol = afr_hash_child (&local_args, priv->child_count,
7f4c2a
+                                      priv->hash_mode);
7f4c2a
 	if (read_subvol >= 0 && readable[read_subvol])
7f4c2a
-		return read_subvol;
7f4c2a
+                return read_subvol;
7f4c2a
 
7f4c2a
 	for (i = 0; i < priv->child_count; i++) {
7f4c2a
-		if (readable[i])
7f4c2a
-			return i;
7f4c2a
+                if (readable[i])
7f4c2a
+                        return i;
7f4c2a
 	}
7f4c2a
 
7f4c2a
-	/* no readable subvolumes, either split brain or all subvols down */
7f4c2a
+        /* no readable subvolumes, either split brain or all subvols down */
7f4c2a
 
7f4c2a
-	return -1;
7f4c2a
+        return -1;
7f4c2a
 }
7f4c2a
 
7f4c2a
 
7f4c2a
@@ -1069,7 +1076,8 @@ afr_inode_read_subvol_type_get (inode_t *inode, xlator_t *this,
7f4c2a
 
7f4c2a
 int
7f4c2a
 afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p,
7f4c2a
-		     int *event_p, afr_transaction_type type)
7f4c2a
+		     int *event_p, afr_transaction_type type,
7f4c2a
+                     afr_read_subvol_args_t *args)
7f4c2a
 {
7f4c2a
 	afr_private_t *priv = NULL;
7f4c2a
 	unsigned char *data_readable = NULL;
7f4c2a
@@ -1096,10 +1104,10 @@ afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p,
7f4c2a
 
7f4c2a
 	if (AFR_COUNT (intersection, priv->child_count) > 0)
7f4c2a
 		subvol = afr_read_subvol_select_by_policy (inode, this,
7f4c2a
-							   intersection);
7f4c2a
+							   intersection, args);
7f4c2a
 	else
7f4c2a
 		subvol = afr_read_subvol_select_by_policy (inode, this,
7f4c2a
-							   readable);
7f4c2a
+							   readable, args);
7f4c2a
 	if (subvol_p)
7f4c2a
 		*subvol_p = subvol;
7f4c2a
 	if (event_p)
7f4c2a
@@ -1413,7 +1421,8 @@ afr_get_parent_read_subvol (xlator_t *this, inode_t *parent,
7f4c2a
         priv = this->private;
7f4c2a
 
7f4c2a
         if (parent)
7f4c2a
-                par_read_subvol = afr_data_subvol_get (parent, this, 0, 0);
7f4c2a
+                par_read_subvol = afr_data_subvol_get (parent, this, 0, 0,
7f4c2a
+                                                       NULL);
7f4c2a
 
7f4c2a
         for (i = 0; i < priv->child_count; i++) {
7f4c2a
                 if (!replies[i].valid)
7f4c2a
@@ -1462,6 +1471,8 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
7f4c2a
 	gf_boolean_t        can_interpret = _gf_true;
7f4c2a
         inode_t            *parent = NULL;
7f4c2a
         int                 spb_choice = -1;
7f4c2a
+        ia_type_t           ia_type = IA_INVAL;
7f4c2a
+        afr_read_subvol_args_t args = {0,};
7f4c2a
 
7f4c2a
         priv  = this->private;
7f4c2a
         local = frame->local;
7f4c2a
@@ -1509,6 +1520,7 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
7f4c2a
 		if (read_subvol == -1 || !readable[read_subvol]) {
7f4c2a
 			read_subvol = i;
7f4c2a
 			gf_uuid_copy (read_gfid, replies[i].poststat.ia_gfid);
7f4c2a
+                        ia_type = replies[i].poststat.ia_type;
7f4c2a
 			local->op_ret = 0;
7f4c2a
 		}
7f4c2a
 	}
7f4c2a
@@ -1554,14 +1566,16 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
7f4c2a
 		   a response from all the UP subvolumes and all of them resolved
7f4c2a
 		   to the same GFID
7f4c2a
 		*/
7f4c2a
+                gf_uuid_copy (args.gfid, read_gfid);
7f4c2a
+                args.ia_type = ia_type;
7f4c2a
 		if (afr_replies_interpret (frame, this, local->inode)) {
7f4c2a
 			read_subvol = afr_data_subvol_get (local->inode, this,
7f4c2a
-							   0, 0);
7f4c2a
+							   0, 0, &args);
7f4c2a
 			afr_inode_read_subvol_reset (local->inode, this);
7f4c2a
 			goto cant_interpret;
7f4c2a
 		} else {
7f4c2a
 			read_subvol = afr_data_subvol_get (local->inode, this,
7f4c2a
-							   0, 0);
7f4c2a
+							   0, 0, &args);
7f4c2a
 		}
7f4c2a
 	} else {
7f4c2a
 	cant_interpret:
7f4c2a
@@ -1979,7 +1993,7 @@ afr_discover_done (call_frame_t *frame, xlator_t *this)
7f4c2a
 
7f4c2a
 	afr_replies_interpret (frame, this, local->inode);
7f4c2a
 
7f4c2a
-	read_subvol = afr_data_subvol_get (local->inode, this, 0, 0);
7f4c2a
+	read_subvol = afr_data_subvol_get (local->inode, this, 0, 0, NULL);
7f4c2a
 	if (read_subvol == -1) {
7f4c2a
 		gf_log (this->name, GF_LOG_WARNING, "no read subvols for %s",
7f4c2a
 			local->loc.path);
7f4c2a
@@ -2142,7 +2156,7 @@ afr_discover (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req
7f4c2a
 	}
7f4c2a
 
7f4c2a
 	afr_read_subvol_get (loc->inode, this, NULL, &event,
7f4c2a
-			     AFR_DATA_TRANSACTION);
7f4c2a
+			     AFR_DATA_TRANSACTION, NULL);
7f4c2a
 
7f4c2a
 	if (event != local->event_generation)
7f4c2a
 		afr_inode_refresh (frame, this, loc->inode, afr_discover_do);
7f4c2a
@@ -2288,7 +2302,7 @@ afr_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
7f4c2a
         }
7f4c2a
 
7f4c2a
 	afr_read_subvol_get (loc->parent, this, NULL, &event,
7f4c2a
-			     AFR_DATA_TRANSACTION);
7f4c2a
+			     AFR_DATA_TRANSACTION, NULL);
7f4c2a
 
7f4c2a
 	if (event != local->event_generation)
7f4c2a
 		afr_inode_refresh (frame, this, loc->parent, afr_lookup_do);
7f4c2a
@@ -2608,7 +2622,7 @@ afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
7f4c2a
 
7f4c2a
         local = frame->local;
7f4c2a
 
7f4c2a
-	read_subvol = afr_data_subvol_get (local->inode, this, 0, 0);
7f4c2a
+	read_subvol = afr_data_subvol_get (local->inode, this, 0, 0, NULL);
7f4c2a
 
7f4c2a
         LOCK (&frame->lock);
7f4c2a
         {
7f4c2a
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
7f4c2a
index 984ed9c..11f583e 100644
7f4c2a
--- a/xlators/cluster/afr/src/afr-dir-read.c
7f4c2a
+++ b/xlators/cluster/afr/src/afr-dir-read.c
7f4c2a
@@ -153,7 +153,12 @@ afr_validate_read_subvol (inode_t *inode, xlator_t *this, int par_read_subvol)
7f4c2a
         if (!priv->consistent_metadata)
7f4c2a
                 return 0;
7f4c2a
 
7f4c2a
-        entry_read_subvol = afr_data_subvol_get (inode, this, 0, 0);
7f4c2a
+        /* For an inode fetched through readdirp which is yet to be linked,
7f4c2a
+         * inode ctx would not be initialised (yet). So this function would
7f4c2a
+         * already have returned -1 above due to gen being 0, which is why it
7f4c2a
+         * is OK to pass NULL for read_subvol_args here.
7f4c2a
+         */
7f4c2a
+        entry_read_subvol = afr_data_subvol_get (inode, this, 0, 0, NULL);
7f4c2a
         if (entry_read_subvol != par_read_subvol)
7f4c2a
                 return -1;
7f4c2a
 
7f4c2a
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
7f4c2a
index 8a2c0e4..2891f36 100644
7f4c2a
--- a/xlators/cluster/afr/src/afr-dir-write.c
7f4c2a
+++ b/xlators/cluster/afr/src/afr-dir-write.c
7f4c2a
@@ -95,14 +95,14 @@ __afr_dir_write_finalize (call_frame_t *frame, xlator_t *this)
7f4c2a
 	if (local->inode) {
7f4c2a
 		afr_replies_interpret (frame, this, local->inode);
7f4c2a
 		inode_read_subvol = afr_data_subvol_get (local->inode, this,
7f4c2a
-							 NULL, NULL);
7f4c2a
+							 NULL, NULL, NULL);
7f4c2a
 	}
7f4c2a
 	if (local->parent)
7f4c2a
 		parent_read_subvol = afr_data_subvol_get (local->parent, this,
7f4c2a
-							  NULL, NULL);
7f4c2a
+							  NULL, NULL, NULL);
7f4c2a
 	if (local->parent2)
7f4c2a
 		parent2_read_subvol = afr_data_subvol_get (local->parent2, this,
7f4c2a
-							   NULL, NULL);
7f4c2a
+							   NULL, NULL, NULL);
7f4c2a
 
7f4c2a
 	local->op_ret = -1;
7f4c2a
 	local->op_errno = afr_final_errno (local, priv);
7f4c2a
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
7f4c2a
index ecd2b9d..5d32927 100644
7f4c2a
--- a/xlators/cluster/afr/src/afr-inode-write.c
7f4c2a
+++ b/xlators/cluster/afr/src/afr-inode-write.c
7f4c2a
@@ -53,10 +53,11 @@ __afr_inode_write_finalize (call_frame_t *frame, xlator_t *this)
7f4c2a
 	if (local->inode) {
7f4c2a
 		if (local->transaction.type == AFR_METADATA_TRANSACTION)
7f4c2a
 			read_subvol = afr_metadata_subvol_get (local->inode, this,
7f4c2a
-							       NULL, NULL);
7f4c2a
+							       NULL, NULL,
7f4c2a
+                                                               NULL);
7f4c2a
 		else
7f4c2a
 			read_subvol = afr_data_subvol_get (local->inode, this,
7f4c2a
-							   NULL, NULL);
7f4c2a
+							   NULL, NULL, NULL);
7f4c2a
 	}
7f4c2a
 
7f4c2a
 	local->op_ret = -1;
7f4c2a
diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c
7f4c2a
index 0ec1d91..6121108 100644
7f4c2a
--- a/xlators/cluster/afr/src/afr-read-txn.c
7f4c2a
+++ b/xlators/cluster/afr/src/afr-read-txn.c
7f4c2a
@@ -90,7 +90,7 @@ afr_read_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err)
7f4c2a
                 memcpy (local->readable, local->child_up, priv->child_count);
7f4c2a
 
7f4c2a
 	read_subvol = afr_read_subvol_select_by_policy (inode, this,
7f4c2a
-							local->readable);
7f4c2a
+							local->readable, NULL);
7f4c2a
 	if (read_subvol == -1)
7f4c2a
                 AFR_READ_TXN_SET_ERROR_AND_GOTO (-1, EIO, -1, readfn);
7f4c2a
 
7f4c2a
@@ -232,7 +232,7 @@ afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode,
7f4c2a
 		goto refresh;
7f4c2a
 
7f4c2a
 	read_subvol = afr_read_subvol_select_by_policy (inode, this,
7f4c2a
-							local->readable);
7f4c2a
+							local->readable, NULL);
7f4c2a
 
7f4c2a
 	if (read_subvol < 0 || read_subvol > priv->child_count) {
7f4c2a
 		gf_msg (this->name, GF_LOG_WARNING, 0, AFR_MSG_SPLIT_BRAIN,
7f4c2a
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
7f4c2a
index 855d3a3..e6e7f3e 100644
7f4c2a
--- a/xlators/cluster/afr/src/afr.h
7f4c2a
+++ b/xlators/cluster/afr/src/afr.h
7f4c2a
@@ -755,6 +755,11 @@ typedef struct afr_spbc_timeout {
7f4c2a
         int          spb_child_index;
7f4c2a
 } afr_spbc_timeout_t;
7f4c2a
 
7f4c2a
+typedef struct afr_read_subvol_args {
7f4c2a
+        ia_type_t ia_type;
7f4c2a
+        uuid_t gfid;
7f4c2a
+} afr_read_subvol_args_t;
7f4c2a
+
7f4c2a
 /* did a call fail due to a child failing? */
7f4c2a
 #define child_went_down(op_ret, op_errno) (((op_ret) < 0) &&            \
7f4c2a
                                            ((op_errno == ENOTCONN) ||   \
7f4c2a
@@ -787,7 +792,8 @@ afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this);
7f4c2a
 
7f4c2a
 int
7f4c2a
 afr_read_subvol_select_by_policy (inode_t *inode, xlator_t *this,
7f4c2a
-				  unsigned char *readable);
7f4c2a
+				  unsigned char *readable,
7f4c2a
+                                  afr_read_subvol_args_t *args);
7f4c2a
 
7f4c2a
 int
7f4c2a
 afr_inode_read_subvol_type_get (inode_t *inode, xlator_t *this,
7f4c2a
@@ -795,13 +801,14 @@ afr_inode_read_subvol_type_get (inode_t *inode, xlator_t *this,
7f4c2a
 				int type);
7f4c2a
 int
7f4c2a
 afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p,
7f4c2a
-		     int *event_p, afr_transaction_type type);
7f4c2a
+		     int *event_p, afr_transaction_type type,
7f4c2a
+                     afr_read_subvol_args_t *args);
7f4c2a
 
7f4c2a
-#define afr_data_subvol_get(i, t, s, e) \
7f4c2a
-	afr_read_subvol_get(i, t, s, e, AFR_DATA_TRANSACTION)
7f4c2a
+#define afr_data_subvol_get(i, t, s, e, a) \
7f4c2a
+	afr_read_subvol_get(i, t, s, e, AFR_DATA_TRANSACTION, a)
7f4c2a
 
7f4c2a
-#define afr_metadata_subvol_get(i, t, s, e) \
7f4c2a
-	afr_read_subvol_get(i, t, s, e, AFR_METADATA_TRANSACTION)
7f4c2a
+#define afr_metadata_subvol_get(i, t, s, e, a) \
7f4c2a
+	afr_read_subvol_get(i, t, s, e, AFR_METADATA_TRANSACTION, a)
7f4c2a
 
7f4c2a
 int
7f4c2a
 afr_inode_refresh (call_frame_t *frame, xlator_t *this, inode_t *inode,
7f4c2a
-- 
7f4c2a
1.7.1
7f4c2a