Blob Blame History Raw
From 115786c374680597cebec0de7a9bebf13eea0db4 Mon Sep 17 00:00:00 2001
From: Krutika Dhananjay <kdhananj@redhat.com>
Date: Mon, 7 Nov 2016 16:06:56 +0530
Subject: [PATCH 154/157] features/shard: Fill loc.pargfid too for named lookups on individual shards

        Backport of: http://review.gluster.org/15788

On a sharded volume, when a brick is replaced while I/O is in progress,
named lookups on individual shards (issued as part of reads/writes) were
failing with ENOENT on the replaced brick, and as a result AFR initiated
name heal in the lookup callback. But since pargfid was empty (which is
what this patch fixes), the resolution of the shards by protocol/server
failed and the following pattern of logs was seen:

Brick-logs:

[2016-11-08 07:41:49.387127] W [MSGID: 115009]
[server-resolve.c:566:server_resolve] 0-rep-server: no resolution type
for (null) (LOOKUP)
[2016-11-08 07:41:49.387157] E [MSGID: 115050]
[server-rpc-fops.c:156:server_lookup_cbk] 0-rep-server: 91833: LOOKUP(null)
(00000000-0000-0000-0000-000000000000/16d47463-ece5-4b33-9c93-470be918c0f6.82)
==> (Invalid argument) [Invalid argument]

Client-logs:
[2016-11-08 07:41:27.497687] W [MSGID: 114031]
[client-rpc-fops.c:2930:client3_3_lookup_cbk] 2-rep-client-0: remote
operation failed. Path: (null) (00000000-0000-0000-0000-000000000000)
[Invalid argument]
[2016-11-08 07:41:27.497755] W [MSGID: 114031]
[client-rpc-fops.c:2930:client3_3_lookup_cbk] 2-rep-client-1: remote
operation failed. Path: (null) (00000000-0000-0000-0000-000000000000)
[Invalid argument]
[2016-11-08 07:41:27.498500] W [MSGID: 114031]
[client-rpc-fops.c:2930:client3_3_lookup_cbk] 2-rep-client-2: remote
operation failed. Path: (null) (00000000-0000-0000-0000-000000000000)
[Invalid argument]
[2016-11-08 07:41:27.499680] E [MSGID: 133010]

Also, this patch makes AFR by itself choose a non-NULL pargfid even if
its ancestors fail to initialize all pargfid placeholders.

Change-Id: Iedbd57ed432f3950171f0ca8549001623fe70f99
BUG: 1370350
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/89412
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
---
 libglusterfs/src/xlator.c            |   17 +++++++++++++++++
 libglusterfs/src/xlator.h            |    1 +
 xlators/cluster/afr/src/afr-common.c |    6 ++++--
 xlators/features/shard/src/shard.c   |    1 +
 4 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c
index 2221332..3c1cde5 100644
--- a/libglusterfs/src/xlator.c
+++ b/libglusterfs/src/xlator.c
@@ -762,6 +762,23 @@ out:
         return;
 }
 
+void
+loc_pargfid (loc_t *loc, uuid_t gfid) /* Fill @gfid (output) with the parent gfid taken from @loc. */
+{
+        if (!gfid) /* no output buffer supplied: nothing to fill */
+                goto out;
+        gf_uuid_clear (gfid); /* clear the output first so every exit path below leaves it in a defined state */
+
+        if (!loc) /* no loc to read from: @gfid stays cleared */
+                goto out;
+        else if (!gf_uuid_is_null (loc->pargfid)) /* first choice: the pargfid stored directly in the loc */
+                gf_uuid_copy (gfid, loc->pargfid);
+        else if (loc->parent && (!gf_uuid_is_null (loc->parent->gfid))) /* fallback: the gfid of the parent inode, when one is attached */
+                gf_uuid_copy (gfid, loc->parent->gfid);
+out:
+        return; /* if neither source was usable, @gfid is returned as cleared above */
+}
+
 char*
 loc_gfid_utoa (loc_t *loc)
 {
diff --git a/libglusterfs/src/xlator.h b/libglusterfs/src/xlator.h
index 555916d..70e6f0a 100644
--- a/libglusterfs/src/xlator.h
+++ b/libglusterfs/src/xlator.h
@@ -1011,6 +1011,7 @@ int loc_copy_overload_parent (loc_t *dst,
 void loc_wipe (loc_t *loc);
 int loc_path (loc_t *loc, const char *bname);
 void loc_gfid (loc_t *loc, uuid_t gfid);
+void loc_pargfid (loc_t *loc, uuid_t pargfid);
 char* loc_gfid_utoa (loc_t *loc);
 gf_boolean_t loc_is_root (loc_t *loc);
 int32_t loc_build_child (loc_t *child, loc_t *parent, char *name);
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index c2922fb..fe0dc2d 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -2234,12 +2234,14 @@ afr_lookup_selfheal_wrap (void *opaque)
 	afr_local_t *local = NULL;
 	xlator_t *this = NULL;
 	inode_t *inode = NULL;
+        uuid_t pargfid = {0,};
 
 	local = frame->local;
 	this = frame->this;
+        loc_pargfid (&local->loc, pargfid);
 
-	ret = afr_selfheal_name (frame->this, local->loc.pargfid,
-                                 local->loc.name, &local->cont.lookup.gfid_req);
+	ret = afr_selfheal_name (frame->this, pargfid, local->loc.name,
+                                 &local->cont.lookup.gfid_req);
         if (ret == -EIO)
                 goto unwind;
 
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
index abac0cc..934aaaf 100644
--- a/xlators/features/shard/src/shard.c
+++ b/xlators/features/shard/src/shard.c
@@ -1678,6 +1678,7 @@ shard_common_lookup_shards (call_frame_t *frame, xlator_t *this, inode_t *inode,
                 bname = strrchr (path, '/') + 1;
                 loc.inode = inode_new (this->itable);
                 loc.parent = inode_ref (priv->dot_shard_inode);
+                gf_uuid_copy (loc.pargfid, priv->dot_shard_gfid);
                 ret = inode_path (loc.parent, bname, (char **) &(loc.path));
                 if (ret < 0 || !(loc.inode)) {
                         gf_msg (this->name, GF_LOG_ERROR, 0,
-- 
1.7.1