From 115786c374680597cebec0de7a9bebf13eea0db4 Mon Sep 17 00:00:00 2001
From: Krutika Dhananjay <kdhananj@redhat.com>
Date: Mon, 7 Nov 2016 16:06:56 +0530
Subject: [PATCH 154/157] features/shard: Fill loc.pargfid too for named lookups on individual shards

Backport of: http://review.gluster.org/15788

On a sharded volume, when a brick is replaced while I/O is in progress,
named lookups on individual shards (issued as part of read/write) were
failing with ENOENT on the replaced brick, and as a result AFR initiated
name heal in the lookup callback. But since pargfid was empty (which is
what this patch fixes), the resolution of the shards by protocol/server
failed, and the following pattern of logs was seen:
Brick-logs:
[2016-11-08 07:41:49.387127] W [MSGID: 115009]
[server-resolve.c:566:server_resolve] 0-rep-server: no resolution type
for (null) (LOOKUP)
[2016-11-08 07:41:49.387157] E [MSGID: 115050]
[server-rpc-fops.c:156:server_lookup_cbk] 0-rep-server: 91833: LOOKUP(null)
(00000000-0000-0000-0000-000000000000/16d47463-ece5-4b33-9c93-470be918c0f6.82)
==> (Invalid argument) [Invalid argument]
Client-logs:
[2016-11-08 07:41:27.497687] W [MSGID: 114031]
[client-rpc-fops.c:2930:client3_3_lookup_cbk] 2-rep-client-0: remote
operation failed. Path: (null) (00000000-0000-0000-0000-000000000000)
[Invalid argument]
[2016-11-08 07:41:27.497755] W [MSGID: 114031]
[client-rpc-fops.c:2930:client3_3_lookup_cbk] 2-rep-client-1: remote
operation failed. Path: (null) (00000000-0000-0000-0000-000000000000)
[Invalid argument]
[2016-11-08 07:41:27.498500] W [MSGID: 114031]
[client-rpc-fops.c:2930:client3_3_lookup_cbk] 2-rep-client-2: remote
operation failed. Path: (null) (00000000-0000-0000-0000-000000000000)
[Invalid argument]
[2016-11-08 07:41:27.499680] E [MSGID: 133010]
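Also, this patch makes AFR itself choose a non-null pargfid for name
heal (via the new loc_pargfid() helper, which falls back to the gfid of
loc->parent when loc->pargfid is null) even if its ancestors fail to
initialize all pargfid placeholders.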
Change-Id: Iedbd57ed432f3950171f0ca8549001623fe70f99
BUG: 1370350
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/89412
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
---
libglusterfs/src/xlator.c | 17 +++++++++++++++++
libglusterfs/src/xlator.h | 1 +
xlators/cluster/afr/src/afr-common.c | 6 ++++--
xlators/features/shard/src/shard.c | 1 +
4 files changed, 23 insertions(+), 2 deletions(-)
diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c
index 2221332..3c1cde5 100644
--- a/libglusterfs/src/xlator.c
+++ b/libglusterfs/src/xlator.c
@@ -762,6 +762,23 @@ out:
return;
}
+void
+loc_pargfid (loc_t *loc, uuid_t gfid)
+{ /* Fill caller-supplied 'gfid' with the parent gfid recorded for 'loc', if any. */
+ if (!gfid) /* no output buffer supplied: nothing to fill */
+ goto out;
+ gf_uuid_clear (gfid); /* clear up front; 'gfid' stays cleared if neither source below is usable */
+
+ if (!loc) /* no loc to read from: leave 'gfid' cleared */
+ goto out;
+ else if (!gf_uuid_is_null (loc->pargfid)) /* first choice: pargfid stored directly in the loc */
+ gf_uuid_copy (gfid, loc->pargfid);
+ else if (loc->parent && (!gf_uuid_is_null (loc->parent->gfid))) /* fallback: gfid of the parent inode */
+ gf_uuid_copy (gfid, loc->parent->gfid);
+out:
+ return;
+}
+
char*
loc_gfid_utoa (loc_t *loc)
{
diff --git a/libglusterfs/src/xlator.h b/libglusterfs/src/xlator.h
index 555916d..70e6f0a 100644
--- a/libglusterfs/src/xlator.h
+++ b/libglusterfs/src/xlator.h
@@ -1011,6 +1011,7 @@ int loc_copy_overload_parent (loc_t *dst,
void loc_wipe (loc_t *loc);
int loc_path (loc_t *loc, const char *bname);
void loc_gfid (loc_t *loc, uuid_t gfid);
+void loc_pargfid (loc_t *loc, uuid_t pargfid);
char* loc_gfid_utoa (loc_t *loc);
gf_boolean_t loc_is_root (loc_t *loc);
int32_t loc_build_child (loc_t *child, loc_t *parent, char *name);
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index c2922fb..fe0dc2d 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -2234,12 +2234,14 @@ afr_lookup_selfheal_wrap (void *opaque)
afr_local_t *local = NULL;
xlator_t *this = NULL;
inode_t *inode = NULL;
+ uuid_t pargfid = {0,};
local = frame->local;
this = frame->this;
+ loc_pargfid (&local->loc, pargfid);
- ret = afr_selfheal_name (frame->this, local->loc.pargfid,
- local->loc.name, &local->cont.lookup.gfid_req);
+ ret = afr_selfheal_name (frame->this, pargfid, local->loc.name,
+ &local->cont.lookup.gfid_req);
if (ret == -EIO)
goto unwind;
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
index abac0cc..934aaaf 100644
--- a/xlators/features/shard/src/shard.c
+++ b/xlators/features/shard/src/shard.c
@@ -1678,6 +1678,7 @@ shard_common_lookup_shards (call_frame_t *frame, xlator_t *this, inode_t *inode,
bname = strrchr (path, '/') + 1;
loc.inode = inode_new (this->itable);
loc.parent = inode_ref (priv->dot_shard_inode);
+ gf_uuid_copy (loc.pargfid, priv->dot_shard_gfid);
ret = inode_path (loc.parent, bname, (char **) &(loc.path));
if (ret < 0 || !(loc.inode)) {
gf_msg (this->name, GF_LOG_ERROR, 0,
--
1.7.1