From f42b8789cdcd93cb9fa93f35ed067268ce75f789 Mon Sep 17 00:00:00 2001
From: Kotresh HR <khiremat@redhat.com>
Date: Thu, 25 Oct 2018 03:23:56 -0400
Subject: [PATCH 436/444] geo-rep: Fix issue in gfid-conflict-resolution
Problem:
During gfid-conflict-resolution, geo-rep crashes
with 'ValueError: list.remove(x): x not in list'
Cause and Analysis:
During gfid-conflict-resolution, the entry blob is
passed back to master along with additional
information to verify it's integrity. If everything
looks fine, the entry creation is ignored and is
deleted from the original list. But it is crashing
during removal of entry from the list saying entry
not in list. The reason is that the stat information
in the entry blob was modified and sent back to
master if present.
Fix:
Send back the correct stat information for
gfid-conflict-resolution.
Backport of:
> Patch: https://review.gluster.org/21483
> fixes: bz#1642865
> Change-Id: I47a6aa60b2a495465aa9314eebcb4085f0b1c4fd
> Signed-off-by: Kotresh HR <khiremat@redhat.com>
BUG: 1640347
Change-Id: I47a6aa60b2a495465aa9314eebcb4085f0b1c4fd
Signed-off-by: Kotresh HR <khiremat@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/155038
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Aravinda Vishwanathapura Krishna Murthy <avishwan@redhat.com>
Reviewed-by: Sunny Kumar <sunkumar@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
geo-replication/syncdaemon/resource.py | 42 +++++++++++++++++++---------------
1 file changed, 24 insertions(+), 18 deletions(-)
diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py
index b289b3b..f16066e 100644
--- a/geo-replication/syncdaemon/resource.py
+++ b/geo-replication/syncdaemon/resource.py
@@ -456,7 +456,7 @@ class Server(object):
st['uid'], st['gid'],
gf, st['mode'], bn, lnk)
- def entry_purge(op, entry, gfid, e):
+ def entry_purge(op, entry, gfid, e, uid, gid):
# This is an extremely racy code and needs to be fixed ASAP.
# The GFID check here is to be sure that the pargfid/bname
# to be purged is the GFID gotten from the changelog.
@@ -470,7 +470,7 @@ class Server(object):
return
if not matching_disk_gfid(gfid, entry):
- collect_failure(e, EEXIST)
+ collect_failure(e, EEXIST, uid, gid)
return
if op == 'UNLINK':
@@ -486,7 +486,7 @@ class Server(object):
if er == ENOTEMPTY:
return er
- def collect_failure(e, cmd_ret, dst=False):
+ def collect_failure(e, cmd_ret, uid, gid, dst=False):
slv_entry_info = {}
slv_entry_info['gfid_mismatch'] = False
slv_entry_info['name_mismatch'] = False
@@ -499,6 +499,11 @@ class Server(object):
if cmd_ret is None:
return False
+ if e.get("stat", {}):
+ # Copy actual UID/GID value back to entry stat
+ e['stat']['uid'] = uid
+ e['stat']['gid'] = gid
+
if cmd_ret == EEXIST:
if dst:
en = e['entry1']
@@ -559,7 +564,7 @@ class Server(object):
errno_wrap(os.rmdir, [path], [ENOENT, ESTALE], [EBUSY])
- def rename_with_disk_gfid_confirmation(gfid, entry, en):
+ def rename_with_disk_gfid_confirmation(gfid, entry, en, uid, gid):
if not matching_disk_gfid(gfid, entry):
logging.error(lf("RENAME ignored: source entry does not match "
"with on-disk gfid",
@@ -567,14 +572,13 @@ class Server(object):
gfid=gfid,
disk_gfid=get_gfid_from_mnt(entry),
target=en))
- collect_failure(e, EEXIST)
+ collect_failure(e, EEXIST, uid, gid)
return
cmd_ret = errno_wrap(os.rename,
[entry, en],
[ENOENT, EEXIST], [ESTALE, EBUSY])
- collect_failure(e, cmd_ret)
-
+ collect_failure(e, cmd_ret, uid, gid)
for e in entries:
blob = None
@@ -595,7 +599,7 @@ class Server(object):
if op in ['RMDIR', 'UNLINK']:
# Try once, if rmdir failed with ENOTEMPTY
# then delete recursively.
- er = entry_purge(op, entry, gfid, e)
+ er = entry_purge(op, entry, gfid, e, uid, gid)
if isinstance(er, int):
if er == ENOTEMPTY and op == 'RMDIR':
# Retry if ENOTEMPTY, ESTALE
@@ -632,7 +636,7 @@ class Server(object):
cmd_ret = errno_wrap(os.link,
[slink, entry],
[ENOENT, EEXIST], [ESTALE])
- collect_failure(e, cmd_ret)
+ collect_failure(e, cmd_ret, uid, gid)
elif op == 'MKDIR':
en = e['entry']
slink = os.path.join(pfx, gfid)
@@ -676,7 +680,7 @@ class Server(object):
cmd_ret = errno_wrap(os.link,
[slink, entry],
[ENOENT, EEXIST], [ESTALE])
- collect_failure(e, cmd_ret)
+ collect_failure(e, cmd_ret, uid, gid)
elif op == 'SYMLINK':
en = e['entry']
st = lstat(entry)
@@ -684,7 +688,7 @@ class Server(object):
blob = entry_pack_symlink(gfid, bname, e['link'],
e['stat'])
elif not matching_disk_gfid(gfid, en):
- collect_failure(e, EEXIST)
+ collect_failure(e, EEXIST, uid, gid)
elif op == 'RENAME':
en = e['entry1']
# The matching disk gfid check validates two things
@@ -704,7 +708,7 @@ class Server(object):
blob = entry_pack_symlink(gfid, bname,
e['link'], e['stat'])
elif not matching_disk_gfid(gfid, en):
- collect_failure(e, EEXIST, True)
+ collect_failure(e, EEXIST, uid, gid, True)
else:
slink = os.path.join(pfx, gfid)
st = lstat(slink)
@@ -716,12 +720,13 @@ class Server(object):
else:
cmd_ret = errno_wrap(os.link, [slink, en],
[ENOENT, EEXIST], [ESTALE])
- collect_failure(e, cmd_ret)
+ collect_failure(e, cmd_ret, uid, gid)
else:
st = lstat(entry)
st1 = lstat(en)
if isinstance(st1, int):
- rename_with_disk_gfid_confirmation(gfid, entry, en)
+ rename_with_disk_gfid_confirmation(gfid, entry, en,
+ uid, gid)
else:
if st.st_ino == st1.st_ino:
# we have a hard link, we can now unlink source
@@ -746,15 +751,16 @@ class Server(object):
else:
raise
elif not matching_disk_gfid(gfid, en):
- collect_failure(e, EEXIST, True)
+ collect_failure(e, EEXIST, uid, gid, True)
else:
- rename_with_disk_gfid_confirmation(gfid, entry, en)
+ rename_with_disk_gfid_confirmation(gfid, entry, en,
+ uid, gid)
if blob:
cmd_ret = errno_wrap(Xattr.lsetxattr,
[pg, 'glusterfs.gfid.newfile', blob],
[EEXIST, ENOENT],
[ESTALE, EINVAL, EBUSY])
- failed = collect_failure(e, cmd_ret)
+ collect_failure(e, cmd_ret, uid, gid)
# If UID/GID is different than zero that means we are trying
# create Entry with different UID/GID. Create Entry with
@@ -763,7 +769,7 @@ class Server(object):
path = os.path.join(pfx, gfid)
cmd_ret = errno_wrap(os.lchown, [path, uid, gid], [ENOENT],
[ESTALE, EINVAL])
- collect_failure(e, cmd_ret)
+ collect_failure(e, cmd_ret, uid, gid)
return failures
--
1.8.3.1