14f8ab
From 69ac1fd2da7a57f2f0854412863911959bf71fde Mon Sep 17 00:00:00 2001
14f8ab
From: Sunny Kumar <sunkumar@redhat.com>
14f8ab
Date: Tue, 2 Apr 2019 12:38:09 +0530
14f8ab
Subject: [PATCH 161/169] geo-rep: Fix rename with existing destination with
14f8ab
 same gfid
14f8ab
14f8ab
Problem:
14f8ab
   Geo-rep fails to sync the rename properly if destination exists.
14f8ab
It results in source to be remained on slave causing more number of
14f8ab
files on slave. Also heavy rename workload like logrotate caused
14f8ab
lot of ESTALE errors
14f8ab
14f8ab
Cause:
14f8ab
   Geo-rep fails to sync rename if destination exists if creation
14f8ab
of source file also falls into single batch of changelogs being
14f8ab
processed. This is because, after fixing problematic gfids verifying
14f8ab
from master, while re-processing original entries, CREATE also was
14f8ab
re-processed causing more files on slave and rename to be failed.
14f8ab
14f8ab
Solution:
14f8ab
   Entries need to be removed from retrial list after fixing
14f8ab
problematic gfids on slave so that it's not re-created again on slave.
14f8ab
   Also treat ESTALE as EEXIST so that the error is properly handled
14f8ab
verifying the op on master volume.
14f8ab
14f8ab
Backport of:
14f8ab
 > Patch: https://review.gluster.org/22519
14f8ab
 > Change-Id: I50cf289e06b997adddff0552bf2466d9201dd1f9
14f8ab
 > BUG: 1694820
14f8ab
 > Signed-off-by: Kotresh HR <khiremat@redhat.com>
14f8ab
 > Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
14f8ab
14f8ab
Change-Id: I50cf289e06b997adddff0552bf2466d9201dd1f9
14f8ab
fixes: bz#1708121
14f8ab
Signed-off-by: Kotresh HR <khiremat@redhat.com>
14f8ab
Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
14f8ab
Reviewed-on: https://code.engineering.redhat.com/gerrit/172393
14f8ab
Tested-by: RHGS Build Bot <nigelb@redhat.com>
14f8ab
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
14f8ab
---
14f8ab
 geo-replication/syncdaemon/master.py              | 41 +++++++++++++++++++++--
14f8ab
 geo-replication/syncdaemon/resource.py            | 10 ++++--
14f8ab
 tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t  |  5 +++
14f8ab
 tests/00-geo-rep/georep-basic-dr-rsync.t          |  5 +++
14f8ab
 tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t |  5 +++
14f8ab
 tests/00-geo-rep/georep-basic-dr-tarssh.t         |  5 +++
14f8ab
 tests/geo-rep.rc                                  | 36 ++++++++++++++++++++
14f8ab
 7 files changed, 102 insertions(+), 5 deletions(-)
14f8ab
14f8ab
diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py
14f8ab
index 3da7610..42c86d7 100644
14f8ab
--- a/geo-replication/syncdaemon/master.py
14f8ab
+++ b/geo-replication/syncdaemon/master.py
14f8ab
@@ -65,6 +65,9 @@ def _volinfo_hook_relax_foreign(self):
14f8ab
 def edct(op, **ed):
14f8ab
     dct = {}
14f8ab
     dct['op'] = op
14f8ab
+    # This is used in automatic gfid conflict resolution.
14f8ab
+    # When marked True, it's skipped during re-processing.
14f8ab
+    dct['skip_entry'] = False
14f8ab
     for k in ed:
14f8ab
         if k == 'stat':
14f8ab
             st = ed[k]
14f8ab
@@ -792,6 +795,7 @@ class GMasterChangelogMixin(GMasterCommon):
14f8ab
         pfx = gauxpfx()
14f8ab
         fix_entry_ops = []
14f8ab
         failures1 = []
14f8ab
+        remove_gfids = set()
14f8ab
         for failure in failures:
14f8ab
             if failure[2]['name_mismatch']:
14f8ab
                 pbname = failure[2]['slave_entry']
14f8ab
@@ -822,6 +826,18 @@ class GMasterChangelogMixin(GMasterCommon):
14f8ab
                             edct('UNLINK',
14f8ab
                                  gfid=failure[2]['slave_gfid'],
14f8ab
                                  entry=pbname))
14f8ab
+                    remove_gfids.add(slave_gfid)
14f8ab
+                    if op in ['RENAME']:
14f8ab
+                        # If renamed gfid doesn't exists on master, remove
14f8ab
+                        # rename entry and unlink src on slave
14f8ab
+                        st = lstat(os.path.join(pfx, failure[0]['gfid']))
14f8ab
+                        if isinstance(st, int) and st == ENOENT:
14f8ab
+                            logging.debug("Unlink source %s" % repr(failure))
14f8ab
+                            remove_gfids.add(failure[0]['gfid'])
14f8ab
+                            fix_entry_ops.append(
14f8ab
+                                edct('UNLINK',
14f8ab
+                                     gfid=failure[0]['gfid'],
14f8ab
+                                     entry=failure[0]['entry']))
14f8ab
                 # Takes care of scenarios of hardlinks/renames on master
14f8ab
                 elif not isinstance(st, int):
14f8ab
                     if matching_disk_gfid(slave_gfid, pbname):
14f8ab
@@ -831,7 +847,12 @@ class GMasterChangelogMixin(GMasterCommon):
14f8ab
                                         ' Safe to ignore, take out entry',
14f8ab
                                         retry_count=retry_count,
14f8ab
                                         entry=repr(failure)))
14f8ab
-                        entries.remove(failure[0])
14f8ab
+                        remove_gfids.add(failure[0]['gfid'])
14f8ab
+                        if op == 'RENAME':
14f8ab
+                            fix_entry_ops.append(
14f8ab
+                                edct('UNLINK',
14f8ab
+                                     gfid=failure[0]['gfid'],
14f8ab
+                                     entry=failure[0]['entry']))
14f8ab
                     # The file exists on master but with different name.
14f8ab
                     # Probably renamed and got missed during xsync crawl.
14f8ab
                     elif failure[2]['slave_isdir']:
14f8ab
@@ -856,7 +877,10 @@ class GMasterChangelogMixin(GMasterCommon):
14f8ab
                                             'take out entry',
14f8ab
                                             retry_count=retry_count,
14f8ab
                                             entry=repr(failure)))
14f8ab
-                            entries.remove(failure[0])
14f8ab
+                            try:
14f8ab
+                                entries.remove(failure[0])
14f8ab
+                            except ValueError:
14f8ab
+                                pass
14f8ab
                         else:
14f8ab
                             rename_dict = edct('RENAME', gfid=slave_gfid,
14f8ab
                                                entry=src_entry,
14f8ab
@@ -896,7 +920,10 @@ class GMasterChangelogMixin(GMasterCommon):
14f8ab
                                     'ignore, take out entry',
14f8ab
                                     retry_count=retry_count,
14f8ab
                                     entry=repr(failure)))
14f8ab
-                    entries.remove(failure[0])
14f8ab
+                    try:
14f8ab
+                        entries.remove(failure[0])
14f8ab
+                    except ValueError:
14f8ab
+                        pass
14f8ab
                 else:
14f8ab
                     logging.info(lf('Fixing ENOENT error in slave. Create '
14f8ab
                                     'parent directory on slave.',
14f8ab
@@ -913,6 +940,14 @@ class GMasterChangelogMixin(GMasterCommon):
14f8ab
                         edct('MKDIR', gfid=pargfid, entry=dir_entry,
14f8ab
                              mode=st.st_mode, uid=st.st_uid, gid=st.st_gid))
14f8ab
 
14f8ab
+        logging.debug("remove_gfids: %s" % repr(remove_gfids))
14f8ab
+        if remove_gfids:
14f8ab
+            for e in entries:
14f8ab
+                if e['op'] in ['MKDIR', 'MKNOD', 'CREATE', 'RENAME'] \
14f8ab
+                   and e['gfid'] in remove_gfids:
14f8ab
+                    logging.debug("Removed entry op from retrial list: entry: %s" % repr(e))
14f8ab
+                    e['skip_entry'] = True
14f8ab
+
14f8ab
         if fix_entry_ops:
14f8ab
             # Process deletions of entries whose gfids are mismatched
14f8ab
             failures1 = self.slave.server.entry_ops(fix_entry_ops)
14f8ab
diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py
14f8ab
index c290d86..f54ccd9 100644
14f8ab
--- a/geo-replication/syncdaemon/resource.py
14f8ab
+++ b/geo-replication/syncdaemon/resource.py
14f8ab
@@ -426,7 +426,7 @@ class Server(object):
14f8ab
                 e['stat']['uid'] = uid
14f8ab
                 e['stat']['gid'] = gid
14f8ab
 
14f8ab
-            if cmd_ret == EEXIST:
14f8ab
+            if cmd_ret in [EEXIST, ESTALE]:
14f8ab
                 if dst:
14f8ab
                     en = e['entry1']
14f8ab
                 else:
14f8ab
@@ -510,6 +510,12 @@ class Server(object):
14f8ab
             entry = e['entry']
14f8ab
             uid = 0
14f8ab
             gid = 0
14f8ab
+
14f8ab
+            # Skip entry processing if it's marked true during gfid
14f8ab
+            # conflict resolution
14f8ab
+            if e['skip_entry']:
14f8ab
+                continue
14f8ab
+
14f8ab
             if e.get("stat", {}):
14f8ab
                 # Copy UID/GID value and then reset to zero. Copied UID/GID
14f8ab
                 # will be used to run chown once entry is created.
14f8ab
@@ -688,7 +694,7 @@ class Server(object):
14f8ab
             if blob:
14f8ab
                 cmd_ret = errno_wrap(Xattr.lsetxattr,
14f8ab
                                      [pg, 'glusterfs.gfid.newfile', blob],
14f8ab
-                                     [EEXIST, ENOENT],
14f8ab
+                                     [EEXIST, ENOENT, ESTALE],
14f8ab
                                      [ESTALE, EINVAL, EBUSY])
14f8ab
                 collect_failure(e, cmd_ret, uid, gid)
14f8ab
 
14f8ab
diff --git a/tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t b/tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t
14f8ab
index 67ac167..1a55ed2 100644
14f8ab
--- a/tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t
14f8ab
+++ b/tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t
14f8ab
@@ -203,6 +203,11 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt}
14f8ab
 TEST gluster volume geo-rep $master $slave config rsync-options "--whole-file"
14f8ab
 TEST "echo sampledata > $master_mnt/rsync_option_test_file"
14f8ab
 
14f8ab
+#rename with existing destination case BUG:1694820
14f8ab
+TEST create_rename_with_existing_destination ${master_mnt}
14f8ab
+#verify rename with existing destination case BUG:1694820
14f8ab
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt}
14f8ab
+
14f8ab
 #Verify arequal for whole volume
14f8ab
 EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt} ${slave_mnt}
14f8ab
 
14f8ab
diff --git a/tests/00-geo-rep/georep-basic-dr-rsync.t b/tests/00-geo-rep/georep-basic-dr-rsync.t
14f8ab
index 8b64370..d0c0fc9 100644
14f8ab
--- a/tests/00-geo-rep/georep-basic-dr-rsync.t
14f8ab
+++ b/tests/00-geo-rep/georep-basic-dr-rsync.t
14f8ab
@@ -204,6 +204,11 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt}
14f8ab
 TEST gluster volume geo-rep $master $slave config rsync-options "--whole-file"
14f8ab
 TEST "echo sampledata > $master_mnt/rsync_option_test_file"
14f8ab
 
14f8ab
+#rename with existing destination case BUG:1694820
14f8ab
+TEST create_rename_with_existing_destination ${master_mnt}
14f8ab
+#verify rename with existing destination case BUG:1694820
14f8ab
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt}
14f8ab
+
14f8ab
 #Verify arequal for whole volume
14f8ab
 EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt} ${slave_mnt}
14f8ab
 
14f8ab
diff --git a/tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t b/tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t
14f8ab
index 1726d0b..cb530ad 100644
14f8ab
--- a/tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t
14f8ab
+++ b/tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t
14f8ab
@@ -202,6 +202,11 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_symlink_rename_mkdir_data ${slave_mnt}/s
14f8ab
 #rsnapshot usecase
14f8ab
 EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt}
14f8ab
 
14f8ab
+#rename with existing destination case BUG:1694820
14f8ab
+TEST create_rename_with_existing_destination ${master_mnt}
14f8ab
+#verify rename with existing destination case BUG:1694820
14f8ab
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt}
14f8ab
+
14f8ab
 #Verify arequal for whole volume
14f8ab
 EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt} ${slave_mnt}
14f8ab
 
14f8ab
diff --git a/tests/00-geo-rep/georep-basic-dr-tarssh.t b/tests/00-geo-rep/georep-basic-dr-tarssh.t
14f8ab
index c5d16ac..9e2f613 100644
14f8ab
--- a/tests/00-geo-rep/georep-basic-dr-tarssh.t
14f8ab
+++ b/tests/00-geo-rep/georep-basic-dr-tarssh.t
14f8ab
@@ -202,6 +202,11 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_hardlink_rename_data ${slave_mnt}
14f8ab
 #rsnapshot usecase
14f8ab
 EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt}
14f8ab
 
14f8ab
+#rename with existing destination case BUG:1694820
14f8ab
+TEST create_rename_with_existing_destination ${master_mnt}
14f8ab
+#verify rename with existing destination case BUG:1694820
14f8ab
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt}
14f8ab
+
14f8ab
 #Verify arequal for whole volume
14f8ab
 EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt} ${slave_mnt}
14f8ab
 
14f8ab
diff --git a/tests/geo-rep.rc b/tests/geo-rep.rc
14f8ab
index d723129..e357ba8 100644
14f8ab
--- a/tests/geo-rep.rc
14f8ab
+++ b/tests/geo-rep.rc
14f8ab
@@ -403,3 +403,39 @@ function check_slave_read_only()
14f8ab
     gluster volume info $1 | grep 'features.read-only: on'
14f8ab
     echo $?
14f8ab
 }
14f8ab
+
14f8ab
+function create_rename_with_existing_destination()
14f8ab
+{
14f8ab
+    dir=$1/rename_with_existing_destination
14f8ab
+    mkdir $dir
14f8ab
+    for i in {1..5}
14f8ab
+    do
14f8ab
+        echo "Data_set$i" > $dir/data_set$i
14f8ab
+        mv $dir/data_set$i $dir/data_set -f
14f8ab
+    done
14f8ab
+}
14f8ab
+
14f8ab
+function verify_rename_with_existing_destination()
14f8ab
+{
14f8ab
+    dir=$1/rename_with_existing_destination
14f8ab
+
14f8ab
+    if [ ! -d $dir ]; then
14f8ab
+        echo 1
14f8ab
+    elif [ ! -f $dir/data_set ]; then
14f8ab
+        echo 2
14f8ab
+    elif [ -f $dir/data_set1 ]; then
14f8ab
+        echo 3
14f8ab
+    elif [ -f $dir/data_set2 ]; then
14f8ab
+        echo 4
14f8ab
+    elif [ -f $dir/data_set3 ]; then
14f8ab
+        echo 5
14f8ab
+    elif [ -f $dir/data_set4 ]; then
14f8ab
+        echo 6
14f8ab
+    elif [ -f $dir/data_set5 ]; then
14f8ab
+        echo 7
14f8ab
+    elif test "XData_set5" != "X$(cat $dir/data_set)"; then
14f8ab
+        echo 8
14f8ab
+    else
14f8ab
+        echo 0
14f8ab
+    fi
14f8ab
+}
14f8ab
-- 
14f8ab
1.8.3.1
14f8ab