233933
From 69ac1fd2da7a57f2f0854412863911959bf71fde Mon Sep 17 00:00:00 2001
233933
From: Sunny Kumar <sunkumar@redhat.com>
233933
Date: Tue, 2 Apr 2019 12:38:09 +0530
233933
Subject: [PATCH 161/169] geo-rep: Fix rename with existing destination with
233933
 same gfid
233933
233933
Problem:
233933
   Geo-rep fails to sync the rename properly if destination exists.
233933
It results in source to be remained on slave causing more number of
233933
files on slave. Also heavy rename workload like logrotate caused
233933
lot of ESTALE errors
233933
233933
Cause:
233933
   Geo-rep fails to sync rename if destination exists if creation
233933
of source file also falls into single batch of changelogs being
233933
processed. This is because, after fixing problematic gfids verifying
233933
from master, while re-processing original entries, CREATE also was
233933
re-processed causing more files on slave and rename to be failed.
233933
233933
Solution:
233933
   Entries need to be removed from retrial list after fixing
233933
problematic gfids on slave so that it's not re-created again on slave.
233933
   Also treat ESTALE as EEXIST so that the error is properly handled
233933
verifying the op on master volume.
233933
233933
Backport of:
233933
 > Patch: https://review.gluster.org/22519
233933
 > Change-Id: I50cf289e06b997adddff0552bf2466d9201dd1f9
233933
 > BUG: 1694820
233933
 > Signed-off-by: Kotresh HR <khiremat@redhat.com>
233933
 > Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
233933
233933
Change-Id: I50cf289e06b997adddff0552bf2466d9201dd1f9
233933
fixes: bz#1708121
233933
Signed-off-by: Kotresh HR <khiremat@redhat.com>
233933
Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
233933
Reviewed-on: https://code.engineering.redhat.com/gerrit/172393
233933
Tested-by: RHGS Build Bot <nigelb@redhat.com>
233933
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
233933
---
233933
 geo-replication/syncdaemon/master.py              | 41 +++++++++++++++++++++--
233933
 geo-replication/syncdaemon/resource.py            | 10 ++++--
233933
 tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t  |  5 +++
233933
 tests/00-geo-rep/georep-basic-dr-rsync.t          |  5 +++
233933
 tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t |  5 +++
233933
 tests/00-geo-rep/georep-basic-dr-tarssh.t         |  5 +++
233933
 tests/geo-rep.rc                                  | 36 ++++++++++++++++++++
233933
 7 files changed, 102 insertions(+), 5 deletions(-)
233933
233933
diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py
233933
index 3da7610..42c86d7 100644
233933
--- a/geo-replication/syncdaemon/master.py
233933
+++ b/geo-replication/syncdaemon/master.py
233933
@@ -65,6 +65,9 @@ def _volinfo_hook_relax_foreign(self):
233933
 def edct(op, **ed):
233933
     dct = {}
233933
     dct['op'] = op
233933
+    # This is used in automatic gfid conflict resolution.
233933
+    # When marked True, it's skipped during re-processing.
233933
+    dct['skip_entry'] = False
233933
     for k in ed:
233933
         if k == 'stat':
233933
             st = ed[k]
233933
@@ -792,6 +795,7 @@ class GMasterChangelogMixin(GMasterCommon):
233933
         pfx = gauxpfx()
233933
         fix_entry_ops = []
233933
         failures1 = []
233933
+        remove_gfids = set()
233933
         for failure in failures:
233933
             if failure[2]['name_mismatch']:
233933
                 pbname = failure[2]['slave_entry']
233933
@@ -822,6 +826,18 @@ class GMasterChangelogMixin(GMasterCommon):
233933
                             edct('UNLINK',
233933
                                  gfid=failure[2]['slave_gfid'],
233933
                                  entry=pbname))
233933
+                    remove_gfids.add(slave_gfid)
233933
+                    if op in ['RENAME']:
233933
+                        # If renamed gfid doesn't exists on master, remove
233933
+                        # rename entry and unlink src on slave
233933
+                        st = lstat(os.path.join(pfx, failure[0]['gfid']))
233933
+                        if isinstance(st, int) and st == ENOENT:
233933
+                            logging.debug("Unlink source %s" % repr(failure))
233933
+                            remove_gfids.add(failure[0]['gfid'])
233933
+                            fix_entry_ops.append(
233933
+                                edct('UNLINK',
233933
+                                     gfid=failure[0]['gfid'],
233933
+                                     entry=failure[0]['entry']))
233933
                 # Takes care of scenarios of hardlinks/renames on master
233933
                 elif not isinstance(st, int):
233933
                     if matching_disk_gfid(slave_gfid, pbname):
233933
@@ -831,7 +847,12 @@ class GMasterChangelogMixin(GMasterCommon):
233933
                                         ' Safe to ignore, take out entry',
233933
                                         retry_count=retry_count,
233933
                                         entry=repr(failure)))
233933
-                        entries.remove(failure[0])
233933
+                        remove_gfids.add(failure[0]['gfid'])
233933
+                        if op == 'RENAME':
233933
+                            fix_entry_ops.append(
233933
+                                edct('UNLINK',
233933
+                                     gfid=failure[0]['gfid'],
233933
+                                     entry=failure[0]['entry']))
233933
                     # The file exists on master but with different name.
233933
                     # Probably renamed and got missed during xsync crawl.
233933
                     elif failure[2]['slave_isdir']:
233933
@@ -856,7 +877,10 @@ class GMasterChangelogMixin(GMasterCommon):
233933
                                             'take out entry',
233933
                                             retry_count=retry_count,
233933
                                             entry=repr(failure)))
233933
-                            entries.remove(failure[0])
233933
+                            try:
233933
+                                entries.remove(failure[0])
233933
+                            except ValueError:
233933
+                                pass
233933
                         else:
233933
                             rename_dict = edct('RENAME', gfid=slave_gfid,
233933
                                                entry=src_entry,
233933
@@ -896,7 +920,10 @@ class GMasterChangelogMixin(GMasterCommon):
233933
                                     'ignore, take out entry',
233933
                                     retry_count=retry_count,
233933
                                     entry=repr(failure)))
233933
-                    entries.remove(failure[0])
233933
+                    try:
233933
+                        entries.remove(failure[0])
233933
+                    except ValueError:
233933
+                        pass
233933
                 else:
233933
                     logging.info(lf('Fixing ENOENT error in slave. Create '
233933
                                     'parent directory on slave.',
233933
@@ -913,6 +940,14 @@ class GMasterChangelogMixin(GMasterCommon):
233933
                         edct('MKDIR', gfid=pargfid, entry=dir_entry,
233933
                              mode=st.st_mode, uid=st.st_uid, gid=st.st_gid))
233933
 
233933
+        logging.debug("remove_gfids: %s" % repr(remove_gfids))
233933
+        if remove_gfids:
233933
+            for e in entries:
233933
+                if e['op'] in ['MKDIR', 'MKNOD', 'CREATE', 'RENAME'] \
233933
+                   and e['gfid'] in remove_gfids:
233933
+                    logging.debug("Removed entry op from retrial list: entry: %s" % repr(e))
233933
+                    e['skip_entry'] = True
233933
+
233933
         if fix_entry_ops:
233933
             # Process deletions of entries whose gfids are mismatched
233933
             failures1 = self.slave.server.entry_ops(fix_entry_ops)
233933
diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py
233933
index c290d86..f54ccd9 100644
233933
--- a/geo-replication/syncdaemon/resource.py
233933
+++ b/geo-replication/syncdaemon/resource.py
233933
@@ -426,7 +426,7 @@ class Server(object):
233933
                 e['stat']['uid'] = uid
233933
                 e['stat']['gid'] = gid
233933
 
233933
-            if cmd_ret == EEXIST:
233933
+            if cmd_ret in [EEXIST, ESTALE]:
233933
                 if dst:
233933
                     en = e['entry1']
233933
                 else:
233933
@@ -510,6 +510,12 @@ class Server(object):
233933
             entry = e['entry']
233933
             uid = 0
233933
             gid = 0
233933
+
233933
+            # Skip entry processing if it's marked true during gfid
233933
+            # conflict resolution
233933
+            if e['skip_entry']:
233933
+                continue
233933
+
233933
             if e.get("stat", {}):
233933
                 # Copy UID/GID value and then reset to zero. Copied UID/GID
233933
                 # will be used to run chown once entry is created.
233933
@@ -688,7 +694,7 @@ class Server(object):
233933
             if blob:
233933
                 cmd_ret = errno_wrap(Xattr.lsetxattr,
233933
                                      [pg, 'glusterfs.gfid.newfile', blob],
233933
-                                     [EEXIST, ENOENT],
233933
+                                     [EEXIST, ENOENT, ESTALE],
233933
                                      [ESTALE, EINVAL, EBUSY])
233933
                 collect_failure(e, cmd_ret, uid, gid)
233933
 
233933
diff --git a/tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t b/tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t
233933
index 67ac167..1a55ed2 100644
233933
--- a/tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t
233933
+++ b/tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t
233933
@@ -203,6 +203,11 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt}
233933
 TEST gluster volume geo-rep $master $slave config rsync-options "--whole-file"
233933
 TEST "echo sampledata > $master_mnt/rsync_option_test_file"
233933
 
233933
+#rename with existing destination case BUG:1694820
233933
+TEST create_rename_with_existing_destination ${master_mnt}
233933
+#verify rename with existing destination case BUG:1694820
233933
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt}
233933
+
233933
 #Verify arequal for whole volume
233933
 EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt} ${slave_mnt}
233933
 
233933
diff --git a/tests/00-geo-rep/georep-basic-dr-rsync.t b/tests/00-geo-rep/georep-basic-dr-rsync.t
233933
index 8b64370..d0c0fc9 100644
233933
--- a/tests/00-geo-rep/georep-basic-dr-rsync.t
233933
+++ b/tests/00-geo-rep/georep-basic-dr-rsync.t
233933
@@ -204,6 +204,11 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt}
233933
 TEST gluster volume geo-rep $master $slave config rsync-options "--whole-file"
233933
 TEST "echo sampledata > $master_mnt/rsync_option_test_file"
233933
 
233933
+#rename with existing destination case BUG:1694820
233933
+TEST create_rename_with_existing_destination ${master_mnt}
233933
+#verify rename with existing destination case BUG:1694820
233933
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt}
233933
+
233933
 #Verify arequal for whole volume
233933
 EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt} ${slave_mnt}
233933
 
233933
diff --git a/tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t b/tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t
233933
index 1726d0b..cb530ad 100644
233933
--- a/tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t
233933
+++ b/tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t
233933
@@ -202,6 +202,11 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_symlink_rename_mkdir_data ${slave_mnt}/s
233933
 #rsnapshot usecase
233933
 EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt}
233933
 
233933
+#rename with existing destination case BUG:1694820
233933
+TEST create_rename_with_existing_destination ${master_mnt}
233933
+#verify rename with existing destination case BUG:1694820
233933
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt}
233933
+
233933
 #Verify arequal for whole volume
233933
 EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt} ${slave_mnt}
233933
 
233933
diff --git a/tests/00-geo-rep/georep-basic-dr-tarssh.t b/tests/00-geo-rep/georep-basic-dr-tarssh.t
233933
index c5d16ac..9e2f613 100644
233933
--- a/tests/00-geo-rep/georep-basic-dr-tarssh.t
233933
+++ b/tests/00-geo-rep/georep-basic-dr-tarssh.t
233933
@@ -202,6 +202,11 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_hardlink_rename_data ${slave_mnt}
233933
 #rsnapshot usecase
233933
 EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt}
233933
 
233933
+#rename with existing destination case BUG:1694820
233933
+TEST create_rename_with_existing_destination ${master_mnt}
233933
+#verify rename with existing destination case BUG:1694820
233933
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt}
233933
+
233933
 #Verify arequal for whole volume
233933
 EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt} ${slave_mnt}
233933
 
233933
diff --git a/tests/geo-rep.rc b/tests/geo-rep.rc
233933
index d723129..e357ba8 100644
233933
--- a/tests/geo-rep.rc
233933
+++ b/tests/geo-rep.rc
233933
@@ -403,3 +403,39 @@ function check_slave_read_only()
233933
     gluster volume info $1 | grep 'features.read-only: on'
233933
     echo $?
233933
 }
233933
+
233933
+function create_rename_with_existing_destination()
233933
+{
233933
+    dir=$1/rename_with_existing_destination
233933
+    mkdir $dir
233933
+    for i in {1..5}
233933
+    do
233933
+        echo "Data_set$i" > $dir/data_set$i
233933
+        mv $dir/data_set$i $dir/data_set -f
233933
+    done
233933
+}
233933
+
233933
+function verify_rename_with_existing_destination()
233933
+{
233933
+    dir=$1/rename_with_existing_destination
233933
+
233933
+    if [ ! -d $dir ]; then
233933
+        echo 1
233933
+    elif [ ! -f $dir/data_set ]; then
233933
+        echo 2
233933
+    elif [ -f $dir/data_set1 ]; then
233933
+        echo 3
233933
+    elif [ -f $dir/data_set2 ]; then
233933
+        echo 4
233933
+    elif [ -f $dir/data_set3 ]; then
233933
+        echo 5
233933
+    elif [ -f $dir/data_set4 ]; then
233933
+        echo 6
233933
+    elif [ -f $dir/data_set5 ]; then
233933
+        echo 7
233933
+    elif test "XData_set5" != "X$(cat $dir/data_set)"; then
233933
+        echo 8
233933
+    else
233933
+        echo 0
233933
+    fi
233933
+}
233933
-- 
233933
1.8.3.1
233933