a3470f
From 81b5fbe38a022b738aa817444d7564534e0a886e Mon Sep 17 00:00:00 2001
a3470f
From: Kotresh HR <khiremat@redhat.com>
a3470f
Date: Fri, 24 Aug 2018 08:30:54 -0400
a3470f
Subject: [PATCH 360/362] geo-rep: Make automatic gfid conflict resolution
a3470f
 optional
a3470f
a3470f
Automatic gfid conflict resolution needs to be disabled
a3470f
during failover/failback as it might lead to data loss
a3470f
in the following scenario.
a3470f
a3470f
1. Master went down without syncing directory "dir1" to slave.
a3470f
2. When slave is failed over to master, if a new file
a3470f
   is written inside "dir1", creating dir1 again if not
a3470f
   present, "dir1" ends up with different gfid on original
a3470f
   slave.
a3470f
3. When original master is up and failed back, due to
a3470f
   automatic gfid conflict resolution, "dir1" present in
a3470f
   original master is deleted losing all files and only
a3470f
   new file created on original slave is restored.
a3470f
a3470f
Hence during failover/failback, automatic gfid conflict
a3470f
resolution should be disabled so that, in these cases, an
a3470f
appropriate decision can be taken.
a3470f
a3470f
Backport of:
a3470f
 > Patch: https://review.gluster.org/20986/
a3470f
 > BUG: 1622076
a3470f
 > Change-Id: I433616f5d3e13d4b6eb675475bd554ca34928573
a3470f
a3470f
BUG: 1622029
a3470f
Signed-off-by: Kotresh HR <khiremat@redhat.com>
a3470f
Change-Id: I433616f5d3e13d4b6eb675475bd554ca34928573
a3470f
Reviewed-on: https://code.engineering.redhat.com/gerrit/148004
a3470f
Tested-by: RHGS Build Bot <nigelb@redhat.com>
a3470f
Reviewed-by: Aravinda Vishwanathapura Krishna Murthy <avishwan@redhat.com>
a3470f
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
a3470f
---
a3470f
 geo-replication/syncdaemon/gsyncd.py |  2 ++
a3470f
 geo-replication/syncdaemon/master.py | 22 ++++++++++++----------
a3470f
 2 files changed, 14 insertions(+), 10 deletions(-)
a3470f
a3470f
diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py
a3470f
index fff193b..23d588e 100644
a3470f
--- a/geo-replication/syncdaemon/gsyncd.py
a3470f
+++ b/geo-replication/syncdaemon/gsyncd.py
a3470f
@@ -323,6 +323,8 @@ def main_i():
a3470f
     op.add_option('--changelog-archive-format', metavar='N',
a3470f
                   type=str, default="%Y%m")
a3470f
     op.add_option('--use-meta-volume', default=False, action='store_true')
a3470f
+    op.add_option('--gfid-conflict-resolution', default=True,
a3470f
+                  action='store_true')
a3470f
     op.add_option('--meta-volume-mnt', metavar='N',
a3470f
                   type=str, default="/var/run/gluster/shared_storage")
a3470f
     op.add_option(
a3470f
diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py
a3470f
index 1399378..6de2c77 100644
a3470f
--- a/geo-replication/syncdaemon/master.py
a3470f
+++ b/geo-replication/syncdaemon/master.py
a3470f
@@ -1163,16 +1163,18 @@ class GMasterChangelogMixin(GMasterCommon):
a3470f
             self.status.inc_value("entry", len(entries))
a3470f
 
a3470f
             failures = self.slave.server.entry_ops(entries)
a3470f
-            count = 0
a3470f
-            while failures and count < self.MAX_OE_RETRIES:
a3470f
-                count += 1
a3470f
-                self.handle_entry_failures(failures, entries)
a3470f
-                logging.info("Retry original entries. count = %s" % count)
a3470f
-                failures = self.slave.server.entry_ops(entries)
a3470f
-                if not failures:
a3470f
-                    logging.info("Sucessfully fixed all entry ops with gfid "
a3470f
-                                 "mismatch")
a3470f
-                    break
a3470f
+
a3470f
+            if boolify(gconf.gfid_conflict_resolution):
a3470f
+                count = 0
a3470f
+                while failures and count < self.MAX_OE_RETRIES:
a3470f
+                    count += 1
a3470f
+                    self.handle_entry_failures(failures, entries)
a3470f
+                    logging.info("Retry original entries. count = %s" % count)
a3470f
+                    failures = self.slave.server.entry_ops(entries)
a3470f
+                    if not failures:
a3470f
+                        logging.info("Sucessfully fixed all entry ops with "
a3470f
+                                     "gfid mismatch")
a3470f
+                        break
a3470f
 
a3470f
             self.log_failures(failures, 'gfid', gauxpfx(), 'ENTRY')
a3470f
 
a3470f
-- 
a3470f
1.8.3.1
a3470f