From 81b5fbe38a022b738aa817444d7564534e0a886e Mon Sep 17 00:00:00 2001 From: Kotresh HR Date: Fri, 24 Aug 2018 08:30:54 -0400 Subject: [PATCH 360/362] geo-rep: Make automatic gfid conflict resolution optional Automatic gfid conflict resolution needs to be disabled during failover/failback as it might lead to data loss in the following scenario. 1. Master went down without syncing directory "dir1" to slave. 2. When slave is failed over to master, if a new file is written inside "dir1", creating dir1 again if not present, "dir1" ends up with different gfid on original slave. 3. When original master is up and failed back, due to automatic gfid conflict resolution, "dir1" present in original master is deleted losing all files and only new file created on original slave is restored. Hence during failover/failback, automatic gfid conflict resolution should be disabled. So in these cases, appropriate decision is taken. Backport of: > Patch: https://review.gluster.org/20986/ > BUG: 1622076 > Change-Id: I433616f5d3e13d4b6eb675475bd554ca34928573 BUG: 1622029 Signed-off-by: Kotresh HR Change-Id: I433616f5d3e13d4b6eb675475bd554ca34928573 Reviewed-on: https://code.engineering.redhat.com/gerrit/148004 Tested-by: RHGS Build Bot Reviewed-by: Aravinda Vishwanathapura Krishna Murthy Reviewed-by: Atin Mukherjee --- geo-replication/syncdaemon/gsyncd.py | 2 ++ geo-replication/syncdaemon/master.py | 22 ++++++++++++---------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py index fff193b..23d588e 100644 --- a/geo-replication/syncdaemon/gsyncd.py +++ b/geo-replication/syncdaemon/gsyncd.py @@ -323,6 +323,8 @@ def main_i(): op.add_option('--changelog-archive-format', metavar='N', type=str, default="%Y%m") op.add_option('--use-meta-volume', default=False, action='store_true') + op.add_option('--gfid-conflict-resolution', default=True, + action='store_true') op.add_option('--meta-volume-mnt', 
metavar='N', type=str, default="/var/run/gluster/shared_storage") op.add_option( diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py index 1399378..6de2c77 100644 --- a/geo-replication/syncdaemon/master.py +++ b/geo-replication/syncdaemon/master.py @@ -1163,16 +1163,18 @@ class GMasterChangelogMixin(GMasterCommon): self.status.inc_value("entry", len(entries)) failures = self.slave.server.entry_ops(entries) - count = 0 - while failures and count < self.MAX_OE_RETRIES: - count += 1 - self.handle_entry_failures(failures, entries) - logging.info("Retry original entries. count = %s" % count) - failures = self.slave.server.entry_ops(entries) - if not failures: - logging.info("Sucessfully fixed all entry ops with gfid " - "mismatch") - break + + if boolify(gconf.gfid_conflict_resolution): + count = 0 + while failures and count < self.MAX_OE_RETRIES: + count += 1 + self.handle_entry_failures(failures, entries) + logging.info("Retry original entries. count = %s" % count) + failures = self.slave.server.entry_ops(entries) + if not failures: + logging.info("Sucessfully fixed all entry ops with " + "gfid mismatch") + break self.log_failures(failures, 'gfid', gauxpfx(), 'ENTRY') -- 1.8.3.1