|
|
a3470f |
From 81b5fbe38a022b738aa817444d7564534e0a886e Mon Sep 17 00:00:00 2001
|
|
|
a3470f |
From: Kotresh HR <khiremat@redhat.com>
|
|
|
a3470f |
Date: Fri, 24 Aug 2018 08:30:54 -0400
|
|
|
a3470f |
Subject: [PATCH 360/362] geo-rep: Make automatic gfid conflict resolution
|
|
|
a3470f |
optional
|
|
|
a3470f |
|
|
|
a3470f |
Automatic gfid conflict resolution needs to be disabled
|
|
|
a3470f |
during failover/failback as it might lead to data loss
|
|
|
a3470f |
in the following scenario.
|
|
|
a3470f |
|
|
|
a3470f |
1. Master went down without syncing directory "dir1" to slave.
|
|
|
a3470f |
2. When slave is failed over to master, if a new file
|
|
|
a3470f |
is written inside "dir1", creating dir1 again if not
|
|
|
a3470f |
present, "dir1" ends up with different gfid on original
|
|
|
a3470f |
slave.
|
|
|
a3470f |
3. When original master is up and failed back, due to
|
|
|
a3470f |
automatic gfid conflict resolution, "dir1" present in
|
|
|
a3470f |
original master is deleted losing all files and only
|
|
|
a3470f |
new file created on original slave is restored.
|
|
|
a3470f |
|
|
|
a3470f |
Hence during failover/failback, automatic gfid conflict
|
|
|
a3470f |
resolution should be disabled. So in these cases, appropriate
|
|
|
a3470f |
decision is taken.
|
|
|
a3470f |
|
|
|
a3470f |
Backport of:
|
|
|
a3470f |
> Patch: https://review.gluster.org/20986/
|
|
|
a3470f |
> BUG: 1622076
|
|
|
a3470f |
> Change-Id: I433616f5d3e13d4b6eb675475bd554ca34928573
|
|
|
a3470f |
|
|
|
a3470f |
BUG: 1622029
|
|
|
a3470f |
Signed-off-by: Kotresh HR <khiremat@redhat.com>
|
|
|
a3470f |
Change-Id: I433616f5d3e13d4b6eb675475bd554ca34928573
|
|
|
a3470f |
Reviewed-on: https://code.engineering.redhat.com/gerrit/148004
|
|
|
a3470f |
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
|
a3470f |
Reviewed-by: Aravinda Vishwanathapura Krishna Murthy <avishwan@redhat.com>
|
|
|
a3470f |
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
|
|
|
a3470f |
---
|
|
|
a3470f |
geo-replication/syncdaemon/gsyncd.py | 2 ++
|
|
|
a3470f |
geo-replication/syncdaemon/master.py | 22 ++++++++++++----------
|
|
|
a3470f |
2 files changed, 14 insertions(+), 10 deletions(-)
|
|
|
a3470f |
|
|
|
a3470f |
diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py
|
|
|
a3470f |
index fff193b..23d588e 100644
|
|
|
a3470f |
--- a/geo-replication/syncdaemon/gsyncd.py
|
|
|
a3470f |
+++ b/geo-replication/syncdaemon/gsyncd.py
|
|
|
a3470f |
@@ -323,6 +323,8 @@ def main_i():
|
|
|
a3470f |
op.add_option('--changelog-archive-format', metavar='N',
|
|
|
a3470f |
type=str, default="%Y%m")
|
|
|
a3470f |
op.add_option('--use-meta-volume', default=False, action='store_true')
|
|
|
a3470f |
+ op.add_option('--gfid-conflict-resolution', default=True,
|
|
|
a3470f |
+ action='store_true')
|
|
|
a3470f |
op.add_option('--meta-volume-mnt', metavar='N',
|
|
|
a3470f |
type=str, default="/var/run/gluster/shared_storage")
|
|
|
a3470f |
op.add_option(
|
|
|
a3470f |
diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py
|
|
|
a3470f |
index 1399378..6de2c77 100644
|
|
|
a3470f |
--- a/geo-replication/syncdaemon/master.py
|
|
|
a3470f |
+++ b/geo-replication/syncdaemon/master.py
|
|
|
a3470f |
@@ -1163,16 +1163,18 @@ class GMasterChangelogMixin(GMasterCommon):
|
|
|
a3470f |
self.status.inc_value("entry", len(entries))
|
|
|
a3470f |
|
|
|
a3470f |
failures = self.slave.server.entry_ops(entries)
|
|
|
a3470f |
- count = 0
|
|
|
a3470f |
- while failures and count < self.MAX_OE_RETRIES:
|
|
|
a3470f |
- count += 1
|
|
|
a3470f |
- self.handle_entry_failures(failures, entries)
|
|
|
a3470f |
- logging.info("Retry original entries. count = %s" % count)
|
|
|
a3470f |
- failures = self.slave.server.entry_ops(entries)
|
|
|
a3470f |
- if not failures:
|
|
|
a3470f |
- logging.info("Sucessfully fixed all entry ops with gfid "
|
|
|
a3470f |
- "mismatch")
|
|
|
a3470f |
- break
|
|
|
a3470f |
+
|
|
|
a3470f |
+ if boolify(gconf.gfid_conflict_resolution):
|
|
|
a3470f |
+ count = 0
|
|
|
a3470f |
+ while failures and count < self.MAX_OE_RETRIES:
|
|
|
a3470f |
+ count += 1
|
|
|
a3470f |
+ self.handle_entry_failures(failures, entries)
|
|
|
a3470f |
+ logging.info("Retry original entries. count = %s" % count)
|
|
|
a3470f |
+ failures = self.slave.server.entry_ops(entries)
|
|
|
a3470f |
+ if not failures:
|
|
|
a3470f |
+ logging.info("Sucessfully fixed all entry ops with "
|
|
|
a3470f |
+ "gfid mismatch")
|
|
|
a3470f |
+ break
|
|
|
a3470f |
|
|
|
a3470f |
self.log_failures(failures, 'gfid', gauxpfx(), 'ENTRY')
|
|
|
a3470f |
|
|
|
a3470f |
--
|
|
|
a3470f |
1.8.3.1
|
|
|
a3470f |
|