d2787b
From f2d3866e617d25ea62cda01afddc81ef0db3356e Mon Sep 17 00:00:00 2001
d2787b
From: Xavi Hernandez <xhernandez@redhat.com>
d2787b
Date: Tue, 4 May 2021 22:39:03 +0200
d2787b
Subject: [PATCH 555/584] geo-rep: Improve handling of gfid mismatches
d2787b
d2787b
In some circumstances geo-replication can detect mismatching gfids
d2787b
between primary and secondary. These entries are fixed in an iterative
d2787b
way, assuming that after a fix, a previously failing entry could
d2787b
succeed.
d2787b
d2787b
The previous code tried to fix them in a loop that can be executed
d2787b
up to 10 times. If some entry cannot be fixed after 10 attempts, it's
d2787b
discarded. These fixes are very slow, so trying to do them many times
d2787b
causes geo-replication to get out of sync.
d2787b
d2787b
To minimize the number of iterations done, this patch checks if the
d2787b
number of entries and failures remains constant after each iteration.
d2787b
If they are constant, it means that nothing else can be fixed, so it
d2787b
makes no sense to do more iterations. This reduces the number of
d2787b
iterations to 2 or 3 in most cases, improving geo-replication
d2787b
performance.
d2787b
d2787b
Backport of:
d2787b
> Upstream-patch: https://github.com/gluster/glusterfs/pull/2389
d2787b
> Fixes: #2388
d2787b
> Change-Id: I6d9a623a60045694e1a832195e1dc1fb9e88ae54
d2787b
> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
d2787b
d2787b
BUG: 1957191
d2787b
Change-Id: I6d9a623a60045694e1a832195e1dc1fb9e88ae54
d2787b
Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
d2787b
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244550
d2787b
Tested-by: RHGS Build Bot <nigelb@redhat.com>
d2787b
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
d2787b
---
d2787b
 geo-replication/syncdaemon/master.py | 18 +++++++++++++++++-
d2787b
 1 file changed, 17 insertions(+), 1 deletion(-)
d2787b
d2787b
diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py
d2787b
index 98637e7..aef9373 100644
d2787b
--- a/geo-replication/syncdaemon/master.py
d2787b
+++ b/geo-replication/syncdaemon/master.py
d2787b
@@ -1224,9 +1224,11 @@ class GMasterChangelogMixin(GMasterCommon):
d2787b
 
d2787b
             if gconf.get("gfid-conflict-resolution"):
d2787b
                 count = 0
d2787b
+                num_entries = len(entries)
d2787b
+                num_failures = len(failures)
d2787b
                 if failures:
d2787b
                     logging.info(lf('Entry ops failed with gfid mismatch',
d2787b
-                                count=len(failures)))
d2787b
+                                    count=num_failures))
d2787b
                 while failures and count < self.MAX_OE_RETRIES:
d2787b
                     count += 1
d2787b
                     self.handle_entry_failures(failures, entries)
d2787b
@@ -1237,6 +1239,20 @@ class GMasterChangelogMixin(GMasterCommon):
d2787b
                                      "gfid mismatch")
d2787b
                         break
d2787b
 
d2787b
+                    # If this iteration has not removed any entry or reduced
d2787b
+                    # the number of failures compared to the previous one, we
d2787b
+                    # don't need to keep iterating because we'll get the same
d2787b
+                    # result in all other attempts.
d2787b
+                    if ((num_entries == len(entries)) and
d2787b
+                        (num_failures == len(failures))):
d2787b
+                        logging.info(lf("No more gfid mismatches can be fixed",
d2787b
+                                        entries=num_entries,
d2787b
+                                        failures=num_failures))
d2787b
+                        break
d2787b
+
d2787b
+                    num_entries = len(entries)
d2787b
+                    num_failures = len(failures)
d2787b
+
d2787b
             self.log_failures(failures, 'gfid', gauxpfx(), 'ENTRY')
d2787b
             self.status.dec_value("entry", len(entries))
d2787b
 
d2787b
-- 
d2787b
1.8.3.1
d2787b