SOURCES/0002-ticket-2058-Add-keep-alive-entry-after-on-line-initi.patch

From 29c9e1c3c760f0941b022d45d14c248e9ceb9738 Mon Sep 17 00:00:00 2001
From: progier389 <72748589+progier389@users.noreply.github.com>
Date: Tue, 3 Nov 2020 12:18:50 +0100
Subject: [PATCH 2/3] ticket 2058: Add keep alive entry after on-line
 initialization - second version (#4399)

Bug description:
Keep alive entry is not created on the target master after on-line initialization,
and its RUV element stays empty until a direct update is issued on that master.

Fix description:
The patch allows a consumer (configured as a master) to create (if it did not
exist before) the consumer's keep alive entry. It creates it at the end of a
replication session, at a time we are sure the changelog exists and will not
be reset. It allows a consumer to have a RUV element with a CSN in the RUV at
the first incoming replication session.
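
For illustration, the effect of the fix can be observed with the same kind of
search the new testcase performs: the keep alive entries are ldapsubentries
named "cn=repl keep alive <rid>" directly under the replicated suffix (see
KEEP_ALIVE_DN_FORMAT in repl5_replica.c). A minimal python-ldap sketch, where
the URL, credentials and suffix are placeholders to adapt to the deployment:

    import ldap

    # Connect to the freshly initialized master (placeholder URL/credentials)
    inst = ldap.initialize("ldap://localhost:39002")
    inst.simple_bind_s("cn=Directory Manager", "password")

    # List the keep alive subentries directly below the replicated suffix
    entries = inst.search_s("dc=example,dc=com", ldap.SCOPE_ONELEVEL,
                            "(&(objectclass=ldapsubentry)(cn=repl keep alive*))",
                            ['cn', 'modifyTimestamp'])
    for dn, attrs in entries:
        print(dn, attrs)

With the fix, the target master's own keep alive entry is present after the
first incoming replication session, which is what the new testcase asserts
(one entry per master on every master once the RUVs are in sync).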

That is basically lkrispen's proposal with an associated pytest testcase.

Second version changes:
   - moved the testcase to suites/replication/regression_test.py
   - set up the topology from a 2 master topology, then
     reinitialized the replicas from an ldif without replication metadata
     rather than using the cli.
   - search for keepalive entries using search_s instead of getEntry
   - add a comment about the purpose of the keep alive entries

last commit:
   - wait until the RUVs are in sync before checking the keep alive entries

Reviewed by: droideck, Firstyear

Platforms tested: F32

relates: #2058
---
 .../suites/replication/regression_test.py     | 130 ++++++++++++++++++
 .../plugins/replication/repl5_replica.c       |  14 ++
 ldap/servers/plugins/replication/repl_extop.c |   4 +
 3 files changed, 148 insertions(+)

diff --git a/dirsrvtests/tests/suites/replication/regression_test.py b/dirsrvtests/tests/suites/replication/regression_test.py
index 844d762b9..14b9d6a44 100644
--- a/dirsrvtests/tests/suites/replication/regression_test.py
+++ b/dirsrvtests/tests/suites/replication/regression_test.py
@@ -98,6 +98,30 @@ def _move_ruv(ldif_file):
         for dn, entry in ldif_list:
             ldif_writer.unparse(dn, entry)
 
+def _remove_replication_data(ldif_file):
+    """ Remove the replication data from ldif file:
+        db2ldif without -r includes some of the replica data like
+        - nsUniqueId
+        - keepalive entries
+        This function filters the ldif file to remove this data
+    """
+
+    with open(ldif_file) as f:
+        parser = ldif.LDIFRecordList(f)
+        parser.parse()
+
+        ldif_list = parser.all_records
+        # Iterate on a copy of the ldif entry list
+        for dn, entry in ldif_list[:]:
+            if dn.startswith('cn=repl keep alive'):
+                ldif_list.remove((dn,entry))
+            else:
+                entry.pop('nsUniqueId')
+    with open(ldif_file, 'w') as f:
+        ldif_writer = ldif.LDIFWriter(f)
+        for dn, entry in ldif_list:
+            ldif_writer.unparse(dn, entry)
+
 
 @pytest.fixture(scope="module")
 def topo_with_sigkill(request):
@@ -897,6 +921,112 @@ def test_moving_entry_make_online_init_fail(topology_m2):
     assert len(m1entries) == len(m2entries)
 
 
+def get_keepalive_entries(instance, replica):
+    # Returns the keep alive entries that exist under the replica suffix on the given instance
+    try:
+        entries = instance.search_s(replica.get_suffix(), ldap.SCOPE_ONELEVEL,
+                    "(&(objectclass=ldapsubentry)(cn=repl keep alive*))",
+                    ['cn', 'nsUniqueId', 'modifyTimestamp'])
+    except ldap.LDAPError as e:
+        log.fatal('Failed to retrieve keepalive entries on instance %s: error %s' % (instance, str(e)))
+        assert False
+    # No error, so let's log the keepalive entries
+    if log.isEnabledFor(logging.DEBUG):
+        for ret in entries:
+            log.debug("Found keepalive entry:\n" + str(ret))
+    return entries
+
+def verify_keepalive_entries(topo, expected):
+    # Check that keep alive entries exist (or do not exist) for every master on every master
+    # Note: The testing method is quite basic: counting that there is one keepalive entry per master.
+    # That is ok for simple test cases like test_online_init_should_create_keepalive_entries but
+    # not for the general case, as keep alive entries associated with masters that no longer exist
+    # may remain (for example after: db2ldif / demote a master / ldif2db / init other masters)
+    # ==> if the function is somehow pushed in lib389, a check better than simply counting the entries
+    # should be done.
+    for masterId in topo.ms:
+        master = topo.ms[masterId]
+        for replica in Replicas(master).list():
+            if (replica.get_role() != ReplicaRole.MASTER):
+                continue
+            replica_info = f'master: {masterId} RID: {replica.get_rid()} suffix: {replica.get_suffix()}'
+            log.debug(f'Checking keepAliveEntries on {replica_info}')
+            keepaliveEntries = get_keepalive_entries(master, replica)
+            expectedCount = len(topo.ms) if expected else 0
+            foundCount = len(keepaliveEntries)
+            if (foundCount == expectedCount):
+                log.debug(f'Found {foundCount} keepalive entries as expected on {replica_info}.')
+            else:
+                log.error(f'{foundCount} keepalive entries were found '
+                          f'while {expectedCount} were expected on {replica_info}.')
+                assert False
+
+
+def test_online_init_should_create_keepalive_entries(topo_m2):
+    """Check that keep alive entries are created when initializing a master from another one
+
+    :id: d5940e71-d18a-4b71-aaf7-b9185361fffe
+    :setup: Two masters replication setup
+    :steps:
+        1. Generate ldif without replication data
+        2. Init both masters from that ldif
+        3. Check that keep alive entries do not exist
+        4. Perform on-line init of master2 from master1
+        5. Check that keep alive entries exist
+    :expectedresults:
+        1. No error while generating ldif
+        2. No error while importing the ldif file
+        3. No keepalive entry should exist on any master
+        4. No error while initializing master2
+        5. All keepalive entries should exist on every master
+
+    """
+
+    repl = ReplicationManager(DEFAULT_SUFFIX)
+    m1 = topo_m2.ms["master1"]
+    m2 = topo_m2.ms["master2"]
+    # Step 1: Generate ldif without replication data
+    m1.stop()
+    m2.stop()
+    ldif_file = '%s/norepl.ldif' % m1.get_ldif_dir()
+    m1.db2ldif(bename=DEFAULT_BENAME, suffixes=[DEFAULT_SUFFIX],
+               excludeSuffixes=None, repl_data=False,
+               outputfile=ldif_file, encrypt=False)
+    # Remove replication metadata that is still in the ldif
+    _remove_replication_data(ldif_file)
+
+    # Step 2: Init both masters from that ldif
+    m1.ldif2db(DEFAULT_BENAME, None, None, None, ldif_file)
+    m2.ldif2db(DEFAULT_BENAME, None, None, None, ldif_file)
+    m1.start()
+    m2.start()
+
+    """ Replica state is now as if CLI setup had been done using:
+        dsconf master1 replication enable --suffix "${SUFFIX}" --role master
+        dsconf master2 replication enable --suffix "${SUFFIX}" --role master
+        dsconf master1 replication create-manager --name "${REPLICATION_MANAGER_NAME}" --passwd "${REPLICATION_MANAGER_PASSWORD}"
+        dsconf master2 replication create-manager --name "${REPLICATION_MANAGER_NAME}" --passwd "${REPLICATION_MANAGER_PASSWORD}"
+        dsconf master1 repl-agmt create --suffix "${SUFFIX}"
+        dsconf master2 repl-agmt create --suffix "${SUFFIX}"
+    """
+
+    # Step 3: No keepalive entry should exist on any master
+    verify_keepalive_entries(topo_m2, False)
+
+    # Step 4: Perform on-line init of master2 from master1
+    agmt = Agreements(m1).list()[0]
+    agmt.begin_reinit()
+    (done, error) = agmt.wait_reinit()
+    assert done is True
+    assert error is False
+
+    # Step 5: All keepalive entries should exist on every master
+    # Verify the keep alive entries once replication is in sync
+    # (this is the step that fails when the bug is not fixed)
+    repl.wait_for_ruv(m2, m1)
+    verify_keepalive_entries(topo_m2, True)
+
+
 if __name__ == '__main__':
     # Run isolated
     # -s for DEBUG mode
diff --git a/ldap/servers/plugins/replication/repl5_replica.c b/ldap/servers/plugins/replication/repl5_replica.c
index f01782330..f0ea0f8ef 100644
--- a/ldap/servers/plugins/replication/repl5_replica.c
+++ b/ldap/servers/plugins/replication/repl5_replica.c
@@ -373,6 +373,20 @@ replica_destroy(void **arg)
     slapi_ch_free((void **)arg);
 }
 
+/******************************************************************************
+ ******************** REPLICATION KEEP ALIVE ENTRIES **************************
+ ******************************************************************************
+ * They are subentries of the replicated suffix and there is one per master.  *
+ * These entries exist only to trigger a change that gets replicated over the *
+ * topology.                                                                  *
+ * Their main purpose is to generate records in the changelog and they are    *
+ * updated from time to time by fractional replication to ensure that at      *
+ * least one change is replicated by FR after a large number of non-          *
+ * replicated changes are found in the changelog. The benefit is that the     *
+ * fractional RUV then gets updated so fewer changes need to be walked in the *
+ * changelog when searching for the first change to send.                     *
+ ******************************************************************************/
+
 #define KEEP_ALIVE_ATTR "keepalivetimestamp"
 #define KEEP_ALIVE_ENTRY "repl keep alive"
 #define KEEP_ALIVE_DN_FORMAT "cn=%s %d,%s"
diff --git a/ldap/servers/plugins/replication/repl_extop.c b/ldap/servers/plugins/replication/repl_extop.c
index 14c8e0bcc..af486f730 100644
--- a/ldap/servers/plugins/replication/repl_extop.c
+++ b/ldap/servers/plugins/replication/repl_extop.c
@@ -1173,6 +1173,10 @@ multimaster_extop_EndNSDS50ReplicationRequest(Slapi_PBlock *pb)
                 */
                 if (cl5GetState() == CL5_STATE_OPEN) {
                     replica_log_ruv_elements(r);
+                    /* now that the changelog is open and started, we can also create the
+                     * keep alive entry without the risk that the database and the changelog will not match
+                     */
+                    replica_subentry_check(replica_get_root(r), replica_get_rid(r));
                 }
 
                 /* ONREPL code that dealt with new RUV, etc was moved into the code
-- 
2.26.2