Blame SOURCES/0034-Ticket-48954-replication-fails-because-anchorcsn-can.patch

7c7f29
From 1cd2d9b06b8bc006078ed26bb0d3cbe808681a86 Mon Sep 17 00:00:00 2001
7c7f29
From: Ludwig Krispenz <lkrispen@redhat.com>
7c7f29
Date: Fri, 12 Aug 2016 14:06:21 +0200
7c7f29
Subject: [PATCH 34/35] Ticket 48954 - replication fails because anchorcsn
7c7f29
 cannot be found
7c7f29
7c7f29
Bug Description: the anchorcsn is calculated based on supplier and consumer
7c7f29
                 ruv. If this csn is not found in the changelog
7c7f29
                 replication stops.
7c7f29
7c7f29
Fix Description: Fix consists of two parts
7c7f29
                 1. log start-iteration csn record for all replicas
7c7f29
                    after initialization
7c7f29
                 2. If the csn still cannot be found
7c7f29
                    - log an error
7c7f29
                    - use the closest csn available by calling
7c7f29
                      cursor->c_get with DB_SET_RANGE instead of DB_SET
7c7f29
7c7f29
https://fedorahosted.org/389/ticket/48954
7c7f29
7c7f29
Reviewed by: Noriko, Thierry. thanks
7c7f29
7c7f29
(cherry picked from commit 0721856d5a203689c15ea66ffe6c94ce4d785bd7)
7c7f29
---
7c7f29
 ldap/servers/plugins/replication/cl5_clcache.c   | 32 +++++++++++--
7c7f29
 ldap/servers/plugins/replication/repl5_replica.c | 58 ++++++++++++++++--------
7c7f29
 2 files changed, 67 insertions(+), 23 deletions(-)
7c7f29
7c7f29
diff --git a/ldap/servers/plugins/replication/cl5_clcache.c b/ldap/servers/plugins/replication/cl5_clcache.c
7c7f29
index 2d3bb28..74f0fec 100644
7c7f29
--- a/ldap/servers/plugins/replication/cl5_clcache.c
7c7f29
+++ b/ldap/servers/plugins/replication/cl5_clcache.c
7c7f29
@@ -376,6 +376,7 @@ clcache_load_buffer_bulk ( CLC_Buffer *buf, int flag )
7c7f29
 	DBC *cursor = NULL;
7c7f29
 	int rc = 0;
7c7f29
 	int tries = 0;
7c7f29
+	int use_flag = flag;
7c7f29
 
7c7f29
 #if 0 /* txn control seems not improving anything so turn it off */
7c7f29
 	if ( *(_pool->pl_dbenv) ) {
7c7f29
@@ -400,20 +401,44 @@ clcache_load_buffer_bulk ( CLC_Buffer *buf, int flag )
7c7f29
 retry:
7c7f29
 	if ( 0 == ( rc = clcache_open_cursor ( txn, buf, &cursor )) ) {
7c7f29
 
7c7f29
-		if ( flag == DB_NEXT ) {
7c7f29
+		if ( use_flag == DB_NEXT ) {
7c7f29
 			/* For bulk read, position the cursor before read the next block */
7c7f29
 			rc = cursor->c_get ( cursor,
7c7f29
 								 & buf->buf_key,
7c7f29
 								 & buf->buf_data,
7c7f29
 								 DB_SET );
7c7f29
+			if (rc == DB_NOTFOUND) {
7c7f29
+				/* the start position in the changelog is not found
7c7f29
+				 * 1. log an error
7c7f29
+				 * 2. try to find another starting position as close
7c7f29
+				 *    as possible
7c7f29
+				 */
7c7f29
+				slapi_log_error ( SLAPI_LOG_FATAL, "clcache_load_buffer_bulk",
7c7f29
+							"changelog record with csn (%s) not found for DB_NEXT\n",
7c7f29
+							(char *)buf->buf_key.data );
7c7f29
+				rc = cursor->c_get ( cursor, & buf->buf_key, & buf->buf_data,
7c7f29
+							 DB_SET_RANGE );
7c7f29
+				/* this moves the cursor ahead of the tageted csn,
7c7f29
+				 * so we achieved what was intended with DB_SET/DB_NEXT
7c7f29
+				 * continute at this csn.
7c7f29
+				 */
7c7f29
+				use_flag = DB_CURRENT;
7c7f29
+			}
7c7f29
 		}
7c7f29
 
7c7f29
 		/*
7c7f29
 		 * Continue if the error is no-mem since we don't need to
7c7f29
 		 * load in the key record anyway with DB_SET.
7c7f29
 		 */
7c7f29
-		if ( 0 == rc || DB_BUFFER_SMALL == rc )
7c7f29
-			rc = clcache_cursor_get ( cursor, buf, flag );
7c7f29
+		if ( 0 == rc || DB_BUFFER_SMALL == rc ) {
7c7f29
+			rc = clcache_cursor_get ( cursor, buf, use_flag );
7c7f29
+			if ( rc == DB_NOTFOUND && use_flag == DB_SET) {
7c7f29
+				slapi_log_error ( SLAPI_LOG_FATAL, "clcache_load_buffer_bulk",
7c7f29
+							"changelog record with csn (%s) not found for DB_SET\n",
7c7f29
+							(char *)buf->buf_key.data );
7c7f29
+				rc = clcache_cursor_get ( cursor, buf, DB_SET_RANGE );
7c7f29
+			}
7c7f29
+		}
7c7f29
 
7c7f29
 	}
7c7f29
 
7c7f29
@@ -434,6 +459,7 @@ retry:
7c7f29
 		/* back off */
7c7f29
 		interval = PR_MillisecondsToInterval(slapi_rand() % 100);
7c7f29
 		DS_Sleep(interval);
7c7f29
+		use_flag = flag;
7c7f29
 		goto retry;
7c7f29
 	}
7c7f29
 	if ((rc == DB_LOCK_DEADLOCK) && (tries >= MAX_TRIALS)) {
7c7f29
diff --git a/ldap/servers/plugins/replication/repl5_replica.c b/ldap/servers/plugins/replication/repl5_replica.c
7c7f29
index b5d65ef..7360d97 100644
7c7f29
--- a/ldap/servers/plugins/replication/repl5_replica.c
7c7f29
+++ b/ldap/servers/plugins/replication/repl5_replica.c
7c7f29
@@ -3794,41 +3794,59 @@ replica_remove_legacy_attr (const Slapi_DN *repl_root_sdn, const char *attr)
7c7f29
     slapi_mods_done (&smods);
7c7f29
     slapi_pblock_destroy (pb);
7c7f29
 }
7c7f29
+typedef struct replinfo {
7c7f29
+    char *repl_gen;
7c7f29
+    char *repl_name;
7c7f29
+} replinfo;
7c7f29
+
7c7f29
+static int
7c7f29
+replica_log_start_iteration(const ruv_enum_data *rid_data, void *data)
7c7f29
+{
7c7f29
+    int rc = 0;
7c7f29
+    replinfo *r_info = (replinfo *)data;
7c7f29
+    slapi_operation_parameters op_params;
7c7f29
+
7c7f29
+    if (rid_data->csn == NULL) return 0;
7c7f29
+
7c7f29
+    memset (&op_params, 0, sizeof (op_params));
7c7f29
+    op_params.operation_type = SLAPI_OPERATION_DELETE;
7c7f29
+    op_params.target_address.sdn = slapi_sdn_new_ndn_byval(START_ITERATION_ENTRY_DN);
7c7f29
+    op_params.target_address.uniqueid = START_ITERATION_ENTRY_UNIQUEID;
7c7f29
+    op_params.csn = csn_dup(rid_data->csn);
7c7f29
+    rc = cl5WriteOperation(r_info->repl_name, r_info->repl_gen, &op_params, PR_FALSE);
7c7f29
+    if (rc == CL5_SUCCESS)
7c7f29
+        rc = 0;
7c7f29
+    else
7c7f29
+        rc = -1;
7c7f29
+
7c7f29
+    slapi_sdn_free(&op_params.target_address.sdn);
7c7f29
+    csn_free (&op_params.csn);
7c7f29
+
7c7f29
+    return rc;
7c7f29
+}
7c7f29
 
7c7f29
 static int 
7c7f29
 replica_log_ruv_elements_nolock (const Replica *r)
7c7f29
 {
7c7f29
     int rc = 0;
7c7f29
-    slapi_operation_parameters op_params;
7c7f29
     RUV *ruv;
7c7f29
     char *repl_gen; 
7c7f29
-    CSN *csn = NULL;
7c7f29
+    replinfo r_info;
7c7f29
 
7c7f29
     ruv = (RUV*) object_get_data (r->repl_ruv);
7c7f29
     PR_ASSERT (ruv);
7c7f29
 
7c7f29
-    if ((ruv_get_min_csn(ruv, &csn) == RUV_SUCCESS) && csn)
7c7f29
-    {
7c7f29
         /* we log it as a delete operation to have the least number of fields
7c7f29
            to set. the entry can be identified by a special target uniqueid and
7c7f29
            special target dn */
7c7f29
-        memset (&op_params, 0, sizeof (op_params));
7c7f29
-        op_params.operation_type = SLAPI_OPERATION_DELETE;
7c7f29
-        op_params.target_address.sdn = slapi_sdn_new_ndn_byval(START_ITERATION_ENTRY_DN);
7c7f29
-        op_params.target_address.uniqueid = START_ITERATION_ENTRY_UNIQUEID;
7c7f29
-        op_params.csn = csn;
7c7f29
-        repl_gen = ruv_get_replica_generation (ruv);
7c7f29
-
7c7f29
-        rc = cl5WriteOperation(r->repl_name, repl_gen, &op_params, PR_FALSE); 
7c7f29
-        if (rc == CL5_SUCCESS)
7c7f29
-            rc = 0;
7c7f29
-        else
7c7f29
-            rc = -1;
7c7f29
+    repl_gen = ruv_get_replica_generation (ruv);
7c7f29
 
7c7f29
-        slapi_ch_free ((void**)&repl_gen);
7c7f29
-        slapi_sdn_free(&op_params.target_address.sdn);
7c7f29
-        csn_free (&csn);
7c7f29
-    }
7c7f29
+    r_info.repl_name = r->repl_name;
7c7f29
+    r_info.repl_gen = repl_gen;
7c7f29
+
7c7f29
+    rc = ruv_enumerate_elements(ruv, replica_log_start_iteration, &r_info);
7c7f29
+
7c7f29
+    slapi_ch_free ((void**)&repl_gen);
7c7f29
 
7c7f29
     return rc;
7c7f29
 }
7c7f29
-- 
7c7f29
2.4.11
7c7f29