From 1cd2d9b06b8bc006078ed26bb0d3cbe808681a86 Mon Sep 17 00:00:00 2001
From: Ludwig Krispenz <lkrispen@redhat.com>
Date: Fri, 12 Aug 2016 14:06:21 +0200
Subject: [PATCH 34/35] Ticket 48954 - replication fails because anchorcsn
 cannot be found

Bug Description: The anchorcsn is calculated from the supplier and consumer
                 RUVs. If this csn is not found in the changelog,
                 replication stops.

Fix Description: Fix consists of two parts
                 1. log start-iteration csn record for all replicas
                    after initialization
                 2. If the csn still cannot be found
                    - log an error
                    - use the closest csn available by calling
                      cursor->c_get with DB_SET_RANGE instead of DB_SET

https://fedorahosted.org/389/ticket/48954

Reviewed by: Noriko, Thierry. thanks

(cherry picked from commit 0721856d5a203689c15ea66ffe6c94ce4d785bd7)
---
 ldap/servers/plugins/replication/cl5_clcache.c   | 32 +++++++++++--
 ldap/servers/plugins/replication/repl5_replica.c | 58 ++++++++++++++++--------
 2 files changed, 67 insertions(+), 23 deletions(-)

diff --git a/ldap/servers/plugins/replication/cl5_clcache.c b/ldap/servers/plugins/replication/cl5_clcache.c
index 2d3bb28..74f0fec 100644
--- a/ldap/servers/plugins/replication/cl5_clcache.c
+++ b/ldap/servers/plugins/replication/cl5_clcache.c
@@ -376,6 +376,7 @@ clcache_load_buffer_bulk ( CLC_Buffer *buf, int flag )
 	DBC *cursor = NULL;
 	int rc = 0;
 	int tries = 0;
+	int use_flag = flag;
 
 #if 0 /* txn control seems not improving anything so turn it off */
 	if ( *(_pool->pl_dbenv) ) {
@@ -400,20 +401,44 @@ clcache_load_buffer_bulk ( CLC_Buffer *buf, int flag )
 retry:
 	if ( 0 == ( rc = clcache_open_cursor ( txn, buf, &cursor )) ) {
 
-		if ( flag == DB_NEXT ) {
+		if ( use_flag == DB_NEXT ) {
 			/* For bulk read, position the cursor before read the next block */
 			rc = cursor->c_get ( cursor,
 								 & buf->buf_key,
 								 & buf->buf_data,
 								 DB_SET );
+			if (rc == DB_NOTFOUND) {
+				/* the start position in the changelog is not found
+				 * 1. log an error
+				 * 2. try to find another starting position as close
+				 *    as possible
+				 */
+				slapi_log_error ( SLAPI_LOG_FATAL, "clcache_load_buffer_bulk",
+							"changelog record with csn (%s) not found for DB_NEXT\n",
+							(char *)buf->buf_key.data );
+				rc = cursor->c_get ( cursor, & buf->buf_key, & buf->buf_data,
+							 DB_SET_RANGE );
+				/* this moves the cursor ahead of the targeted csn,
+				 * so we achieved what was intended with DB_SET/DB_NEXT;
+				 * continue at this csn.
+				 */
+				use_flag = DB_CURRENT;
+			}
 		}
 
 		/*
 		 * Continue if the error is no-mem since we don't need to
 		 * load in the key record anyway with DB_SET.
 		 */
-		if ( 0 == rc || DB_BUFFER_SMALL == rc )
-			rc = clcache_cursor_get ( cursor, buf, flag );
+		if ( 0 == rc || DB_BUFFER_SMALL == rc ) {
+			rc = clcache_cursor_get ( cursor, buf, use_flag );
+			if ( rc == DB_NOTFOUND && use_flag == DB_SET) {
+				slapi_log_error ( SLAPI_LOG_FATAL, "clcache_load_buffer_bulk",
+							"changelog record with csn (%s) not found for DB_SET\n",
+							(char *)buf->buf_key.data );
+				rc = clcache_cursor_get ( cursor, buf, DB_SET_RANGE );
+			}
+		}
 
 	}
 
@@ -434,6 +459,7 @@ retry:
 		/* back off */
 		interval = PR_MillisecondsToInterval(slapi_rand() % 100);
 		DS_Sleep(interval);
+		use_flag = flag;
 		goto retry;
 	}
 	if ((rc == DB_LOCK_DEADLOCK) && (tries >= MAX_TRIALS)) {
diff --git a/ldap/servers/plugins/replication/repl5_replica.c b/ldap/servers/plugins/replication/repl5_replica.c
index b5d65ef..7360d97 100644
--- a/ldap/servers/plugins/replication/repl5_replica.c
+++ b/ldap/servers/plugins/replication/repl5_replica.c
@@ -3794,41 +3794,59 @@ replica_remove_legacy_attr (const Slapi_DN *repl_root_sdn, const char *attr)
     slapi_mods_done (&smods);
     slapi_pblock_destroy (pb);
 }
+typedef struct replinfo {
+    char *repl_gen;
+    char *repl_name;
+} replinfo;
+
+static int
+replica_log_start_iteration(const ruv_enum_data *rid_data, void *data)
+{
+    int rc = 0;
+    replinfo *r_info = (replinfo *)data;
+    slapi_operation_parameters op_params;
+
+    if (rid_data->csn == NULL) return 0;
+
+    memset (&op_params, 0, sizeof (op_params));
+    op_params.operation_type = SLAPI_OPERATION_DELETE;
+    op_params.target_address.sdn = slapi_sdn_new_ndn_byval(START_ITERATION_ENTRY_DN);
+    op_params.target_address.uniqueid = START_ITERATION_ENTRY_UNIQUEID;
+    op_params.csn = csn_dup(rid_data->csn);
+    rc = cl5WriteOperation(r_info->repl_name, r_info->repl_gen, &op_params, PR_FALSE);
+    if (rc == CL5_SUCCESS)
+        rc = 0;
+    else
+        rc = -1;
+
+    slapi_sdn_free(&op_params.target_address.sdn);
+    csn_free (&op_params.csn);
+
+    return rc;
+}
 
 static int 
 replica_log_ruv_elements_nolock (const Replica *r)
 {
     int rc = 0;
-    slapi_operation_parameters op_params;
     RUV *ruv;
     char *repl_gen; 
-    CSN *csn = NULL;
+    replinfo r_info;
 
     ruv = (RUV*) object_get_data (r->repl_ruv);
     PR_ASSERT (ruv);
 
-    if ((ruv_get_min_csn(ruv, &csn) == RUV_SUCCESS) && csn)
-    {
         /* we log it as a delete operation to have the least number of fields
            to set. the entry can be identified by a special target uniqueid and
            special target dn */
-        memset (&op_params, 0, sizeof (op_params));
-        op_params.operation_type = SLAPI_OPERATION_DELETE;
-        op_params.target_address.sdn = slapi_sdn_new_ndn_byval(START_ITERATION_ENTRY_DN);
-        op_params.target_address.uniqueid = START_ITERATION_ENTRY_UNIQUEID;
-        op_params.csn = csn;
-        repl_gen = ruv_get_replica_generation (ruv);
-
-        rc = cl5WriteOperation(r->repl_name, repl_gen, &op_params, PR_FALSE); 
-        if (rc == CL5_SUCCESS)
-            rc = 0;
-        else
-            rc = -1;
+    repl_gen = ruv_get_replica_generation (ruv);
 
-        slapi_ch_free ((void**)&repl_gen);
-        slapi_sdn_free(&op_params.target_address.sdn);
-        csn_free (&csn);
-    }
+    r_info.repl_name = r->repl_name;
+    r_info.repl_gen = repl_gen;
+
+    rc = ruv_enumerate_elements(ruv, replica_log_start_iteration, &r_info);
+
+    slapi_ch_free ((void**)&repl_gen);
 
     return rc;
 }
-- 
2.4.11