andykimpe / rpms / 389-ds-base

Forked from rpms/389-ds-base 5 months ago
Clone
dc8c34
From 401132f74937854b8e7e65c2ba392bc156fadd27 Mon Sep 17 00:00:00 2001
dc8c34
From: Ludwig Krispenz <lkrispen@redhat.com>
dc8c34
Date: Fri, 12 Aug 2016 14:06:21 +0200
dc8c34
Subject: [PATCH 388/390] Ticket 48954 - replication fails because anchorcsn
dc8c34
 cannot be found
dc8c34
dc8c34
Bug Description: the anchorcsn is calculated based on supplier and consumer
dc8c34
                 ruv. If this csn is not found in the changelog
dc8c34
                 replication stops.
dc8c34
dc8c34
Fix Description: Fix consists of two parts
dc8c34
                 1. log start-iteration csn record for all replicas
dc8c34
                    after initialization
dc8c34
                 2. If the csn still cannot be found
dc8c34
                    - log an error
dc8c34
                    - use the closest csn available by calling
dc8c34
                      cursor->c_get with DB_SET_RANGE instead of DB_SET
dc8c34
dc8c34
https://fedorahosted.org/389/ticket/48954
dc8c34
dc8c34
Reviewed by: Noriko, Thierry. thanks
dc8c34
dc8c34
(cherry picked from commit 08b0ee8e45d8ce8c869c193ee31ee7f983c59819)
dc8c34
---
dc8c34
 ldap/servers/plugins/replication/cl5_clcache.c   | 32 +++++++++++--
dc8c34
 ldap/servers/plugins/replication/repl5_replica.c | 58 ++++++++++++++++--------
dc8c34
 2 files changed, 67 insertions(+), 23 deletions(-)
dc8c34
dc8c34
diff --git a/ldap/servers/plugins/replication/cl5_clcache.c b/ldap/servers/plugins/replication/cl5_clcache.c
dc8c34
index 9e1d3b7..0b8feee 100644
dc8c34
--- a/ldap/servers/plugins/replication/cl5_clcache.c
dc8c34
+++ b/ldap/servers/plugins/replication/cl5_clcache.c
dc8c34
@@ -406,6 +406,7 @@ clcache_load_buffer_bulk ( CLC_Buffer *buf, int flag )
dc8c34
 	DBC *cursor = NULL;
dc8c34
 	int rc = 0;
dc8c34
 	int tries = 0;
dc8c34
+	int use_flag = flag;
dc8c34
 
dc8c34
 #if 0 /* txn control seems not improving anything so turn it off */
dc8c34
 	if ( *(_pool->pl_dbenv) ) {
dc8c34
@@ -430,20 +431,44 @@ clcache_load_buffer_bulk ( CLC_Buffer *buf, int flag )
dc8c34
 retry:
dc8c34
 	if ( 0 == ( rc = clcache_open_cursor ( txn, buf, &cursor )) ) {
dc8c34
 
dc8c34
-		if ( flag == DB_NEXT ) {
dc8c34
+		if ( use_flag == DB_NEXT ) {
dc8c34
 			/* For bulk read, position the cursor before read the next block */
dc8c34
 			rc = cursor->c_get ( cursor,
dc8c34
 								 & buf->buf_key,
dc8c34
 								 & buf->buf_data,
dc8c34
 								 DB_SET );
dc8c34
+			if (rc == DB_NOTFOUND) {
dc8c34
+				/* the start position in the changelog is not found
dc8c34
+				 * 1. log an error
dc8c34
+				 * 2. try to find another starting position as close
dc8c34
+				 *    as possible
dc8c34
+				 */
dc8c34
+				slapi_log_error ( SLAPI_LOG_FATAL, "clcache_load_buffer_bulk",
dc8c34
+							"changelog record with csn (%s) not found for DB_NEXT\n",
dc8c34
+							(char *)buf->buf_key.data );
dc8c34
+				rc = cursor->c_get ( cursor, & buf->buf_key, & buf->buf_data,
dc8c34
+							 DB_SET_RANGE );
dc8c34
+				/* this moves the cursor ahead of the targeted csn,
dc8c34
+				 * so we achieved what was intended with DB_SET/DB_NEXT
dc8c34
+				 * continue at this csn.
dc8c34
+				 */
dc8c34
+				use_flag = DB_CURRENT;
dc8c34
+			}
dc8c34
 		}
dc8c34
 
dc8c34
 		/*
dc8c34
 		 * Continue if the error is no-mem since we don't need to
dc8c34
 		 * load in the key record anyway with DB_SET.
dc8c34
 		 */
dc8c34
-		if ( 0 == rc || DB_BUFFER_SMALL == rc )
dc8c34
-			rc = clcache_cursor_get ( cursor, buf, flag );
dc8c34
+		if ( 0 == rc || DB_BUFFER_SMALL == rc ) {
dc8c34
+			rc = clcache_cursor_get ( cursor, buf, use_flag );
dc8c34
+			if ( rc == DB_NOTFOUND && use_flag == DB_SET) {
dc8c34
+				slapi_log_error ( SLAPI_LOG_FATAL, "clcache_load_buffer_bulk",
dc8c34
+							"changelog record with csn (%s) not found for DB_SET\n",
dc8c34
+							(char *)buf->buf_key.data );
dc8c34
+				rc = clcache_cursor_get ( cursor, buf, DB_SET_RANGE );
dc8c34
+			}
dc8c34
+		}
dc8c34
 
dc8c34
 	}
dc8c34
 
dc8c34
@@ -464,6 +489,7 @@ retry:
dc8c34
 		/* back off */
dc8c34
 		interval = PR_MillisecondsToInterval(slapi_rand() % 100);
dc8c34
 		DS_Sleep(interval);
dc8c34
+		use_flag = flag;
dc8c34
 		goto retry;
dc8c34
 	}
dc8c34
 	if ((rc == DB_LOCK_DEADLOCK) && (tries >= MAX_TRIALS)) {
dc8c34
diff --git a/ldap/servers/plugins/replication/repl5_replica.c b/ldap/servers/plugins/replication/repl5_replica.c
dc8c34
index d25d00d..e7d58cf 100644
dc8c34
--- a/ldap/servers/plugins/replication/repl5_replica.c
dc8c34
+++ b/ldap/servers/plugins/replication/repl5_replica.c
dc8c34
@@ -3513,41 +3513,59 @@ replica_remove_legacy_attr (const Slapi_DN *repl_root_sdn, const char *attr)
dc8c34
     slapi_mods_done (&smods);
dc8c34
     slapi_pblock_destroy (pb);
dc8c34
 }
dc8c34
+typedef struct replinfo {
dc8c34
+    char *repl_gen;
dc8c34
+    char *repl_name;
dc8c34
+} replinfo;
dc8c34
+
dc8c34
+static int
dc8c34
+replica_log_start_iteration(const ruv_enum_data *rid_data, void *data)
dc8c34
+{
dc8c34
+    int rc = 0;
dc8c34
+    replinfo *r_info = (replinfo *)data;
dc8c34
+    slapi_operation_parameters op_params;
dc8c34
+
dc8c34
+    if (rid_data->csn == NULL) return 0;
dc8c34
+
dc8c34
+    memset (&op_params, 0, sizeof (op_params));
dc8c34
+    op_params.operation_type = SLAPI_OPERATION_DELETE;
dc8c34
+    op_params.target_address.sdn = slapi_sdn_new_ndn_byval(START_ITERATION_ENTRY_DN);
dc8c34
+    op_params.target_address.uniqueid = START_ITERATION_ENTRY_UNIQUEID;
dc8c34
+    op_params.csn = csn_dup(rid_data->csn);
dc8c34
+    rc = cl5WriteOperation(r_info->repl_name, r_info->repl_gen, &op_params, PR_FALSE);
dc8c34
+    if (rc == CL5_SUCCESS)
dc8c34
+        rc = 0;
dc8c34
+    else
dc8c34
+        rc = -1;
dc8c34
+
dc8c34
+    slapi_sdn_free(&op_params.target_address.sdn);
dc8c34
+    csn_free (&op_params.csn);
dc8c34
+
dc8c34
+    return rc;
dc8c34
+}
dc8c34
 
dc8c34
 static int 
dc8c34
 replica_log_ruv_elements_nolock (const Replica *r)
dc8c34
 {
dc8c34
     int rc = 0;
dc8c34
-    slapi_operation_parameters op_params;
dc8c34
     RUV *ruv;
dc8c34
     char *repl_gen; 
dc8c34
-    CSN *csn = NULL;
dc8c34
+    replinfo r_info;
dc8c34
 
dc8c34
     ruv = (RUV*) object_get_data (r->repl_ruv);
dc8c34
     PR_ASSERT (ruv);
dc8c34
 
dc8c34
-    if ((ruv_get_min_csn(ruv, &csn) == RUV_SUCCESS) && csn)
dc8c34
-    {
dc8c34
         /* we log it as a delete operation to have the least number of fields
dc8c34
            to set. the entry can be identified by a special target uniqueid and
dc8c34
            special target dn */
dc8c34
-        memset (&op_params, 0, sizeof (op_params));
dc8c34
-        op_params.operation_type = SLAPI_OPERATION_DELETE;
dc8c34
-        op_params.target_address.sdn = slapi_sdn_new_ndn_byval(START_ITERATION_ENTRY_DN);
dc8c34
-        op_params.target_address.uniqueid = START_ITERATION_ENTRY_UNIQUEID;
dc8c34
-        op_params.csn = csn;
dc8c34
-        repl_gen = ruv_get_replica_generation (ruv);
dc8c34
-
dc8c34
-        rc = cl5WriteOperation(r->repl_name, repl_gen, &op_params, PR_FALSE); 
dc8c34
-        if (rc == CL5_SUCCESS)
dc8c34
-            rc = 0;
dc8c34
-        else
dc8c34
-            rc = -1;
dc8c34
+    repl_gen = ruv_get_replica_generation (ruv);
dc8c34
 
dc8c34
-        slapi_ch_free ((void**)&repl_gen);
dc8c34
-        slapi_sdn_free(&op_params.target_address.sdn);
dc8c34
-        csn_free (&csn);
dc8c34
-    }
dc8c34
+    r_info.repl_name = r->repl_name;
dc8c34
+    r_info.repl_gen = repl_gen;
dc8c34
+
dc8c34
+    rc = ruv_enumerate_elements(ruv, replica_log_start_iteration, &r_info);
dc8c34
+
dc8c34
+    slapi_ch_free ((void**)&repl_gen);
dc8c34
 
dc8c34
     return rc;
dc8c34
 }
dc8c34
-- 
dc8c34
2.4.11
dc8c34