Blame SOURCES/0057-Ticket-49020-do-not-treat-missing-csn-as-fatal.patch

4aa5b2
From b2cda86aeddd85ecb712d047824e6d25da9222a7 Mon Sep 17 00:00:00 2001
4aa5b2
From: Ludwig Krispenz <lkrispen@redhat.com>
4aa5b2
Date: Mon, 5 Dec 2016 09:59:38 +0100
4aa5b2
Subject: [PATCH 57/57] Ticket 49020 - do not treat missing csn as fatal
4aa5b2
4aa5b2
    This patch removes the automatic choice of an alternative csn when
4aa5b2
    the calculated anchor csn is not found.
4aa5b2
4aa5b2
    In that case it no longer goes to fatal state but will retry later.
4aa5b2
4aa5b2
    It also adds a configuration parameter to the replication agreement to
4aa5b2
    allow to pick a "next best" anchorcsn if the original is not found to
4aa5b2
    keep replication going.
4aa5b2
4aa5b2
    Reviewed by: Noriko, William
4aa5b2
4aa5b2
(cherry picked from commit a2dee8fe6faa9fef5824d7852887b21f1158284a)
4aa5b2
---
4aa5b2
 ldap/schema/01core389.ldif                         |   3 +-
4aa5b2
 ldap/servers/plugins/replication/cl5_api.c         |  16 ++-
4aa5b2
 ldap/servers/plugins/replication/cl5_clcache.c     |  43 ++++----
4aa5b2
 ldap/servers/plugins/replication/cl5_clcache.h     |   2 +-
4aa5b2
 ldap/servers/plugins/replication/repl5.h           |   4 +
4aa5b2
 ldap/servers/plugins/replication/repl5_agmt.c      | 114 +++++++++++++++++++++
4aa5b2
 ldap/servers/plugins/replication/repl5_agmtlist.c  |  13 +++
4aa5b2
 .../plugins/replication/repl5_inc_protocol.c       |  12 ++-
4aa5b2
 ldap/servers/plugins/replication/repl_globals.c    |   1 +
4aa5b2
 9 files changed, 171 insertions(+), 37 deletions(-)
4aa5b2
4aa5b2
diff --git a/ldap/schema/01core389.ldif b/ldap/schema/01core389.ldif
4aa5b2
index ab07a0b..dfa4729 100644
4aa5b2
--- a/ldap/schema/01core389.ldif
4aa5b2
+++ b/ldap/schema/01core389.ldif
4aa5b2
@@ -298,6 +298,7 @@ attributeTypes: ( 2.16.840.1.113730.3.1.2330 NAME 'nsslapd-logging-backend' DESC
4aa5b2
 attributeTypes: ( 2.16.840.1.113730.3.1.2331 NAME 'nsslapd-logging-hr-timestamps-enabled' DESC 'Netscape defined attribute type' SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 X-ORIGIN 'Netscape Directory Server' )
4aa5b2
 attributeTypes: ( 2.16.840.1.113730.3.1.2332 NAME 'allowWeakDHParam' DESC 'Netscape defined attribute type' SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 X-ORIGIN 'Netscape Directory Server' )
4aa5b2
 attributeTypes: ( 2.16.840.1.113730.3.1.2333 NAME 'nsds5ReplicaReleaseTimeout' DESC 'Netscape defined attribute type' SYNTAX 1.3.6.1.4.1.1466.115.121.1.27 SINGLE-VALUE X-ORIGIN 'Netscape Directory Server' )
4aa5b2
+attributeTypes: ( 2.16.840.1.113730.3.1.2335 NAME 'nsds5ReplicaIgnoreMissingChange' DESC 'Netscape defined attribute type' SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 SINGLE-VALUE X-ORIGIN 'Netscape Directory Server' )
4aa5b2
 #
4aa5b2
 # objectclasses
4aa5b2
 #
4aa5b2
@@ -309,7 +310,7 @@ objectClasses: ( 2.16.840.1.113730.3.2.110 NAME 'nsMappingTree' DESC 'Netscape d
4aa5b2
 objectClasses: ( 2.16.840.1.113730.3.2.104 NAME 'nsContainer' DESC 'Netscape defined objectclass' SUP top  MUST ( CN ) X-ORIGIN 'Netscape Directory Server' )
4aa5b2
 objectClasses: ( 2.16.840.1.113730.3.2.108 NAME 'nsDS5Replica' DESC 'Netscape defined objectclass' SUP top  MUST ( nsDS5ReplicaRoot $  nsDS5ReplicaId ) MAY (cn $ nsds5ReplicaPreciseTombstonePurging $ nsds5ReplicaCleanRUV $ nsds5ReplicaAbortCleanRUV $ nsDS5ReplicaType $ nsDS5ReplicaBindDN $ nsState $ nsDS5ReplicaName $ nsDS5Flags $ nsDS5Task $ nsDS5ReplicaReferral $ nsDS5ReplicaAutoReferral $ nsds5ReplicaPurgeDelay $ nsds5ReplicaTombstonePurgeInterval $ nsds5ReplicaChangeCount $ nsds5ReplicaLegacyConsumer $ nsds5ReplicaProtocolTimeout $ nsds5ReplicaBackoffMin $ nsds5ReplicaBackoffMax $ nsds5ReplicaReleaseTimeout ) X-ORIGIN 'Netscape Directory Server' )
4aa5b2
 objectClasses: ( 2.16.840.1.113730.3.2.113 NAME 'nsTombstone' DESC 'Netscape defined objectclass' SUP top MAY ( nstombstonecsn $ nsParentUniqueId $ nscpEntryDN ) X-ORIGIN 'Netscape Directory Server' )
4aa5b2
-objectClasses: ( 2.16.840.1.113730.3.2.103 NAME 'nsDS5ReplicationAgreement' DESC 'Netscape defined objectclass' SUP top MUST ( cn ) MAY ( nsds5ReplicaCleanRUVNotified $ nsDS5ReplicaHost $ nsDS5ReplicaPort $ nsDS5ReplicaTransportInfo $ nsDS5ReplicaBindDN $ nsDS5ReplicaCredentials $ nsDS5ReplicaBindMethod $ nsDS5ReplicaRoot $ nsDS5ReplicatedAttributeList $ nsDS5ReplicatedAttributeListTotal $ nsDS5ReplicaUpdateSchedule $ nsds5BeginReplicaRefresh $ description $ nsds50ruv $ nsruvReplicaLastModified $ nsds5ReplicaTimeout $ nsds5replicaChangesSentSinceStartup $ nsds5replicaLastUpdateEnd $ nsds5replicaLastUpdateStart $ nsds5replicaLastUpdateStatus $ nsds5replicaUpdateInProgress $ nsds5replicaLastInitEnd $ nsds5ReplicaEnabled $ nsds5replicaLastInitStart $ nsds5replicaLastInitStatus $ nsds5debugreplicatimeout $ nsds5replicaBusyWaitTime $ nsds5ReplicaStripAttrs $ nsds5replicaSessionPauseTime $ nsds5ReplicaProtocolTimeout $ nsds5ReplicaFlowControlWindow $ nsds5ReplicaFlowControlPause $ nsDS5ReplicaWaitForAsyncResults ) X-ORIGIN 'Netscape Directory Server' )
4aa5b2
+objectClasses: ( 2.16.840.1.113730.3.2.103 NAME 'nsDS5ReplicationAgreement' DESC 'Netscape defined objectclass' SUP top MUST ( cn ) MAY ( nsds5ReplicaCleanRUVNotified $ nsDS5ReplicaHost $ nsDS5ReplicaPort $ nsDS5ReplicaTransportInfo $ nsDS5ReplicaBindDN $ nsDS5ReplicaCredentials $ nsDS5ReplicaBindMethod $ nsDS5ReplicaRoot $ nsDS5ReplicatedAttributeList $ nsDS5ReplicatedAttributeListTotal $ nsDS5ReplicaUpdateSchedule $ nsds5BeginReplicaRefresh $ description $ nsds50ruv $ nsruvReplicaLastModified $ nsds5ReplicaTimeout $ nsds5replicaChangesSentSinceStartup $ nsds5replicaLastUpdateEnd $ nsds5replicaLastUpdateStart $ nsds5replicaLastUpdateStatus $ nsds5replicaUpdateInProgress $ nsds5replicaLastInitEnd $ nsds5ReplicaEnabled $ nsds5replicaLastInitStart $ nsds5replicaLastInitStatus $ nsds5debugreplicatimeout $ nsds5replicaBusyWaitTime $ nsds5ReplicaStripAttrs $ nsds5replicaSessionPauseTime $ nsds5ReplicaProtocolTimeout $ nsds5ReplicaFlowControlWindow $ nsds5ReplicaFlowControlPause $ nsDS5ReplicaWaitForAsyncResults $ nsds5ReplicaIgnoreMissingChange) X-ORIGIN 'Netscape Directory Server' )
4aa5b2
 objectClasses: ( 2.16.840.1.113730.3.2.39 NAME 'nsslapdConfig' DESC 'Netscape defined objectclass' SUP top MAY ( cn ) X-ORIGIN 'Netscape Directory Server' )
4aa5b2
 objectClasses: ( 2.16.840.1.113730.3.2.317 NAME 'nsSaslMapping' DESC 'Netscape defined objectclass' SUP top MUST ( cn $ nsSaslMapRegexString $ nsSaslMapBaseDNTemplate $ nsSaslMapFilterTemplate ) MAY ( nsSaslMapPriority ) X-ORIGIN 'Netscape Directory Server' )
4aa5b2
 objectClasses: ( 2.16.840.1.113730.3.2.43 NAME 'nsSNMP' DESC 'Netscape defined objectclass' SUP top MUST ( cn $ nsSNMPEnabled ) MAY ( nsSNMPOrganization $ nsSNMPLocation $ nsSNMPContact $ nsSNMPDescription $ nsSNMPName $ nsSNMPMasterHost $ nsSNMPMasterPort ) X-ORIGIN 'Netscape Directory Server' )
4aa5b2
diff --git a/ldap/servers/plugins/replication/cl5_api.c b/ldap/servers/plugins/replication/cl5_api.c
4aa5b2
index 6a09aea..f8b2cea 100644
4aa5b2
--- a/ldap/servers/plugins/replication/cl5_api.c
4aa5b2
+++ b/ldap/servers/plugins/replication/cl5_api.c
4aa5b2
@@ -310,7 +310,7 @@ static int _cl5WriteBervals (struct berval **bv, char** buff, u_int32_t *size);
4aa5b2
 static PRBool _cl5ValidReplayIterator (const CL5ReplayIterator *iterator);
4aa5b2
 #endif
4aa5b2
 static int _cl5PositionCursorForReplay (ReplicaId consumerRID, const RUV *consumerRuv,
4aa5b2
-			Object *replica, Object *fileObject, CL5ReplayIterator **iterator);
4aa5b2
+		Object *replica, Object *fileObject, CL5ReplayIterator **iterator, int *continue_on_missing);
4aa5b2
 static int _cl5CheckMissingCSN (const CSN *minCsn, const RUV *supplierRUV, CL5DBFile *file);
4aa5b2
 
4aa5b2
 /* changelog trimming */
4aa5b2
@@ -1536,7 +1536,7 @@ int cl5CreateReplayIteratorEx (Private_Repl_Protocol *prp, const RUV *consumerRu
4aa5b2
     	/* iterate through the ruv in csn order to find first master for which 
4aa5b2
 	       we can replay changes */		    
4aa5b2
 		
4aa5b2
-		rc = _cl5PositionCursorForReplay (consumerRID, consumerRuv, replica, obj, iterator);
4aa5b2
+		rc = _cl5PositionCursorForReplay (consumerRID, consumerRuv, replica, obj, iterator, NULL);
4aa5b2
 	}
4aa5b2
 	else
4aa5b2
 	{
4aa5b2
@@ -1597,7 +1597,13 @@ int cl5CreateReplayIterator (Private_Repl_Protocol *prp, const RUV *consumerRuv,
4aa5b2
     	/* iterate through the ruv in csn order to find first master for which 
4aa5b2
 	       we can replay changes */		    
4aa5b2
 		ReplicaId consumerRID = agmt_get_consumer_rid ( prp->agmt, prp->conn );
4aa5b2
-		rc = _cl5PositionCursorForReplay (consumerRID, consumerRuv, replica, obj, iterator);
4aa5b2
+		int continue_on_missing = agmt_get_ignoremissing ( prp->agmt);
4aa5b2
+		int save_cont_miss = continue_on_missing;
4aa5b2
+		rc = _cl5PositionCursorForReplay (consumerRID, consumerRuv, replica, obj, iterator, &continue_on_missing);
4aa5b2
+		if (save_cont_miss == 1 && continue_on_missing ==0) {
4aa5b2
+			/* the option to continue once on a missing csn was used, rest */
4aa5b2
+			agmt_set_ignoremissing ( prp->agmt, 0);
4aa5b2
+		}
4aa5b2
 	}
4aa5b2
 	else
4aa5b2
 	{
4aa5b2
@@ -5516,7 +5522,7 @@ struct replica_hash_entry
4aa5b2
 
4aa5b2
 
4aa5b2
 static int _cl5PositionCursorForReplay (ReplicaId consumerRID, const RUV *consumerRuv,
4aa5b2
-		Object *replica, Object *fileObj, CL5ReplayIterator **iterator)
4aa5b2
+		Object *replica, Object *fileObj, CL5ReplayIterator **iterator, int *continue_on_missing)
4aa5b2
 {
4aa5b2
 	CLC_Buffer *clcache = NULL;
4aa5b2
 	CL5DBFile *file;
4aa5b2
@@ -5560,7 +5566,7 @@ static int _cl5PositionCursorForReplay (ReplicaId consumerRID, const RUV *consum
4aa5b2
 	rc = clcache_get_buffer ( &clcache, file->db, consumerRID, consumerRuv, supplierRuv );
4aa5b2
 	if ( rc != 0 ) goto done;
4aa5b2
 
4aa5b2
-	rc = clcache_load_buffer (clcache, &startCSN);
4aa5b2
+	rc = clcache_load_buffer (clcache, &startCSN, continue_on_missing);
4aa5b2
 
4aa5b2
         if (rc == 0) {
4aa5b2
 		haveChanges = PR_TRUE;
4aa5b2
diff --git a/ldap/servers/plugins/replication/cl5_clcache.c b/ldap/servers/plugins/replication/cl5_clcache.c
4aa5b2
index ca8b841..47789f4 100644
4aa5b2
--- a/ldap/servers/plugins/replication/cl5_clcache.c
4aa5b2
+++ b/ldap/servers/plugins/replication/cl5_clcache.c
4aa5b2
@@ -324,7 +324,7 @@ clcache_return_buffer ( CLC_Buffer **buf )
4aa5b2
  *		       historic reason.
4aa5b2
  */
4aa5b2
 int
4aa5b2
-clcache_load_buffer ( CLC_Buffer *buf, CSN **anchorCSN )
4aa5b2
+clcache_load_buffer ( CLC_Buffer *buf, CSN **anchorCSN, int *continue_on_miss )
4aa5b2
 {
4aa5b2
 	int rc = 0;
4aa5b2
         int flag = DB_NEXT;
4aa5b2
@@ -345,6 +345,22 @@ clcache_load_buffer ( CLC_Buffer *buf, CSN **anchorCSN )
4aa5b2
 		if (anchorCSN) *anchorCSN = buf->buf_current_csn;
4aa5b2
 		rc = clcache_load_buffer_bulk ( buf, flag );
4aa5b2
 
4aa5b2
+		if (rc == DB_NOTFOUND && continue_on_miss && *continue_on_miss) {
4aa5b2
+			/* make replication going using next best startcsn */
4aa5b2
+			slapi_log_error(SLAPI_LOG_FATAL, buf->buf_agmt_name,
4aa5b2
+					"clcache_load_buffer - Can't load changelog buffer starting at CSN %s with flag(%s). "
4aa5b2
+					"Trying to use an alterantive start CSN.\n",
4aa5b2
+					(char*)buf->buf_key.data,
4aa5b2
+					flag==DB_NEXT?"DB_NEXT":"DB_SET" );
4aa5b2
+			rc = clcache_load_buffer_bulk ( buf, DB_SET_RANGE );
4aa5b2
+			if (rc == 0) {
4aa5b2
+				slapi_log_error(SLAPI_LOG_FATAL, buf->buf_agmt_name,
4aa5b2
+					"clcache_load_buffer - Using alternative start iteration csn: %s \n",
4aa5b2
+					(char*)buf->buf_key.data);
4aa5b2
+			}
4aa5b2
+			/* the use of alternative start csns can be limited, record its usage */
4aa5b2
+			(*continue_on_miss)--;
4aa5b2
+		}
4aa5b2
 		/* Reset some flag variables */
4aa5b2
 		if ( rc == 0 ) {
4aa5b2
 			int i;
4aa5b2
@@ -407,23 +423,6 @@ retry:
4aa5b2
 								 & buf->buf_key,
4aa5b2
 								 & buf->buf_data,
4aa5b2
 								 DB_SET );
4aa5b2
-			if (rc == DB_NOTFOUND) {
4aa5b2
-				/* the start position in the changelog is not found
4aa5b2
-				 * 1. log an error
4aa5b2
-				 * 2. try to find another starting position as close
4aa5b2
-				 *    as possible
4aa5b2
-				 */
4aa5b2
-				slapi_log_error ( SLAPI_LOG_FATAL, "clcache_load_buffer_bulk",
4aa5b2
-							"changelog record with csn (%s) not found for DB_NEXT\n",
4aa5b2
-							(char *)buf->buf_key.data );
4aa5b2
-				rc = cursor->c_get ( cursor, & buf->buf_key, & buf->buf_data,
4aa5b2
-							 DB_SET_RANGE );
4aa5b2
-				/* this moves the cursor ahead of the tageted csn,
4aa5b2
-				 * so we achieved what was intended with DB_SET/DB_NEXT
4aa5b2
-				 * continute at this csn.
4aa5b2
-				 */
4aa5b2
-				use_flag = DB_CURRENT;
4aa5b2
-			}
4aa5b2
 		}
4aa5b2
 
4aa5b2
 		/*
4aa5b2
@@ -432,12 +431,6 @@ retry:
4aa5b2
 		 */
4aa5b2
 		if ( 0 == rc || DB_BUFFER_SMALL == rc ) {
4aa5b2
 			rc = clcache_cursor_get ( cursor, buf, use_flag );
4aa5b2
-			if ( rc == DB_NOTFOUND && use_flag == DB_SET) {
4aa5b2
-				slapi_log_error ( SLAPI_LOG_FATAL, "clcache_load_buffer_bulk",
4aa5b2
-							"changelog record with csn (%s) not found for DB_SET\n",
4aa5b2
-							(char *)buf->buf_key.data );
4aa5b2
-				rc = clcache_cursor_get ( cursor, buf, DB_SET_RANGE );
4aa5b2
-			}
4aa5b2
 		}
4aa5b2
 
4aa5b2
 	}
4aa5b2
@@ -511,7 +504,7 @@ clcache_get_next_change ( CLC_Buffer *buf, void **key, size_t *keylen, void **da
4aa5b2
 		 * We're done with the current buffer. Now load the next chunk.
4aa5b2
 		 */
4aa5b2
 		if ( NULL == *key && CLC_STATE_READY == buf->buf_state ) {
4aa5b2
-			rc = clcache_load_buffer ( buf, NULL );
4aa5b2
+			rc = clcache_load_buffer ( buf, NULL, NULL );
4aa5b2
 			if ( 0 == rc && buf->buf_record_ptr ) {
4aa5b2
 				DB_MULTIPLE_KEY_NEXT ( buf->buf_record_ptr, &buf->buf_data,
4aa5b2
 								   *key, *keylen, *data, *datalen );
4aa5b2
diff --git a/ldap/servers/plugins/replication/cl5_clcache.h b/ldap/servers/plugins/replication/cl5_clcache.h
4aa5b2
index 75b2817..6809542 100644
4aa5b2
--- a/ldap/servers/plugins/replication/cl5_clcache.h
4aa5b2
+++ b/ldap/servers/plugins/replication/cl5_clcache.h
4aa5b2
@@ -23,7 +23,7 @@ typedef struct clc_buffer CLC_Buffer;
4aa5b2
 int	 clcache_init ( DB_ENV **dbenv );
4aa5b2
 void clcache_set_config ();
4aa5b2
 int	 clcache_get_buffer ( CLC_Buffer **buf, DB *db, ReplicaId consumer_rid, const RUV *consumer_ruv, const RUV *local_ruv );
4aa5b2
-int	 clcache_load_buffer ( CLC_Buffer *buf, CSN **anchorCSN );
4aa5b2
+int	 clcache_load_buffer ( CLC_Buffer *buf, CSN **anchorCSN, int *continue_on_miss );
4aa5b2
 void clcache_return_buffer ( CLC_Buffer **buf );
4aa5b2
 int	 clcache_get_next_change ( CLC_Buffer *buf, void **key, size_t *keylen, void **data, size_t *datalen, CSN **csn );
4aa5b2
 void clcache_destroy ();
4aa5b2
diff --git a/ldap/servers/plugins/replication/repl5.h b/ldap/servers/plugins/replication/repl5.h
4aa5b2
index 13a38fd..6582876 100644
4aa5b2
--- a/ldap/servers/plugins/replication/repl5.h
4aa5b2
+++ b/ldap/servers/plugins/replication/repl5.h
4aa5b2
@@ -154,6 +154,7 @@ extern const char *type_replicaReleaseTimeout;
4aa5b2
 extern const char *type_replicaBackoffMin;
4aa5b2
 extern const char *type_replicaBackoffMax;
4aa5b2
 extern const char *type_replicaPrecisePurge;
4aa5b2
+extern const char *type_replicaIgnoreMissingChange;
4aa5b2
 
4aa5b2
 /* Attribute names for windows replication agreements */
4aa5b2
 extern const char *type_nsds7WindowsReplicaArea;
4aa5b2
@@ -317,6 +318,7 @@ long agmt_get_busywaittime(const Repl_Agmt *ra);
4aa5b2
 long agmt_get_pausetime(const Repl_Agmt *ra);
4aa5b2
 long agmt_get_flowcontrolwindow(const Repl_Agmt *ra);
4aa5b2
 long agmt_get_flowcontrolpause(const Repl_Agmt *ra);
4aa5b2
+long agmt_get_ignoremissing(const Repl_Agmt *ra);
4aa5b2
 int agmt_start(Repl_Agmt *ra);
4aa5b2
 int windows_agmt_start(Repl_Agmt *ra); 
4aa5b2
 int agmt_stop(Repl_Agmt *ra);
4aa5b2
@@ -339,6 +341,7 @@ int agmt_set_schedule_from_entry( Repl_Agmt *ra, const Slapi_Entry *e );
4aa5b2
 int agmt_set_timeout_from_entry( Repl_Agmt *ra, const Slapi_Entry *e );
4aa5b2
 int agmt_set_flowcontrolwindow_from_entry(Repl_Agmt *ra, const Slapi_Entry *e);
4aa5b2
 int agmt_set_flowcontrolpause_from_entry(Repl_Agmt *ra, const Slapi_Entry *e);
4aa5b2
+int agmt_set_ignoremissing_from_entry(Repl_Agmt *ra, const Slapi_Entry *e);
4aa5b2
 int agmt_set_busywaittime_from_entry( Repl_Agmt *ra, const Slapi_Entry *e );
4aa5b2
 int agmt_set_pausetime_from_entry( Repl_Agmt *ra, const Slapi_Entry *e );
4aa5b2
 int agmt_set_credentials_from_entry( Repl_Agmt *ra, const Slapi_Entry *e );
4aa5b2
@@ -385,6 +388,7 @@ int agmt_set_enabled_from_entry(Repl_Agmt *ra, Slapi_Entry *e, char *returntext)
4aa5b2
 char **agmt_get_attrs_to_strip(Repl_Agmt *ra);
4aa5b2
 int agmt_set_attrs_to_strip(Repl_Agmt *ra, Slapi_Entry *e);
4aa5b2
 int agmt_set_timeout(Repl_Agmt *ra, long timeout);
4aa5b2
+int agmt_set_ignoremissing(Repl_Agmt *ra, long ignoremissing);
4aa5b2
 void agmt_update_done(Repl_Agmt *ra, int is_total);
4aa5b2
 PRUint64 agmt_get_protocol_timeout(Repl_Agmt *agmt);
4aa5b2
 void agmt_set_protocol_timeout(Repl_Agmt *agmt, PRUint64 timeout);
4aa5b2
diff --git a/ldap/servers/plugins/replication/repl5_agmt.c b/ldap/servers/plugins/replication/repl5_agmt.c
4aa5b2
index 52cc8b6..b089f97 100644
4aa5b2
--- a/ldap/servers/plugins/replication/repl5_agmt.c
4aa5b2
+++ b/ldap/servers/plugins/replication/repl5_agmt.c
4aa5b2
@@ -124,6 +124,7 @@ typedef struct repl5agmt {
4aa5b2
 	long flowControlPause; /* When nb of not acknowledged entries overpass totalUpdateWindow
4aa5b2
 	                        * This is the duration (in msec) that the RA will pause before sending the next entry
4aa5b2
 	                        */
4aa5b2
+	long ignoreMissingChange;	/* if set replication will try to continue even if change cannot be found in changelog */
4aa5b2
 	Slapi_RWLock *attr_lock; /* RW lock for all the stripped attrs */
4aa5b2
 	int WaitForAsyncResults; /* Pass to DS_Sleep(PR_MillisecondsToInterval(WaitForAsyncResults))
4aa5b2
 	                          * in repl5_inc_waitfor_async_results */
4aa5b2
@@ -137,6 +138,7 @@ static int get_agmt_status(Slapi_PBlock *pb, Slapi_Entry* e,
4aa5b2
 static int agmt_set_bind_method_no_lock(Repl_Agmt *ra, const Slapi_Entry *e);
4aa5b2
 static int agmt_set_transportinfo_no_lock(Repl_Agmt *ra, const Slapi_Entry *e);
4aa5b2
 static ReplicaId agmt_maxcsn_get_rid(char *maxcsn);
4aa5b2
+static void agmt_replica_reset_ignoremissing (const Repl_Agmt *agmt);
4aa5b2
 
4aa5b2
 /*
4aa5b2
 Schema for replication agreement:
4aa5b2
@@ -349,6 +351,21 @@ agmt_new_from_entry(Slapi_Entry *e)
4aa5b2
 		}
4aa5b2
 	}
4aa5b2
 
4aa5b2
+	/* continue on missing change ? */
4aa5b2
+	ra->ignoreMissingChange = 0;
4aa5b2
+	tmpstr = slapi_entry_attr_get_charptr(e, type_replicaIgnoreMissingChange);
4aa5b2
+	if (NULL != tmpstr)
4aa5b2
+	{
4aa5b2
+		if (strcasecmp(tmpstr,"off") == 0 || strcasecmp(tmpstr,"never") == 0) {
4aa5b2
+			ra->ignoreMissingChange = 0;
4aa5b2
+		} else if (strcasecmp(tmpstr,"on") == 0 || strcasecmp(tmpstr,"once") == 0) {
4aa5b2
+			ra->ignoreMissingChange = 1;
4aa5b2
+		} else if (strcasecmp(tmpstr,"always") == 0) {
4aa5b2
+			ra->ignoreMissingChange = -1;
4aa5b2
+		}
4aa5b2
+		slapi_ch_free_string(&tmpstr);
4aa5b2
+	}
4aa5b2
+
4aa5b2
 	/* DN of entry at root of replicated area */
4aa5b2
 	tmpstr = slapi_entry_attr_get_charptr(e, type_nsds5ReplicaRoot);
4aa5b2
 	if (NULL != tmpstr)
4aa5b2
@@ -1131,6 +1148,16 @@ agmt_get_flowcontrolpause(const Repl_Agmt *ra)
4aa5b2
 	PR_Unlock(ra->lock);
4aa5b2
 	return return_value;
4aa5b2
 }
4aa5b2
+long
4aa5b2
+agmt_get_ignoremissing(const Repl_Agmt *ra)
4aa5b2
+{
4aa5b2
+	long return_value;
4aa5b2
+	PR_ASSERT(NULL != ra);
4aa5b2
+	PR_Lock(ra->lock);
4aa5b2
+	return_value = ra->ignoreMissingChange;
4aa5b2
+	PR_Unlock(ra->lock);
4aa5b2
+	return return_value;
4aa5b2
+}
4aa5b2
 /*
4aa5b2
  * Warning - reference to the long name of the agreement is returned.
4aa5b2
  * The long name of an agreement is the DN of the agreement entry,
4aa5b2
@@ -1996,6 +2023,48 @@ agmt_set_flowcontrolpause_from_entry(Repl_Agmt *ra, const Slapi_Entry *e)
4aa5b2
 	}
4aa5b2
 	return return_value;
4aa5b2
 }
4aa5b2
+/* add comment here */
4aa5b2
+int
4aa5b2
+agmt_set_ignoremissing_from_entry(Repl_Agmt *ra, const Slapi_Entry *e)
4aa5b2
+{
4aa5b2
+	Slapi_Attr *sattr = NULL;
4aa5b2
+	int return_value = -1;
4aa5b2
+
4aa5b2
+	PR_ASSERT(NULL != ra);
4aa5b2
+	PR_Lock(ra->lock);
4aa5b2
+	if (ra->stop_in_progress)
4aa5b2
+	{
4aa5b2
+		PR_Unlock(ra->lock);
4aa5b2
+		return return_value;
4aa5b2
+	}
4aa5b2
+
4aa5b2
+	slapi_entry_attr_find(e, type_replicaIgnoreMissingChange, &sattr);
4aa5b2
+	if (NULL != sattr)
4aa5b2
+	{
4aa5b2
+		Slapi_Value *sval = NULL;
4aa5b2
+		slapi_attr_first_value(sattr, &sval);
4aa5b2
+		if (NULL != sval)
4aa5b2
+		{
4aa5b2
+			const char *tmpval = slapi_value_get_string(sval);
4aa5b2
+			if (strcasecmp(tmpval,"off") == 0 || strcasecmp(tmpval,"never") == 0) {
4aa5b2
+				ra->ignoreMissingChange = 0;
4aa5b2
+				return_value = 0;
4aa5b2
+			} else if (strcasecmp(tmpval,"on") == 0 || strcasecmp(tmpval,"once") == 0) {
4aa5b2
+				ra->ignoreMissingChange = 1;
4aa5b2
+				return_value = 0;
4aa5b2
+			} else if (strcasecmp(tmpval,"always") == 0) {
4aa5b2
+				ra->ignoreMissingChange = -1;
4aa5b2
+				return_value = 0;
4aa5b2
+			}
4aa5b2
+		}
4aa5b2
+	}
4aa5b2
+	PR_Unlock(ra->lock);
4aa5b2
+	if (return_value == 0)
4aa5b2
+	{
4aa5b2
+		prot_notify_agmt_changed(ra->protocol, ra->long_name);
4aa5b2
+	}
4aa5b2
+	return return_value;
4aa5b2
+}
4aa5b2
 
4aa5b2
 int
4aa5b2
 agmt_set_timeout(Repl_Agmt *ra, long timeout)
4aa5b2
@@ -2036,6 +2105,20 @@ agmt_set_flowcontrolpause(Repl_Agmt *ra, long pause)
4aa5b2
 
4aa5b2
     return 0;
4aa5b2
 }
4aa5b2
+int
4aa5b2
+agmt_set_ignoremissing(Repl_Agmt *ra, long ignoremissing)
4aa5b2
+{
4aa5b2
+    PR_Lock(ra->lock);
4aa5b2
+    if (ra->stop_in_progress){
4aa5b2
+        PR_Unlock(ra->lock);
4aa5b2
+        return -1;
4aa5b2
+    }
4aa5b2
+    ra->ignoreMissingChange = ignoremissing;
4aa5b2
+    PR_Unlock(ra->lock);
4aa5b2
+    /* if reset to 0 update the entry */
4aa5b2
+    agmt_replica_reset_ignoremissing(ra);
4aa5b2
+    return 0;
4aa5b2
+}
4aa5b2
 
4aa5b2
 /*
4aa5b2
  * Set or reset the busywaittime
4aa5b2
@@ -2272,6 +2355,37 @@ agmt_replica_init_done (const Repl_Agmt *agmt)
4aa5b2
     slapi_pblock_destroy (pb);
4aa5b2
 }
4aa5b2
 
4aa5b2
+
4aa5b2
+/* delete nsds5replicaIgnoreMissingChange attribute */
4aa5b2
+static void
4aa5b2
+agmt_replica_reset_ignoremissing (const Repl_Agmt *agmt)
4aa5b2
+{
4aa5b2
+    int rc;
4aa5b2
+    Slapi_PBlock *pb = slapi_pblock_new ();
4aa5b2
+    LDAPMod *mods [2];
4aa5b2
+    LDAPMod mod;
4aa5b2
+
4aa5b2
+    mods[0] = &mod;
4aa5b2
+    mods[1] = NULL;
4aa5b2
+    mod.mod_op = LDAP_MOD_DELETE | LDAP_MOD_BVALUES;
4aa5b2
+    mod.mod_type = (char*)type_replicaIgnoreMissingChange;
4aa5b2
+    mod.mod_bvalues = NULL;
4aa5b2
+
4aa5b2
+    slapi_modify_internal_set_pb_ext(pb, agmt->dn, mods, NULL/* controls */,
4aa5b2
+          NULL/* uniqueid */, repl_get_plugin_identity (PLUGIN_MULTIMASTER_REPLICATION), 0/* flags */);
4aa5b2
+    slapi_modify_internal_pb (pb);
4aa5b2
+
4aa5b2
+    slapi_pblock_get(pb, SLAPI_PLUGIN_INTOP_RESULT, &rc);
4aa5b2
+    if (rc != LDAP_SUCCESS && rc != LDAP_NO_SUCH_ATTRIBUTE)
4aa5b2
+    {
4aa5b2
+        slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, "agmt_replica_ignoremissing: "
4aa5b2
+                        "failed to remove (%s) attribute from (%s) entry; LDAP error - %d\n",
4aa5b2
+                        type_replicaIgnoreMissingChange, slapi_sdn_get_ndn (agmt->dn), rc);
4aa5b2
+    }
4aa5b2
+
4aa5b2
+    slapi_pblock_destroy (pb);
4aa5b2
+}
4aa5b2
+
4aa5b2
 /* Agreement object is acquired on behalf of the caller.
4aa5b2
    The caller is responsible for releasing the object
4aa5b2
    when it is no longer used */
4aa5b2
diff --git a/ldap/servers/plugins/replication/repl5_agmtlist.c b/ldap/servers/plugins/replication/repl5_agmtlist.c
4aa5b2
index f50862f..2e6a7de 100644
4aa5b2
--- a/ldap/servers/plugins/replication/repl5_agmtlist.c
4aa5b2
+++ b/ldap/servers/plugins/replication/repl5_agmtlist.c
4aa5b2
@@ -399,6 +399,19 @@ agmtlist_modify_callback(Slapi_PBlock *pb, Slapi_Entry *entryBefore, Slapi_Entry
4aa5b2
 			}
4aa5b2
 		}
4aa5b2
 		else if (slapi_attr_types_equivalent(mods[i]->mod_type,
4aa5b2
+					type_replicaIgnoreMissingChange))
4aa5b2
+		{
4aa5b2
+			/* New replica timeout */
4aa5b2
+			if (agmt_set_ignoremissing_from_entry(agmt, e) != 0)
4aa5b2
+			{
4aa5b2
+				slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, "agmtlist_modify_callback - "
4aa5b2
+						"Failed to update the ignorMissingChange attribute for agreement %s\n",
4aa5b2
+						agmt_get_long_name(agmt));
4aa5b2
+				*returncode = LDAP_OPERATIONS_ERROR;
4aa5b2
+				rc = SLAPI_DSE_CALLBACK_ERROR;
4aa5b2
+			}
4aa5b2
+		}
4aa5b2
+		else if (slapi_attr_types_equivalent(mods[i]->mod_type,
4aa5b2
 					type_nsds5ReplicaBusyWaitTime))
4aa5b2
 		{
4aa5b2
 			/* New replica busywaittime */
4aa5b2
diff --git a/ldap/servers/plugins/replication/repl5_inc_protocol.c b/ldap/servers/plugins/replication/repl5_inc_protocol.c
4aa5b2
index d1de6c5..5ab865a 100644
4aa5b2
--- a/ldap/servers/plugins/replication/repl5_inc_protocol.c
4aa5b2
+++ b/ldap/servers/plugins/replication/repl5_inc_protocol.c
4aa5b2
@@ -1706,16 +1706,18 @@ send_updates(Private_Repl_Protocol *prp, RUV *remote_update_vector, PRUint32 *nu
4aa5b2
 				agmt_get_long_name(prp->agmt));
4aa5b2
 			agmt_set_last_update_status(prp->agmt, 0, NSDS50_REPL_CL_ERROR,
4aa5b2
 				"Data required to update replica has been purged from the changelog. "
4aa5b2
-				"The replica must be reinitialized.");
4aa5b2
-			return_value = UPDATE_FATAL_ERROR;
4aa5b2
+				"If the error persists the replica must be reinitialized.");
4aa5b2
+			return_value = UPDATE_TRANSIENT_ERROR;
4aa5b2
 			break;
4aa5b2
 		case CL5_MISSING_DATA:   /* data should be in the changelog, but is missing */
4aa5b2
 			slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name,
4aa5b2
-				"%s: Missing data encountered\n",
4aa5b2
+				"send_updates - %s: Missing data encountered. "
4aa5b2
+				"If the error persists the replica must be reinitialized.\n",
4aa5b2
 				agmt_get_long_name(prp->agmt));
4aa5b2
 			agmt_set_last_update_status(prp->agmt, 0, NSDS50_REPL_CL_ERROR,
4aa5b2
-				"Changelog data is missing");
4aa5b2
-			return_value = UPDATE_FATAL_ERROR;
4aa5b2
+				"Changelog data is missing. "
4aa5b2
+				"If the error persists the replica must be reinitialized.");
4aa5b2
+			return_value = UPDATE_TRANSIENT_ERROR;
4aa5b2
 			break;
4aa5b2
 		case CL5_UNKNOWN_ERROR:   /* unclassified error */
4aa5b2
 			slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name,
4aa5b2
diff --git a/ldap/servers/plugins/replication/repl_globals.c b/ldap/servers/plugins/replication/repl_globals.c
4aa5b2
index 8b891fb..ab85e7f 100644
4aa5b2
--- a/ldap/servers/plugins/replication/repl_globals.c
4aa5b2
+++ b/ldap/servers/plugins/replication/repl_globals.c
4aa5b2
@@ -114,6 +114,7 @@ const char *type_nsds5ReplicaStripAttrs = "nsds5ReplicaStripAttrs";
4aa5b2
 const char* type_nsds5ReplicaFlowControlWindow = "nsds5ReplicaFlowControlWindow";
4aa5b2
 const char* type_nsds5ReplicaFlowControlPause = "nsds5ReplicaFlowControlPause";
4aa5b2
 const char *type_nsds5WaitForAsyncResults = "nsds5ReplicaWaitForAsyncResults";
4aa5b2
+const char* type_replicaIgnoreMissingChange = "nsds5ReplicaIgnoreMissingChange";
4aa5b2
 
4aa5b2
 /* windows sync specific attributes */
4aa5b2
 const char *type_nsds7WindowsReplicaArea = "nsds7WindowsReplicaSubtree";
4aa5b2
-- 
4aa5b2
2.4.11
4aa5b2