From c160271fccf19f1ca90253bcbb81df7831e76f88 Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Aug 02 2016 13:53:43 +0000 Subject: import 389-ds-base-1.3.4.0-33.el7_2 --- diff --git a/SOURCES/0095-Ticket-48766-Replication-changelog-can-incorrectly-s.patch b/SOURCES/0095-Ticket-48766-Replication-changelog-can-incorrectly-s.patch new file mode 100644 index 0000000..c6fda99 --- /dev/null +++ b/SOURCES/0095-Ticket-48766-Replication-changelog-can-incorrectly-s.patch @@ -0,0 +1,744 @@ +From a39e2b7cba91b9f13fe54123b7e8b510bf5bcee8 Mon Sep 17 00:00:00 2001 +From: Ludwig Krispenz +Date: Wed, 8 Jun 2016 11:28:07 +0200 +Subject: [PATCH 95/99] Ticket 48766 - Replication changelog can incorrectly + skip over updates + +Bug Description: + The changelog iterator uses a buffer to load and send changes, when the buffer is empty + there were scenarios when the starting point for reloading the buffer was incorrectly set + and changes were skipped + +Fix Description: reworked clcache buffer code following design at + http://www.port389.org/docs/389ds/design/changelog-processing-in-repl-state-sending-updates.html + +https://fedorahosted.org/389/ticket/48766 + +Reviewed by: Mark and Thierry, thanks + +(cherry picked from commit b08df71aa9eb18572f58e55e8d6b9ef7fe181773) +(cherry picked from commit ec15a75ccdba713e4d74dcd760e3244ba43b6191) +--- + ldap/servers/plugins/replication/cl5_api.c | 171 +++------------ + ldap/servers/plugins/replication/cl5_clcache.c | 292 +++++++++++++++---------- + ldap/servers/plugins/replication/cl5_clcache.h | 2 +- + 3 files changed, 214 insertions(+), 251 deletions(-) + +diff --git a/ldap/servers/plugins/replication/cl5_api.c b/ldap/servers/plugins/replication/cl5_api.c +index ae23353..3adaf86 100644 +--- a/ldap/servers/plugins/replication/cl5_api.c ++++ b/ldap/servers/plugins/replication/cl5_api.c +@@ -5489,18 +5489,13 @@ static int _cl5PositionCursorForReplay (ReplicaId consumerRID, const RUV *consum + { + CLC_Buffer *clcache = NULL; + CL5DBFile *file; 
+- int i; +- CSN **csns = NULL; + CSN *startCSN = NULL; +- CSN *minCSN = NULL; + char csnStr [CSN_STRSIZE]; + int rc = CL5_SUCCESS; + Object *supplierRuvObj = NULL; + RUV *supplierRuv = NULL; +- PRBool newReplica; + PRBool haveChanges = PR_FALSE; + char *agmt_name; +- ReplicaId rid; + + PR_ASSERT (consumerRuv && replica && fileObj && iterator); + csnStr[0] = '\0'; +@@ -5528,111 +5523,32 @@ static int _cl5PositionCursorForReplay (ReplicaId consumerRID, const RUV *consum + ruv_dump (supplierRuv, agmt_name, NULL); + } + +- /* +- * get the sorted list of SupplierMinCSN (if no ConsumerMaxCSN) +- * and ConsumerMaxCSN for those RIDs where consumer is not +- * up-to-date. +- */ +- csns = cl5BuildCSNList (consumerRuv, supplierRuv); +- if (csns == NULL) +- { +- rc = CL5_NOTFOUND; +- goto done; +- } + +- /* iterate over elements of consumer's (and/or supplier's) ruv */ +- for (i = 0; csns[i]; i++) +- { +- CSN *consumerMaxCSN = NULL; +- +- rid = csn_get_replicaid(csns[i]); +- +- /* +- * Skip CSN that is originated from the consumer. +- * If RID==65535, the CSN is originated from a +- * legacy consumer. In this case the supplier +- * and the consumer may have the same RID. +- */ +- if ((rid == consumerRID && rid != MAX_REPLICA_ID) || (is_cleaned_rid(rid)) ) +- continue; ++ /* initialize the changelog buffer and do the initial load */ + +- startCSN = csns[i]; ++ rc = clcache_get_buffer ( &clcache, file->db, consumerRID, consumerRuv, supplierRuv ); ++ if ( rc != 0 ) goto done; + +- rc = clcache_get_buffer ( &clcache, file->db, consumerRID, consumerRuv, supplierRuv ); +- if ( rc != 0 ) goto done; +- +- /* This is the first loading of this iteration. For replicas +- * already known to the consumer, we exclude the last entry +- * sent to the consumer by using DB_NEXT. However, for +- * replicas new to the consumer, we include the first change +- * ever generated by that replica. 
+- */ +- newReplica = ruv_get_largest_csn_for_replica (consumerRuv, rid, &consumerMaxCSN); +- csn_free(&consumerMaxCSN); +- rc = clcache_load_buffer (clcache, startCSN, (newReplica ? DB_SET : DB_NEXT)); +- +- /* there is a special case which can occur just after migration - in this case, +- the consumer RUV will contain the last state of the supplier before migration, +- but the supplier will have an empty changelog, or the supplier changelog will +- not contain any entries within the consumer min and max CSN - also, since +- the purge RUV contains no CSNs, the changelog has never been purged +- ASSUMPTIONS - it is assumed that the supplier had no pending changes to send +- to any consumers; that is, we can assume that no changes were lost due to +- either changelog purging or database reload - bug# 603061 - richm@netscape.com +- */ +- if ((rc == DB_NOTFOUND) && !ruv_has_csns(file->purgeRUV)) +- { +- char mincsnStr[CSN_STRSIZE]; +- +- /* use the supplier min csn for the buffer start csn - we know +- this csn is in our changelog */ +- if ((RUV_SUCCESS == ruv_get_min_csn_ext(supplierRuv, &minCSN, 1 /* ignore cleaned rids */)) && +- minCSN) +- { /* must now free startCSN */ +- if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) { +- csn_as_string(startCSN, PR_FALSE, csnStr); +- csn_as_string(minCSN, PR_FALSE, mincsnStr); +- slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name_cl, +- "%s: CSN %s not found and no purging, probably a reinit\n", +- agmt_name, csnStr); +- slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name_cl, +- "%s: Will try to use supplier min CSN %s to load changelog\n", +- agmt_name, mincsnStr); +- } +- startCSN = minCSN; +- rc = clcache_load_buffer (clcache, startCSN, DB_SET); +- } +- else +- { +- if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) { +- csn_as_string(startCSN, PR_FALSE, csnStr); +- slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, +- "%s: CSN %s not found and no purging, probably a reinit\n", +- agmt_name, csnStr); +- slapi_log_error(SLAPI_LOG_FATAL, 
repl_plugin_name_cl, +- "%s: Could not get the min csn from the supplier RUV\n", +- agmt_name); +- } +- rc = CL5_RUV_ERROR; +- goto done; +- } +- } ++ rc = clcache_load_buffer (clcache, &startCSN); + + if (rc == 0) { +- haveChanges = PR_TRUE; +- rc = CL5_SUCCESS; +- if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) { +- csn_as_string(startCSN, PR_FALSE, csnStr); +- slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name_cl, +- "%s: CSN %s found, position set for replay\n", agmt_name, csnStr); +- } +- if (startCSN != csns[i]) { +- csn_free(&startCSN); +- } +- break; ++ haveChanges = PR_TRUE; ++ rc = CL5_SUCCESS; ++ if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) { ++ csn_as_string(startCSN, PR_FALSE, csnStr); ++ slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name_cl, ++ "%s: CSN %s found, position set for replay\n", agmt_name, csnStr); ++ } + } +- else if (rc == DB_NOTFOUND) /* entry not found */ +- { ++ else if (rc == DB_NOTFOUND) { ++ /* buffer not loaded. ++ * either because no changes have to be sent ==> startCSN is NULL ++ * or the calculated startCSN cannot be found in the changelog ++ */ ++ if (startCSN == NULL) { ++ rc = CL5_NOTFOUND; ++ goto done; ++ } + /* check whether this csn should be present */ + rc = _cl5CheckMissingCSN (startCSN, supplierRuv, file); + if (rc == CL5_MISSING_DATA) /* we should have had the change but we don't */ +@@ -5650,17 +5566,6 @@ static int _cl5PositionCursorForReplay (ReplicaId consumerRID, const RUV *consum + "%s: CSN %s not found, we aren't as up to date, or we purged\n", + agmt_name, csnStr); + } +- if (startCSN != csns[i]) { +- csn_free(&startCSN); +- } +- if (rc == CL5_MISSING_DATA) /* we should have had the change but we don't */ +- { +- break; +- } +- else /* we are not as up to date or we purged */ +- { +- continue; +- } + } + else + { +@@ -5669,34 +5574,29 @@ static int _cl5PositionCursorForReplay (ReplicaId consumerRID, const RUV *consum + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, + "%s: Failed to retrieve change with CSN %s; 
db error - %d %s\n", + agmt_name, csnStr, rc, db_strerror(rc)); +- if (startCSN != csns[i]) { +- csn_free(&startCSN); +- } + + rc = CL5_DB_ERROR; +- break; +- } ++ } + +- } /* end for */ + + /* setup the iterator */ + if (haveChanges) + { +- *iterator = (CL5ReplayIterator*) slapi_ch_calloc (1, sizeof (CL5ReplayIterator)); ++ *iterator = (CL5ReplayIterator*) slapi_ch_calloc (1, sizeof (CL5ReplayIterator)); + +- if (*iterator == NULL) +- { +- slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, ++ if (*iterator == NULL) ++ { ++ slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl, + "%s: _cl5PositionCursorForReplay: failed to allocate iterator\n", agmt_name); +- rc = CL5_MEMORY_ERROR; +- goto done; +- } ++ rc = CL5_MEMORY_ERROR; ++ goto done; ++ } + + /* ONREPL - should we make a copy of both RUVs here ?*/ +- (*iterator)->fileObj = fileObj; +- (*iterator)->clcache = clcache; clcache = NULL; +- (*iterator)->consumerRID = consumerRID; +- (*iterator)->consumerRuv = consumerRuv; ++ (*iterator)->fileObj = fileObj; ++ (*iterator)->clcache = clcache; clcache = NULL; ++ (*iterator)->consumerRID = consumerRID; ++ (*iterator)->consumerRuv = consumerRuv; + (*iterator)->supplierRuvObj = supplierRuvObj; + } + else if (rc == CL5_SUCCESS) +@@ -5706,11 +5606,8 @@ static int _cl5PositionCursorForReplay (ReplicaId consumerRID, const RUV *consum + } + + done: +- if ( clcache ) +- clcache_return_buffer ( &clcache ); +- +- if (csns) +- cl5DestroyCSNList (&csns); ++ if ( clcache ) ++ clcache_return_buffer ( &clcache ); + + if (rc != CL5_SUCCESS) + { +diff --git a/ldap/servers/plugins/replication/cl5_clcache.c b/ldap/servers/plugins/replication/cl5_clcache.c +index b53d7c0..2d3bb28 100644 +--- a/ldap/servers/plugins/replication/cl5_clcache.c ++++ b/ldap/servers/plugins/replication/cl5_clcache.c +@@ -39,6 +39,7 @@ + #define DEFAULT_CLC_BUFFER_COUNT_MAX 0 + #define DEFAULT_CLC_BUFFER_PAGE_COUNT 32 + #define DEFAULT_CLC_BUFFER_PAGE_SIZE 1024 ++#define WORK_CLC_BUFFER_PAGE_SIZE 
8*DEFAULT_CLC_BUFFER_PAGE_SIZE + + enum { + CLC_STATE_READY = 0, /* ready to iterate */ +@@ -56,8 +57,9 @@ struct csn_seq_ctrl_block { + ReplicaId rid; /* RID this block serves */ + CSN *consumer_maxcsn; /* Don't send CSN <= this */ + CSN *local_maxcsn; /* Don't send CSN > this */ +- CSN *prev_local_maxcsn; /* */ +- int state; /* CLC_STATE_* */ ++ CSN *prev_local_maxcsn; /* Copy of last state at buffer loading */ ++ CSN *local_mincsn; /* Used to determin anchor csn*/ ++ int state; /* CLC_STATE_* */ + }; + + /* +@@ -70,6 +72,8 @@ struct clc_buffer { + ReplicaId buf_consumer_rid; /* help checking threshold csn */ + const RUV *buf_consumer_ruv; /* used to skip change */ + const RUV *buf_local_ruv; /* used to refresh local_maxcsn */ ++ int buf_ignoreConsumerRID; /* how to handle updates from consumer */ ++ int buf_load_cnt; /* number of loads for session */ + + /* + * fields for retriving data from DB +@@ -90,7 +94,6 @@ struct clc_buffer { + int buf_max_cscbs; + + /* fields for debugging stat */ +- int buf_load_cnt; /* number of loads for session */ + int buf_record_cnt; /* number of changes for session */ + int buf_record_skipped; /* number of changes skipped */ + int buf_skipped_new_rid; /* number of changes skipped due to new_rid */ +@@ -133,7 +136,8 @@ struct clc_pool { + static struct clc_pool *_pool = NULL; /* process's buffer pool */ + + /* static prototypes */ +-static int clcache_adjust_anchorcsn ( CLC_Buffer *buf ); ++static int clcache_initial_anchorcsn ( CLC_Buffer *buf, int *flag ); ++static int clcache_adjust_anchorcsn ( CLC_Buffer *buf, int *flag ); + static void clcache_refresh_consumer_maxcsns ( CLC_Buffer *buf ); + static int clcache_refresh_local_maxcsns ( CLC_Buffer *buf ); + static int clcache_skip_change ( CLC_Buffer *buf ); +@@ -251,8 +255,23 @@ clcache_get_buffer ( CLC_Buffer **buf, DB *db, ReplicaId consumer_rid, const RUV + } + + if ( NULL != *buf ) { ++ CSN *c_csn = NULL; ++ CSN *l_csn = NULL; + (*buf)->buf_consumer_ruv = consumer_ruv; + 
(*buf)->buf_local_ruv = local_ruv; ++ (*buf)->buf_load_flag = DB_MULTIPLE_KEY; ++ ruv_get_largest_csn_for_replica (consumer_ruv, consumer_rid, &c_csn); ++ ruv_get_largest_csn_for_replica (local_ruv, consumer_rid, &l_csn); ++ if (l_csn && csn_compare(l_csn, c_csn) > 0) { ++ /* the supplier has updates for the consumer RID and ++ * these updates are newer than on the consumer ++ */ ++ (*buf)->buf_ignoreConsumerRID = 0; ++ } else { ++ (*buf)->buf_ignoreConsumerRID = 1; ++ } ++ csn_free(&c_csn); ++ csn_free(&l_csn); + } + else { + slapi_log_error ( SLAPI_LOG_FATAL, get_thread_private_agmtname(), +@@ -305,36 +324,25 @@ clcache_return_buffer ( CLC_Buffer **buf ) + * historic reason. + */ + int +-clcache_load_buffer ( CLC_Buffer *buf, CSN *anchorcsn, int flag ) ++clcache_load_buffer ( CLC_Buffer *buf, CSN **anchorCSN ) + { + int rc = 0; ++ int flag = DB_NEXT; + ++ if (anchorCSN) *anchorCSN = NULL; + clcache_refresh_local_maxcsns ( buf ); + +- /* Set the loading key */ +- if ( anchorcsn ) { ++ if (buf->buf_load_cnt == 0 ) { + clcache_refresh_consumer_maxcsns ( buf ); +- buf->buf_load_flag = DB_MULTIPLE_KEY; +- csn_as_string ( anchorcsn, 0, (char*)buf->buf_key.data ); +- slapi_log_error ( SLAPI_LOG_REPL, buf->buf_agmt_name, +- "session start: anchorcsn=%s\n", (char*)buf->buf_key.data ); +- } +- else if ( csn_get_time(buf->buf_current_csn) == 0 ) { +- /* time == 0 means this csn has never been set */ +- rc = DB_NOTFOUND; +- } +- else if ( clcache_adjust_anchorcsn ( buf ) != 0 ) { +- rc = DB_NOTFOUND; +- } +- else { +- csn_as_string ( buf->buf_current_csn, 0, (char*)buf->buf_key.data ); +- slapi_log_error ( SLAPI_LOG_REPL, buf->buf_agmt_name, +- "load next: anchorcsn=%s\n", (char*)buf->buf_key.data ); ++ rc = clcache_initial_anchorcsn ( buf, &flag ); ++ } else { ++ rc = clcache_adjust_anchorcsn ( buf, &flag ); + } + + if ( rc == 0 ) { + + buf->buf_state = CLC_STATE_READY; ++ if (anchorCSN) *anchorCSN = buf->buf_current_csn; + rc = clcache_load_buffer_bulk ( buf, flag ); + + 
/* Reset some flag variables */ +@@ -344,21 +352,15 @@ clcache_load_buffer ( CLC_Buffer *buf, CSN *anchorcsn, int flag ) + buf->buf_cscbs[i]->state = CLC_STATE_READY; + } + } +- else if ( anchorcsn ) { +- /* Report error only when the missing is persistent */ +- if ( buf->buf_missing_csn && csn_compare (buf->buf_missing_csn, anchorcsn) == 0 ) { +- if (!buf->buf_prev_missing_csn || csn_compare (buf->buf_prev_missing_csn, anchorcsn)) { +- slapi_log_error ( SLAPI_LOG_FATAL, buf->buf_agmt_name, +- "Can't locate CSN %s in the changelog (DB rc=%d). If replication stops, the consumer may need to be reinitialized.\n", +- (char*)buf->buf_key.data, rc ); +- csn_dup_or_init_by_csn (&buf->buf_prev_missing_csn, anchorcsn); +- } +- } +- else { +- csn_dup_or_init_by_csn (&buf->buf_missing_csn, anchorcsn); +- } ++ else { ++ slapi_log_error ( SLAPI_LOG_FATAL, buf->buf_agmt_name, ++ "Can't locate CSN %s in the changelog (DB rc=%d). If replication stops, the consumer may need to be reinitialized.\n", ++ (char*)buf->buf_key.data, rc ); + } ++ } else if (rc == CLC_STATE_DONE) { ++ rc = DB_NOTFOUND; + } ++ + if ( rc != 0 ) { + slapi_log_error ( SLAPI_LOG_REPL, buf->buf_agmt_name, + "clcache_load_buffer: rc=%d\n", rc ); +@@ -483,7 +485,7 @@ clcache_get_next_change ( CLC_Buffer *buf, void **key, size_t *keylen, void **da + * We're done with the current buffer. Now load the next chunk. 
+ */ + if ( NULL == *key && CLC_STATE_READY == buf->buf_state ) { +- rc = clcache_load_buffer ( buf, NULL, DB_NEXT ); ++ rc = clcache_load_buffer ( buf, NULL ); + if ( 0 == rc && buf->buf_record_ptr ) { + DB_MULTIPLE_KEY_NEXT ( buf->buf_record_ptr, &buf->buf_data, + *key, *keylen, *data, *datalen ); +@@ -521,7 +523,6 @@ clcache_refresh_consumer_maxcsns ( CLC_Buffer *buf ) + int i; + + for ( i = 0; i < buf->buf_num_cscbs; i++ ) { +- csn_free(&buf->buf_cscbs[i]->consumer_maxcsn); + ruv_get_largest_csn_for_replica ( + buf->buf_consumer_ruv, + buf->buf_cscbs[i]->rid, +@@ -538,14 +539,11 @@ clcache_refresh_local_maxcsn ( const ruv_enum_data *rid_data, void *data ) + int i; + + rid = csn_get_replicaid ( rid_data->csn ); +- +- /* +- * No need to create cscb for consumer's RID. +- * If RID==65535, the CSN is originated from a +- * legacy consumer. In this case the supplier +- * and the consumer may have the same RID. ++ /* we do not handle updates originated at the consumer if not required ++ * and we ignore RID which have been cleaned + */ +- if ( rid == buf->buf_consumer_rid && rid != MAX_REPLICA_ID ) ++ if ( (rid == buf->buf_consumer_rid && buf->buf_ignoreConsumerRID) || ++ is_cleaned_rid(rid) ) + return rc; + + for ( i = 0; i < buf->buf_num_cscbs; i++ ) { +@@ -564,9 +562,20 @@ clcache_refresh_local_maxcsn ( const ruv_enum_data *rid_data, void *data ) + } + buf->buf_cscbs[i]->rid = rid; + buf->buf_num_cscbs++; ++ /* this is the first time we have a local change for the RID ++ * we need to check what the consumer knows about it. 
++ */ ++ ruv_get_largest_csn_for_replica ( ++ buf->buf_consumer_ruv, ++ buf->buf_cscbs[i]->rid, ++ &buf->buf_cscbs[i]->consumer_maxcsn ); + } + ++ if (buf->buf_cscbs[i]->local_maxcsn) ++ csn_dup_or_init_by_csn ( &buf->buf_cscbs[i]->prev_local_maxcsn, buf->buf_cscbs[i]->local_maxcsn ); ++ + csn_dup_or_init_by_csn ( &buf->buf_cscbs[i]->local_maxcsn, rid_data->csn ); ++ csn_dup_or_init_by_csn ( &buf->buf_cscbs[i]->local_mincsn, rid_data->min_csn ); + + if ( buf->buf_cscbs[i]->consumer_maxcsn && + csn_compare (buf->buf_cscbs[i]->consumer_maxcsn, rid_data->csn) >= 0 ) { +@@ -580,88 +589,147 @@ clcache_refresh_local_maxcsn ( const ruv_enum_data *rid_data, void *data ) + static int + clcache_refresh_local_maxcsns ( CLC_Buffer *buf ) + { +- int i; + +- for ( i = 0; i < buf->buf_num_cscbs; i++ ) { +- csn_dup_or_init_by_csn ( &buf->buf_cscbs[i]->prev_local_maxcsn, +- buf->buf_cscbs[i]->local_maxcsn ); +- } + return ruv_enumerate_elements ( buf->buf_local_ruv, clcache_refresh_local_maxcsn, buf ); + } + + /* + * Algorithm: + * +- * 1. Snapshot local RUVs; +- * 2. Load buffer; +- * 3. Send to the consumer only those CSNs that are covered +- * by the RUVs snapshot taken in the first step; +- * All CSNs that are covered by the RUVs snapshot taken in the +- * first step are guaranteed in consecutive order for the respected +- * RIDs because of the the CSN pending list control; +- * A CSN that is not covered by the RUVs snapshot may be out of order +- * since it is possible that a smaller CSN might not have committed +- * yet by the time the buffer was loaded. +- * 4. Determine anchorcsn for each RID: +- * +- * Case| Local vs. Buffer | New Local | Next +- * | MaxCSN MaxCSN | MaxCSN | Anchor-CSN +- * ----+-------------------+-----------+---------------- +- * 1 | Cl >= Cb | * | Cb +- * 2 | Cl < Cb | Cl | Cb +- * 3 | Cl < Cb | Cl2 | Cl +- * +- * 5. Determine anchorcsn for next load: ++ * 1. Determine anchorcsn for each RID: ++ * 2. 
Determine anchorcsn for next load: + * Anchor-CSN = min { all Next-Anchor-CSN, Buffer-MaxCSN } + */ + static int +-clcache_adjust_anchorcsn ( CLC_Buffer *buf ) ++clcache_initial_anchorcsn ( CLC_Buffer *buf, int *flag ) + { + PRBool hasChange = PR_FALSE; + struct csn_seq_ctrl_block *cscb; + int i; ++ CSN *anchorcsn = NULL; + + if ( buf->buf_state == CLC_STATE_READY ) { + for ( i = 0; i < buf->buf_num_cscbs; i++ ) { ++ CSN *rid_anchor = NULL; ++ int rid_flag = DB_NEXT; + cscb = buf->buf_cscbs[i]; + +- if ( cscb->state == CLC_STATE_UP_TO_DATE ) +- continue; ++ if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) { ++ char prevmax[CSN_STRSIZE]; ++ char local[CSN_STRSIZE]; ++ char curr[CSN_STRSIZE]; ++ char conmaxcsn[CSN_STRSIZE]; ++ csn_as_string(cscb->prev_local_maxcsn, 0, prevmax); ++ csn_as_string(cscb->local_maxcsn, 0, local); ++ csn_as_string(buf->buf_current_csn, 0, curr); ++ csn_as_string(cscb->consumer_maxcsn, 0, conmaxcsn); ++ slapi_log_error(SLAPI_LOG_REPL, "clcache_initial_anchorcsn" , ++ "%s - (cscb %d - state %d) - csnPrevMax (%s) " ++ "csnMax (%s) csnBuf (%s) csnConsumerMax (%s)\n", ++ buf->buf_agmt_name, i, cscb->state, prevmax, local, ++ curr, conmaxcsn); ++ } + +- /* +- * Case 3 unsafe ruv change: next buffer load should start +- * from where the maxcsn in the old ruv was. Since each +- * cscb has remembered the maxcsn sent to the consumer, +- * CSNs that may be loaded again could easily be skipped. 
+- */ +- if ( cscb->prev_local_maxcsn && +- csn_compare (cscb->prev_local_maxcsn, buf->buf_current_csn) < 0 && +- csn_compare (cscb->local_maxcsn, cscb->prev_local_maxcsn) != 0 ) { ++ if (cscb->consumer_maxcsn == NULL) { ++ /* the consumer hasn't seen changes for this RID */ ++ rid_anchor = cscb->local_mincsn; ++ rid_flag = DB_SET; ++ } else if ( csn_compare (cscb->local_maxcsn, cscb->consumer_maxcsn) > 0 ) { ++ rid_anchor = cscb->consumer_maxcsn; ++ } ++ ++ if (rid_anchor && (anchorcsn == NULL || ++ ( csn_compare(rid_anchor, anchorcsn) < 0))) { ++ anchorcsn = rid_anchor; ++ *flag = rid_flag; + hasChange = PR_TRUE; +- cscb->state = CLC_STATE_READY; +- csn_init_by_csn ( buf->buf_current_csn, cscb->prev_local_maxcsn ); +- csn_as_string ( cscb->prev_local_maxcsn, 0, (char*)buf->buf_key.data ); +- slapi_log_error ( SLAPI_LOG_REPL, buf->buf_agmt_name, +- "adjust anchor csn upon %s\n", +- ( cscb->state == CLC_STATE_CSN_GT_RUV ? "out of sequence csn" : "unsafe ruv change") ); +- continue; + } + +- /* +- * check if there are still changes to send for this RID +- * Assume we had compared the local maxcsn and the consumer +- * max csn before this function was called and hence the +- * cscb->state had been set accordingly. 
+- */ +- if ( hasChange == PR_FALSE && +- csn_compare (cscb->local_maxcsn, buf->buf_current_csn) > 0 ) { ++ ++ } ++ } ++ ++ if ( !hasChange ) { ++ buf->buf_state = CLC_STATE_DONE; ++ } else { ++ csn_init_by_csn(buf->buf_current_csn, anchorcsn); ++ csn_as_string(buf->buf_current_csn, 0, (char *)buf->buf_key.data); ++ slapi_log_error(SLAPI_LOG_REPL, "clcache_initial_anchorcsn", ++ "anchor is now: %s\n", (char *)buf->buf_key.data); ++ } ++ ++ return buf->buf_state; ++} ++ ++static int ++clcache_adjust_anchorcsn ( CLC_Buffer *buf, int *flag ) ++{ ++ PRBool hasChange = PR_FALSE; ++ struct csn_seq_ctrl_block *cscb; ++ int i; ++ CSN *anchorcsn = NULL; ++ ++ if ( buf->buf_state == CLC_STATE_READY ) { ++ for ( i = 0; i < buf->buf_num_cscbs; i++ ) { ++ CSN *rid_anchor = NULL; ++ int rid_flag = DB_NEXT; ++ cscb = buf->buf_cscbs[i]; ++ ++ if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) { ++ char prevmax[CSN_STRSIZE]; ++ char local[CSN_STRSIZE]; ++ char curr[CSN_STRSIZE]; ++ char conmaxcsn[CSN_STRSIZE]; ++ csn_as_string(cscb->prev_local_maxcsn, 0, prevmax); ++ csn_as_string(cscb->local_maxcsn, 0, local); ++ csn_as_string(buf->buf_current_csn, 0, curr); ++ csn_as_string(cscb->consumer_maxcsn, 0, conmaxcsn); ++ slapi_log_error(SLAPI_LOG_REPL, "clcache_adjust_anchorcsn" , ++ "%s - (cscb %d - state %d) - csnPrevMax (%s) " ++ "csnMax (%s) csnBuf (%s) csnConsumerMax (%s)\n", ++ buf->buf_agmt_name, i, cscb->state, prevmax, local, ++ curr, conmaxcsn); ++ } ++ ++ if (csn_compare (cscb->local_maxcsn, cscb->prev_local_maxcsn) == 0 || ++ csn_compare (cscb->prev_local_maxcsn, buf->buf_current_csn) > 0 ) { ++ if (csn_compare (cscb->local_maxcsn, cscb->consumer_maxcsn) > 0 ) { ++ rid_anchor = buf->buf_current_csn; ++ } ++ } else { ++ /* prev local max csn < csnBuffer AND different from local maxcsn */ ++ if (cscb->prev_local_maxcsn == NULL) { ++ if (cscb->consumer_maxcsn == NULL) { ++ /* the consumer hasn't seen changes for this RID */ ++ rid_anchor = cscb->local_mincsn; ++ rid_flag = DB_SET; ++ } 
else if ( csn_compare (cscb->local_maxcsn, cscb->consumer_maxcsn) > 0 ) { ++ rid_anchor = cscb->consumer_maxcsn; ++ } ++ } else { ++ /* csnPrevMaxSup > 0 */ ++ rid_anchor = cscb->consumer_maxcsn; ++ } ++ } ++ ++ if (rid_anchor && (anchorcsn == NULL || ++ ( csn_compare(rid_anchor, anchorcsn) < 0))) { ++ anchorcsn = rid_anchor; ++ *flag = rid_flag; + hasChange = PR_TRUE; + } ++ ++ + } + } + + if ( !hasChange ) { + buf->buf_state = CLC_STATE_DONE; ++ } else { ++ csn_init_by_csn(buf->buf_current_csn, anchorcsn); ++ csn_as_string(buf->buf_current_csn, 0, (char *)buf->buf_key.data); ++ slapi_log_error(SLAPI_LOG_REPL, "clcache_adjust_anchorcsn", ++ "anchor is now: %s\n", (char *)buf->buf_key.data); + } + + return buf->buf_state; +@@ -675,7 +743,6 @@ clcache_skip_change ( CLC_Buffer *buf ) + int skip = 1; + int i; + char buf_cur_csn_str[CSN_STRSIZE]; +- char oth_csn_str[CSN_STRSIZE]; + + do { + +@@ -688,25 +755,14 @@ clcache_skip_change ( CLC_Buffer *buf ) + * legacy consumer. In this case the supplier + * and the consumer may have the same RID. + */ +- if (rid == buf->buf_consumer_rid && rid != MAX_REPLICA_ID){ +- CSN *cons_maxcsn = NULL; +- +- ruv_get_max_csn(buf->buf_consumer_ruv, &cons_maxcsn); +- if ( csn_compare ( buf->buf_current_csn, cons_maxcsn) > 0 ) { +- /* +- * The consumer must have been "restored" and needs this newer update. 
+- */ +- skip = 0; +- } else if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) { ++ if (rid == buf->buf_consumer_rid && buf->buf_ignoreConsumerRID){ ++ if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) { + csn_as_string(buf->buf_current_csn, 0, buf_cur_csn_str); +- csn_as_string(cons_maxcsn, 0, oth_csn_str); + slapi_log_error(SLAPI_LOG_REPL, buf->buf_agmt_name, +- "Skipping update because the changelog buffer current csn [%s] is " +- "less than or equal to the consumer max csn [%s]\n", +- buf_cur_csn_str, oth_csn_str); ++ "Skipping update because the consumer with Rid: [%d] is " ++ "ignored\n", rid); + buf->buf_skipped_csn_gt_cons_maxcsn++; + } +- csn_free(&cons_maxcsn); + break; + } + +@@ -821,6 +877,7 @@ clcache_free_cscb ( struct csn_seq_ctrl_block ** cscb ) + csn_free ( & (*cscb)->consumer_maxcsn ); + csn_free ( & (*cscb)->local_maxcsn ); + csn_free ( & (*cscb)->prev_local_maxcsn ); ++ csn_free ( & (*cscb)->local_mincsn ); + slapi_ch_free ( (void **) cscb ); + } + +@@ -1003,6 +1060,15 @@ clcache_cursor_get ( DBC *cursor, CLC_Buffer *buf, int flag ) + { + int rc; + ++ if (buf->buf_data.ulen > WORK_CLC_BUFFER_PAGE_SIZE) { ++ /* ++ * The buffer size had to be increased, ++ * reset it to a smaller working size, ++ * if not sufficient it will be increased again ++ */ ++ buf->buf_data.ulen = WORK_CLC_BUFFER_PAGE_SIZE; ++ } ++ + rc = cursor->c_get ( cursor, + & buf->buf_key, + & buf->buf_data, +diff --git a/ldap/servers/plugins/replication/cl5_clcache.h b/ldap/servers/plugins/replication/cl5_clcache.h +index 4c459ab..75b2817 100644 +--- a/ldap/servers/plugins/replication/cl5_clcache.h ++++ b/ldap/servers/plugins/replication/cl5_clcache.h +@@ -23,7 +23,7 @@ typedef struct clc_buffer CLC_Buffer; + int clcache_init ( DB_ENV **dbenv ); + void clcache_set_config (); + int clcache_get_buffer ( CLC_Buffer **buf, DB *db, ReplicaId consumer_rid, const RUV *consumer_ruv, const RUV *local_ruv ); +-int clcache_load_buffer ( CLC_Buffer *buf, CSN *startCSN, int flag ); ++int clcache_load_buffer 
( CLC_Buffer *buf, CSN **anchorCSN ); + void clcache_return_buffer ( CLC_Buffer **buf ); + int clcache_get_next_change ( CLC_Buffer *buf, void **key, size_t *keylen, void **data, size_t *datalen, CSN **csn ); + void clcache_destroy (); +-- +2.4.11 + diff --git a/SOURCES/0096-Ticket-47788-Supplier-can-skip-a-failing-update-alth.patch b/SOURCES/0096-Ticket-47788-Supplier-can-skip-a-failing-update-alth.patch new file mode 100644 index 0000000..2367de5 --- /dev/null +++ b/SOURCES/0096-Ticket-47788-Supplier-can-skip-a-failing-update-alth.patch @@ -0,0 +1,446 @@ +From 09cff2c4c01bbcaf45df553869d0b6cb8acfad2b Mon Sep 17 00:00:00 2001 +From: Mark Reynolds +Date: Sun, 17 Jan 2016 18:25:43 -0500 +Subject: [PATCH 96/99] Ticket 47788 - Supplier can skip a failing update, + although it should retry + +Bug Description: If a replicated update fails on the consumer, + the update is never retried. This is due to the + replication async result thread missing the failure + before another update is replicated and it succeeds. + + This second update that succeeds updates the consumer + RUV. This makes it appear that the consumer is caught + up, and the supplier never resends that original + failed update. + +Fix Description: When a replicated update fails, and it's an error we + cannot ignore, the connection is closed, which stops the + replication session, and prevents any further updates + coming in and updating the consumer RUV. This allows + the supplier to correctly retry the operation that + failed on the next replication session. + +https://fedorahosted.org/389/ticket/47788 + +Reviewed by: nhosoi, wibrown, and rmeggins (Thanks!!!) 
+ +(cherry picked from commit ab6501a963c94b2b6b5fa8d1924519ef1c26b0bd) +(cherry picked from commit 407c545f07c06520f8378649fc0ac8fe20748dc7) +--- + ldap/servers/plugins/replication/repl5.h | 1 + + .../servers/plugins/replication/repl5_connection.c | 19 +-- + .../plugins/replication/repl5_inc_protocol.c | 182 ++++++++++++--------- + ldap/servers/plugins/replication/repl5_plugins.c | 60 ++++++- + ldap/servers/plugins/replication/urp.c | 2 +- + 5 files changed, 168 insertions(+), 96 deletions(-) + +diff --git a/ldap/servers/plugins/replication/repl5.h b/ldap/servers/plugins/replication/repl5.h +index df92ca0..307da82 100644 +--- a/ldap/servers/plugins/replication/repl5.h ++++ b/ldap/servers/plugins/replication/repl5.h +@@ -608,6 +608,7 @@ void replica_incr_agmt_count(Replica *r); + void replica_decr_agmt_count(Replica *r); + PRUint64 replica_get_precise_purging(Replica *r); + void replica_set_precise_purging(Replica *r, PRUint64 on_off); ++PRBool ignore_error_and_keep_going(int error); + + /* The functions below handles the state flag */ + /* Current internal state flags */ +diff --git a/ldap/servers/plugins/replication/repl5_connection.c b/ldap/servers/plugins/replication/repl5_connection.c +index 1515ca1..d193938 100644 +--- a/ldap/servers/plugins/replication/repl5_connection.c ++++ b/ldap/servers/plugins/replication/repl5_connection.c +@@ -480,17 +480,17 @@ conn_read_result_ex(Repl_Connection *conn, char **retoidp, struct berval **retda + conn->last_ldap_error = rc; + close_connection_internal(conn); /* we already have the lock */ + return_value = CONN_NOT_CONNECTED; ++ goto done; + } + else if (IS_DISCONNECT_ERROR(err)) + { + conn->last_ldap_error = err; + close_connection_internal(conn); /* we already have the lock */ + return_value = CONN_NOT_CONNECTED; ++ goto done; + } + /* Got a result */ +- if ((rc == LDAP_SUCCESS) && (err == LDAP_BUSY)) +- return_value = CONN_BUSY; +- else if (retoidp) ++ if (retoidp /* total update */) + { + if (!((rc == LDAP_SUCCESS) && 
(err == LDAP_BUSY))) + { +@@ -519,16 +519,11 @@ conn_read_result_ex(Repl_Connection *conn, char **retoidp, struct berval **retda + } + return_value = LDAP_SUCCESS == conn->last_ldap_error ? CONN_OPERATION_SUCCESS : CONN_OPERATION_FAILED; + } +- /* +- * XXXggood do I need to free matched, referrals, +- * anything else? Or can I pass NULL for the args +- * I'm not interested in? +- */ +- /* Good question! Meanwhile, as RTM aproaches, let's free them... */ +- slapi_ch_free((void **) &errmsg); +- slapi_ch_free((void **) &matched); +- charray_free(referrals); + conn->status = STATUS_CONNECTED; ++done: ++ slapi_ch_free_string(&errmsg); ++ slapi_ch_free_string(&matched); ++ charray_free(referrals); + } + if (res) ldap_msgfree(res); + PR_Unlock(conn->lock); /* release the conn lock */ +diff --git a/ldap/servers/plugins/replication/repl5_inc_protocol.c b/ldap/servers/plugins/replication/repl5_inc_protocol.c +index 244bbb2..927f835 100644 +--- a/ldap/servers/plugins/replication/repl5_inc_protocol.c ++++ b/ldap/servers/plugins/replication/repl5_inc_protocol.c +@@ -146,7 +146,6 @@ static void protocol_sleep(Private_Repl_Protocol *prp, PRIntervalTime duration); + static int send_updates(Private_Repl_Protocol *prp, RUV *ruv, PRUint32 *num_changes_sent); + static void repl5_inc_backoff_expired(time_t timer_fire_time, void *arg); + static int examine_update_vector(Private_Repl_Protocol *prp, RUV *ruv); +-static PRBool ignore_error_and_keep_going(int error); + static const char* state2name (int state); + static const char* event2name (int event); + static const char* op2string (int op); +@@ -450,11 +449,13 @@ repl5_inc_flow_control_results(Repl_Agmt *agmt, result_data *rd) + PR_Unlock(rd->lock); + } + +-static void ++static int + repl5_inc_waitfor_async_results(result_data *rd) + { + int done = 0; + int loops = 0; ++ int rc = UPDATE_NO_MORE_UPDATES; ++ + /* Keep pulling results off the LDAP connection until we catch up to the last message id stored in the rd */ + while (!done && 
!slapi_is_shutting_down()) + { +@@ -470,6 +471,10 @@ repl5_inc_waitfor_async_results(result_data *rd) + } else if (rd->abort && (rd->result == UPDATE_CONNECTION_LOST)) { + done = 1; /* no connection == no more results */ + } ++ /* ++ * Return the last operation result ++ */ ++ rc = rd->result; + PR_Unlock(rd->lock); + if (!done) { + /* If not then sleep a bit */ +@@ -487,6 +492,7 @@ repl5_inc_waitfor_async_results(result_data *rd) + done = 1; + } + } ++ return rc; + } + + /* +@@ -1467,78 +1473,84 @@ static int + repl5_inc_update_from_op_result(Private_Repl_Protocol *prp, ConnResult replay_crc, int connection_error, char *csn_str, char *uniqueid, ReplicaId replica_id, int* finished, PRUint32 *num_changes_sent) + { + int return_value = 0; +- +- /* Indentation is wrong here so we can get a sensible cvs diff */ +- if (CONN_OPERATION_SUCCESS != replay_crc) +- { +- /* Figure out what to do next */ +- if (CONN_OPERATION_FAILED == replay_crc) +- { +- /* Map ldap error code to return value */ +- if (!ignore_error_and_keep_going(connection_error)) +- { +- return_value = UPDATE_TRANSIENT_ERROR; +- *finished = 1; +- } +- else +- { +- agmt_inc_last_update_changecount (prp->agmt, replica_id, 1 /*skipped*/); +- } +- slapi_log_error(*finished ? SLAPI_LOG_FATAL : slapi_log_urp, repl_plugin_name, +- "%s: Consumer failed to replay change (uniqueid %s, CSN %s): %s (%d). %s.\n", +- agmt_get_long_name(prp->agmt), +- uniqueid, csn_str, +- ldap_err2string(connection_error), connection_error, +- *finished ? "Will retry later" : "Skipping"); +- } +- else if (CONN_NOT_CONNECTED == replay_crc) +- { +- /* We lost the connection - enter backoff state */ + +- return_value = UPDATE_CONNECTION_LOST; +- *finished = 1; +- slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, +- "%s: Consumer failed to replay change (uniqueid %s, CSN %s): " +- "%s(%d). Will retry later.\n", +- agmt_get_long_name(prp->agmt), +- uniqueid, csn_str, +- connection_error ? 
ldap_err2string(connection_error) : "Connection lost", +- connection_error); +- } +- else if (CONN_TIMEOUT == replay_crc) +- { +- return_value = UPDATE_TIMEOUT; +- *finished = 1; +- slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, +- "%s: Consumer timed out to replay change (uniqueid %s, CSN %s): " +- "%s.\n", +- agmt_get_long_name(prp->agmt), +- uniqueid, csn_str, +- connection_error ? ldap_err2string(connection_error) : "Timeout"); +- } +- else if (CONN_LOCAL_ERROR == replay_crc) +- { +- /* +- * Something bad happened on the local server - enter +- * backoff state. +- */ +- return_value = UPDATE_TRANSIENT_ERROR; +- *finished = 1; +- slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, +- "%s: Failed to replay change (uniqueid %s, CSN %s): " +- "Local error. Will retry later.\n", +- agmt_get_long_name(prp->agmt), +- uniqueid, csn_str); +- } +- +- } +- else +- { +- /* Positive response received */ +- (*num_changes_sent)++; +- agmt_inc_last_update_changecount (prp->agmt, replica_id, 0 /*replayed*/); +- } +- return return_value; ++ if (CONN_OPERATION_SUCCESS != replay_crc) ++ { ++ /* Figure out what to do next */ ++ if (CONN_OPERATION_FAILED == replay_crc) ++ { ++ /* Map ldap error code to return value */ ++ if (!ignore_error_and_keep_going(connection_error)) ++ { ++ return_value = UPDATE_TRANSIENT_ERROR; ++ *finished = 1; ++ } ++ else ++ { ++ agmt_inc_last_update_changecount (prp->agmt, replica_id, 1 /*skipped*/); ++ } ++ slapi_log_error(*finished ? SLAPI_LOG_FATAL : slapi_log_urp, repl_plugin_name, ++ "%s: Consumer failed to replay change (uniqueid %s, CSN %s): %s (%d). %s.\n", ++ agmt_get_long_name(prp->agmt), ++ uniqueid, csn_str, ++ ldap_err2string(connection_error), connection_error, ++ *finished ? 
"Will retry later" : "Skipping"); ++ } ++ else if (CONN_NOT_CONNECTED == replay_crc) ++ { ++ /* We lost the connection - enter backoff state */ ++ ++ return_value = UPDATE_CONNECTION_LOST; ++ *finished = 1; ++ slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, ++ "%s: Consumer failed to replay change (uniqueid %s, CSN %s): " ++ "%s(%d). Will retry later.\n", ++ agmt_get_long_name(prp->agmt), ++ uniqueid, csn_str, ++ connection_error ? ldap_err2string(connection_error) : "Connection lost", ++ connection_error); ++ } ++ else if (CONN_TIMEOUT == replay_crc) ++ { ++ return_value = UPDATE_TIMEOUT; ++ *finished = 1; ++ slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, ++ "%s: Consumer timed out to replay change (uniqueid %s, CSN %s): " ++ "%s.\n", ++ agmt_get_long_name(prp->agmt), ++ uniqueid, csn_str, ++ connection_error ? ldap_err2string(connection_error) : "Timeout"); ++ } ++ else if (CONN_LOCAL_ERROR == replay_crc) ++ { ++ /* ++ * Something bad happened on the local server - enter ++ * backoff state. ++ */ ++ return_value = UPDATE_TRANSIENT_ERROR; ++ *finished = 1; ++ slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, ++ "%s: Failed to replay change (uniqueid %s, CSN %s): " ++ "Local error. Will retry later.\n", ++ agmt_get_long_name(prp->agmt), ++ uniqueid, csn_str); ++ } ++ if (*finished){ ++ /* ++ * A serious error has occurred, the consumer might have closed ++ * the connection already, but we need to close the conn on the ++ * supplier side to properly set the conn structure as closed. 
++ */ ++ conn_disconnect(prp->conn); ++ } ++ } ++ else ++ { ++ /* Positive response received */ ++ (*num_changes_sent)++; ++ agmt_inc_last_update_changecount (prp->agmt, replica_id, 0 /*replayed*/); ++ } ++ return return_value; + } + + /* +@@ -1556,7 +1568,7 @@ send_updates(Private_Repl_Protocol *prp, RUV *remote_update_vector, PRUint32 *nu + { + CL5Entry entry; + slapi_operation_parameters op; +- int return_value; ++ int return_value = 0; + int rc; + CL5ReplayIterator *changelog_iterator; + int message_id = 0; +@@ -1929,9 +1941,23 @@ send_updates(Private_Repl_Protocol *prp, RUV *remote_update_vector, PRUint32 *nu + { + /* We need to ensure that we wait until all the responses have been received from our operations */ + if (return_value != UPDATE_CONNECTION_LOST) { +- rd->WaitForAsyncResults = agmt_get_WaitForAsyncResults(prp->agmt); +- /* if connection was lost/closed, there will be nothing to read */ +- repl5_inc_waitfor_async_results(rd); ++ /* ++ * If we already have an error, there is no need to check the ++ * async result thread anymore. ++ */ ++ if (return_value == UPDATE_NO_MORE_UPDATES) ++ { ++ /* ++ * We need to double check that an error hasn't popped up from ++ * the async result thread since our last check. ++ */ ++ int final_result; ++ ++ rd->WaitForAsyncResults = agmt_get_WaitForAsyncResults(prp->agmt); ++ if((final_result = repl5_inc_waitfor_async_results(rd))){ ++ return_value = final_result; ++ } ++ } + } + + rc = repl5_inc_destroy_async_result_thread(rd); +@@ -2220,7 +2246,7 @@ examine_update_vector(Private_Repl_Protocol *prp, RUV *remote_ruv) + * We stop if there's some indication that the server just completely + * failed to process the operation, e.g. LDAP_OPERATIONS_ERROR. 
+ */ +-static PRBool ++PRBool + ignore_error_and_keep_going(int error) + { + int return_value = PR_FALSE; +diff --git a/ldap/servers/plugins/replication/repl5_plugins.c b/ldap/servers/plugins/replication/repl5_plugins.c +index 8992055..c2fa214 100644 +--- a/ldap/servers/plugins/replication/repl5_plugins.c ++++ b/ldap/servers/plugins/replication/repl5_plugins.c +@@ -1231,12 +1231,13 @@ write_changelog_and_ruv (Slapi_PBlock *pb) + static int + process_postop (Slapi_PBlock *pb) + { +- int rc = LDAP_SUCCESS; +- Slapi_Operation *op; ++ Slapi_Operation *op; + Slapi_Backend *be; +- int is_replicated_operation = 0; ++ int is_replicated_operation = 0; + CSN *opcsn = NULL; + char sessionid[REPL_SESSION_ID_SIZE]; ++ int retval = LDAP_SUCCESS; ++ int rc = 0; + + /* we just let fixup operations through */ + slapi_pblock_get( pb, SLAPI_OPERATION, &op ); +@@ -1260,8 +1261,8 @@ process_postop (Slapi_PBlock *pb) + + get_repl_session_id (pb, sessionid, &opcsn); + +- slapi_pblock_get(pb, SLAPI_RESULT_CODE, &rc); +- if (rc == LDAP_SUCCESS) ++ slapi_pblock_get(pb, SLAPI_RESULT_CODE, &retval); ++ if (retval == LDAP_SUCCESS) + { + agmtlist_notify_all(pb); + rc = SLAPI_PLUGIN_SUCCESS; +@@ -1306,6 +1307,55 @@ process_postop (Slapi_PBlock *pb) + slapi_ch_free((void **) &op_params->p.p_modrdn.modrdn_newsuperior_address.uniqueid); + } + } ++ if (!ignore_error_and_keep_going(retval)){ ++ /* ++ * We have an error we can't ignore. Release the replica and close ++ * the connection to stop the replication session. ++ */ ++ consumer_connection_extension *connext = NULL; ++ Slapi_Connection *conn = NULL; ++ char csn_str[CSN_STRSIZE] = {'\0'}; ++ PRUint64 connid = 0; ++ int opid = 0; ++ ++ slapi_pblock_get(pb, SLAPI_CONNECTION, &conn); ++ slapi_pblock_get(pb, SLAPI_OPERATION_ID, &opid); ++ slapi_pblock_get(pb, SLAPI_CONN_ID, &connid); ++ if (conn) ++ { ++ slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, ++ "process_postop: Failed to apply update (%s) error (%d). 
" ++ "Aborting replication session(conn=%" NSPRIu64 " op=%d)\n", ++ csn_as_string(opcsn, PR_FALSE, csn_str), retval, ++ connid, opid); ++ /* ++ * Release this replica so new sessions can begin ++ */ ++ connext = consumer_connection_extension_acquire_exclusive_access(conn, connid, opid); ++ if (connext && connext->replica_acquired) ++ { ++ int zero = 0; ++ Replica *r = (Replica*)object_get_data ((Object*)connext->replica_acquired); ++ ++ replica_relinquish_exclusive_access(r, connid, opid); ++ object_release ((Object*)connext->replica_acquired); ++ connext->replica_acquired = NULL; ++ connext->isreplicationsession = 0; ++ slapi_pblock_set( pb, SLAPI_CONN_IS_REPLICATION_SESSION, &zero ); ++ } ++ if (connext){ ++ consumer_connection_extension_relinquish_exclusive_access(conn, connid, opid, PR_FALSE); ++ } ++ ++ /* ++ * Close the connection to end the current session with the ++ * supplier. This prevents new updates from coming in and ++ * updating the consumer RUV - which would cause this failed ++ * update to be never be replayed. 
++ */ ++ slapi_disconnect_server(conn); ++ } ++ } + if (NULL == opcsn) + opcsn = operation_get_csn(op); + if (opcsn) +diff --git a/ldap/servers/plugins/replication/urp.c b/ldap/servers/plugins/replication/urp.c +index 5fe6f55..8d0d735 100644 +--- a/ldap/servers/plugins/replication/urp.c ++++ b/ldap/servers/plugins/replication/urp.c +@@ -122,7 +122,7 @@ urp_add_operation( Slapi_PBlock *pb ) + slapi_log_error(slapi_log_urp, sessionid, + "urp_add (%s): an entry with this uniqueid already exists.\n", + slapi_entry_get_dn_const(existing_uniqueid_entry)); +- op_result= LDAP_UNWILLING_TO_PERFORM; ++ op_result= LDAP_ALREADY_EXISTS; + slapi_pblock_set(pb, SLAPI_RESULT_CODE, &op_result); + rc = SLAPI_PLUGIN_NOOP; /* Ignore this Operation */ + PROFILE_POINT; /* Add Conflict; UniqueID Exists; Ignore */ +-- +2.4.11 + diff --git a/SOURCES/0097-Ticket-47788-Only-check-postop-result-if-its-a-repli.patch b/SOURCES/0097-Ticket-47788-Only-check-postop-result-if-its-a-repli.patch new file mode 100644 index 0000000..1a0eb59 --- /dev/null +++ b/SOURCES/0097-Ticket-47788-Only-check-postop-result-if-its-a-repli.patch @@ -0,0 +1,129 @@ +From a9135e8b535bc58a986d4b19b05e6ce2718c07aa Mon Sep 17 00:00:00 2001 +From: Mark Reynolds +Date: Thu, 11 Feb 2016 16:18:00 -0500 +Subject: [PATCH 97/99] Ticket 47788 - Only check postop result if its a + replication operation + +Bug Description: With IPA tests, without using replication, an error is + being triggered that is stopping normal updates from + going through. + +Fix Description: Move the error checking up into the code block above + which only executes for replicated operations. + +https://fedorahosted.org/389/ticket/47788 + +Reviewed by: nhosoi & tbordaz(Thanks!!)
+ +(cherry picked from commit d7b598da2eff95070936bf7c3e01bcd11c44ed60) +(cherry picked from commit bd254a2eea380ade90700b22567e1d9063890f02) +--- + ldap/servers/plugins/replication/repl5_plugins.c | 87 ++++++++++++------------ + 1 file changed, 44 insertions(+), 43 deletions(-) + +diff --git a/ldap/servers/plugins/replication/repl5_plugins.c b/ldap/servers/plugins/replication/repl5_plugins.c +index c2fa214..bb43b9b 100644 +--- a/ldap/servers/plugins/replication/repl5_plugins.c ++++ b/ldap/servers/plugins/replication/repl5_plugins.c +@@ -1306,54 +1306,55 @@ process_postop (Slapi_PBlock *pb) + slapi_pblock_get( pb, SLAPI_OPERATION_PARAMETERS, &op_params ); + slapi_ch_free((void **) &op_params->p.p_modrdn.modrdn_newsuperior_address.uniqueid); + } +- } +- if (!ignore_error_and_keep_going(retval)){ +- /* +- * We have an error we can't ignore. Release the replica and close +- * the connection to stop the replication session. +- */ +- consumer_connection_extension *connext = NULL; +- Slapi_Connection *conn = NULL; +- char csn_str[CSN_STRSIZE] = {'\0'}; +- PRUint64 connid = 0; +- int opid = 0; + +- slapi_pblock_get(pb, SLAPI_CONNECTION, &conn); +- slapi_pblock_get(pb, SLAPI_OPERATION_ID, &opid); +- slapi_pblock_get(pb, SLAPI_CONN_ID, &connid); +- if (conn) +- { +- slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, +- "process_postop: Failed to apply update (%s) error (%d). " +- "Aborting replication session(conn=%" NSPRIu64 " op=%d)\n", +- csn_as_string(opcsn, PR_FALSE, csn_str), retval, +- connid, opid); ++ if (!ignore_error_and_keep_going(retval)){ + /* +- * Release this replica so new sessions can begin ++ * We have an error we can't ignore. Release the replica and close ++ * the connection to stop the replication session. 
+ */ +- connext = consumer_connection_extension_acquire_exclusive_access(conn, connid, opid); +- if (connext && connext->replica_acquired) ++ consumer_connection_extension *connext = NULL; ++ Slapi_Connection *conn = NULL; ++ char csn_str[CSN_STRSIZE] = {'\0'}; ++ PRUint64 connid = 0; ++ int opid = 0; ++ ++ slapi_pblock_get(pb, SLAPI_CONNECTION, &conn); ++ slapi_pblock_get(pb, SLAPI_OPERATION_ID, &opid); ++ slapi_pblock_get(pb, SLAPI_CONN_ID, &connid); ++ if (conn) + { +- int zero = 0; +- Replica *r = (Replica*)object_get_data ((Object*)connext->replica_acquired); +- +- replica_relinquish_exclusive_access(r, connid, opid); +- object_release ((Object*)connext->replica_acquired); +- connext->replica_acquired = NULL; +- connext->isreplicationsession = 0; +- slapi_pblock_set( pb, SLAPI_CONN_IS_REPLICATION_SESSION, &zero ); +- } +- if (connext){ +- consumer_connection_extension_relinquish_exclusive_access(conn, connid, opid, PR_FALSE); +- } ++ slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, ++ "process_postop: Failed to apply update (%s) error (%d). " ++ "Aborting replication session(conn=%" NSPRIu64 " op=%d)\n", ++ csn_as_string(opcsn, PR_FALSE, csn_str), retval, ++ connid, opid); ++ /* ++ * Release this replica so new sessions can begin ++ */ ++ connext = consumer_connection_extension_acquire_exclusive_access(conn, connid, opid); ++ if (connext && connext->replica_acquired) ++ { ++ int zero = 0; ++ Replica *r = (Replica*)object_get_data ((Object*)connext->replica_acquired); ++ ++ replica_relinquish_exclusive_access(r, connid, opid); ++ object_release ((Object*)connext->replica_acquired); ++ connext->replica_acquired = NULL; ++ connext->isreplicationsession = 0; ++ slapi_pblock_set( pb, SLAPI_CONN_IS_REPLICATION_SESSION, &zero ); ++ } ++ if (connext){ ++ consumer_connection_extension_relinquish_exclusive_access(conn, connid, opid, PR_FALSE); ++ } + +- /* +- * Close the connection to end the current session with the +- * supplier. 
This prevents new updates from coming in and +- * updating the consumer RUV - which would cause this failed +- * update to be never be replayed. +- */ +- slapi_disconnect_server(conn); ++ /* ++ * Close the connection to end the current session with the ++ * supplier. This prevents new updates from coming in and ++ * updating the consumer RUV - which would cause this failed ++ * update to be never be replayed. ++ */ ++ slapi_disconnect_server(conn); ++ } + } + } + if (NULL == opcsn) +-- +2.4.11 + diff --git a/SOURCES/0098-Ticket-48636-Improve-replication-convergence.patch b/SOURCES/0098-Ticket-48636-Improve-replication-convergence.patch new file mode 100644 index 0000000..488b619 --- /dev/null +++ b/SOURCES/0098-Ticket-48636-Improve-replication-convergence.patch @@ -0,0 +1,690 @@ +From 94377fba9dbcfc2fe47a32cc7cb85766813ad482 Mon Sep 17 00:00:00 2001 +From: Mark Reynolds +Date: Wed, 8 Jun 2016 13:06:46 -0400 +Subject: [PATCH 98/99] Ticket 48636 - Improve replication convergence + +Bug Description: In a busy MMR environment where multiple masters are being + updated at the same time the replica sessions stay open for + a very long time. This causes other masters to wait to send + their updates. This causes lop-sided convergence. Where + entries added to the MMR environment, but on different masters, + take a very different amount of time until they are each seen + on all the replicas. + +Fix Description: A new configuration setting was added (nsds5ReplicaReleaseTimeout) + to the replica configuration entry. So when replica A tries + to acquire a replica B, replica B sends a control back to the + master(master C) that is updating replica B to abort the session. + Master C will continue sending updates for the amount of time + specified in the "release timeout", then it will "yield" its + current session so other replicas can acquire that replica. + +https://fedorahosted.org/389/ticket/48636 + +Reviewed by: lkrispen & nhosoi(Thanks!!)
+ +(cherry picked from commit a1545cdae48e4b4e1fc87a168e4d8f959626f112) +(cherry picked from commit a085b0cd6b39fc85821777b7bcd2a8a2482a48bf) +--- + ldap/schema/01core389.ldif | 3 +- + ldap/servers/plugins/replication/repl5.h | 14 ++- + .../plugins/replication/repl5_inc_protocol.c | 102 ++++++++++++---- + ldap/servers/plugins/replication/repl5_plugins.c | 3 +- + ldap/servers/plugins/replication/repl5_replica.c | 135 +++++++++++++++++---- + .../plugins/replication/repl5_replica_config.c | 22 ++++ + ldap/servers/plugins/replication/repl_globals.c | 1 + + 7 files changed, 229 insertions(+), 51 deletions(-) + +diff --git a/ldap/schema/01core389.ldif b/ldap/schema/01core389.ldif +index aebdb5a..14143ed 100644 +--- a/ldap/schema/01core389.ldif ++++ b/ldap/schema/01core389.ldif +@@ -278,6 +278,7 @@ attributeTypes: ( 2.16.840.1.113730.3.1.2311 NAME 'nsds5ReplicaFlowControlPause' + attributeTypes: ( 2.16.840.1.113730.3.1.2313 NAME 'nsslapd-changelogtrim-interval' DESC 'Netscape defined attribute type' SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 SINGLE-VALUE X-ORIGIN 'Netscape Directory Server' ) + attributeTypes: ( 2.16.840.1.113730.3.1.2314 NAME 'nsslapd-changelogcompactdb-interval' DESC 'Netscape defined attribute type' SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 SINGLE-VALUE X-ORIGIN 'Netscape Directory Server' ) + attributeTypes: ( 2.16.840.1.113730.3.1.2315 NAME 'nsDS5ReplicaWaitForAsyncResults' DESC 'Netscape defined attribute type' SYNTAX 1.3.6.1.4.1.1466.115.121.1.27 SINGLE-VALUE X-ORIGIN 'Netscape Directory Server' ) ++attributeTypes: ( 2.16.840.1.113730.3.1.2333 NAME 'nsds5ReplicaReleaseTimeout' DESC 'Netscape defined attribute type' SYNTAX 1.3.6.1.4.1.1466.115.121.1.27 SINGLE-VALUE X-ORIGIN 'Netscape Directory Server' ) + # + # objectclasses + # +@@ -287,7 +288,7 @@ objectClasses: ( 2.16.840.1.113730.3.2.44 NAME 'nsIndex' DESC 'Netscape defined + objectClasses: ( 2.16.840.1.113730.3.2.109 NAME 'nsBackendInstance' DESC 'Netscape defined objectclass' SUP top MUST ( CN ) X-ORIGIN 
'Netscape Directory Server' ) + objectClasses: ( 2.16.840.1.113730.3.2.110 NAME 'nsMappingTree' DESC 'Netscape defined objectclass' SUP top MUST ( CN ) X-ORIGIN 'Netscape Directory Server' ) + objectClasses: ( 2.16.840.1.113730.3.2.104 NAME 'nsContainer' DESC 'Netscape defined objectclass' SUP top MUST ( CN ) X-ORIGIN 'Netscape Directory Server' ) +-objectClasses: ( 2.16.840.1.113730.3.2.108 NAME 'nsDS5Replica' DESC 'Netscape defined objectclass' SUP top MUST ( nsDS5ReplicaRoot $ nsDS5ReplicaId ) MAY (cn $ nsds5ReplicaPreciseTombstonePurging $ nsds5ReplicaCleanRUV $ nsds5ReplicaAbortCleanRUV $ nsDS5ReplicaType $ nsDS5ReplicaBindDN $ nsState $ nsDS5ReplicaName $ nsDS5Flags $ nsDS5Task $ nsDS5ReplicaReferral $ nsDS5ReplicaAutoReferral $ nsds5ReplicaPurgeDelay $ nsds5ReplicaTombstonePurgeInterval $ nsds5ReplicaChangeCount $ nsds5ReplicaLegacyConsumer $ nsds5ReplicaProtocolTimeout $ nsds5ReplicaBackoffMin $ nsds5ReplicaBackoffMax ) X-ORIGIN 'Netscape Directory Server' ) ++objectClasses: ( 2.16.840.1.113730.3.2.108 NAME 'nsDS5Replica' DESC 'Netscape defined objectclass' SUP top MUST ( nsDS5ReplicaRoot $ nsDS5ReplicaId ) MAY (cn $ nsds5ReplicaPreciseTombstonePurging $ nsds5ReplicaCleanRUV $ nsds5ReplicaAbortCleanRUV $ nsDS5ReplicaType $ nsDS5ReplicaBindDN $ nsState $ nsDS5ReplicaName $ nsDS5Flags $ nsDS5Task $ nsDS5ReplicaReferral $ nsDS5ReplicaAutoReferral $ nsds5ReplicaPurgeDelay $ nsds5ReplicaTombstonePurgeInterval $ nsds5ReplicaChangeCount $ nsds5ReplicaLegacyConsumer $ nsds5ReplicaProtocolTimeout $ nsds5ReplicaBackoffMin $ nsds5ReplicaBackoffMax $ nsds5ReplicaReleaseTimeout ) X-ORIGIN 'Netscape Directory Server' ) + objectClasses: ( 2.16.840.1.113730.3.2.113 NAME 'nsTombstone' DESC 'Netscape defined objectclass' SUP top MAY ( nstombstonecsn $ nsParentUniqueId $ nscpEntryDN ) X-ORIGIN 'Netscape Directory Server' ) + objectClasses: ( 2.16.840.1.113730.3.2.103 NAME 'nsDS5ReplicationAgreement' DESC 'Netscape defined objectclass' SUP top MUST ( cn ) MAY ( 
nsds5ReplicaCleanRUVNotified $ nsDS5ReplicaHost $ nsDS5ReplicaPort $ nsDS5ReplicaTransportInfo $ nsDS5ReplicaBindDN $ nsDS5ReplicaCredentials $ nsDS5ReplicaBindMethod $ nsDS5ReplicaRoot $ nsDS5ReplicatedAttributeList $ nsDS5ReplicatedAttributeListTotal $ nsDS5ReplicaUpdateSchedule $ nsds5BeginReplicaRefresh $ description $ nsds50ruv $ nsruvReplicaLastModified $ nsds5ReplicaTimeout $ nsds5replicaChangesSentSinceStartup $ nsds5replicaLastUpdateEnd $ nsds5replicaLastUpdateStart $ nsds5replicaLastUpdateStatus $ nsds5replicaUpdateInProgress $ nsds5replicaLastInitEnd $ nsds5ReplicaEnabled $ nsds5replicaLastInitStart $ nsds5replicaLastInitStatus $ nsds5debugreplicatimeout $ nsds5replicaBusyWaitTime $ nsds5ReplicaStripAttrs $ nsds5replicaSessionPauseTime $ nsds5ReplicaProtocolTimeout $ nsds5ReplicaFlowControlWindow $ nsds5ReplicaFlowControlPause $ nsDS5ReplicaWaitForAsyncResults ) X-ORIGIN 'Netscape Directory Server' ) + objectClasses: ( 2.16.840.1.113730.3.2.39 NAME 'nsslapdConfig' DESC 'Netscape defined objectclass' SUP top MAY ( cn ) X-ORIGIN 'Netscape Directory Server' ) +diff --git a/ldap/servers/plugins/replication/repl5.h b/ldap/servers/plugins/replication/repl5.h +index 307da82..6f6c81a 100644 +--- a/ldap/servers/plugins/replication/repl5.h ++++ b/ldap/servers/plugins/replication/repl5.h +@@ -69,6 +69,10 @@ + #define REPL_ABORT_CLEANRUV_OID "2.16.840.1.113730.3.6.6" + #define REPL_CLEANRUV_GET_MAXCSN_OID "2.16.840.1.113730.3.6.7" + #define REPL_CLEANRUV_CHECK_STATUS_OID "2.16.840.1.113730.3.6.8" ++#define REPL_ABORT_SESSION_OID "2.16.840.1.113730.3.6.9" ++#define SESSION_ACQUIRED 0 ++#define ABORT_SESSION 1 ++#define SESSION_ABORTED 2 + + #define CLEANRUV_ACCEPTED "accepted" + #define CLEANRUV_REJECTED "rejected" +@@ -141,6 +145,7 @@ extern const char *type_nsds5ReplicaStripAttrs; + extern const char *type_nsds5ReplicaFlowControlWindow; + extern const char *type_nsds5ReplicaFlowControlPause; + extern const char *type_replicaProtocolTimeout; ++extern const char 
*type_replicaReleaseTimeout; + extern const char *type_replicaBackoffMin; + extern const char *type_replicaBackoffMax; + extern const char *type_replicaPrecisePurge; +@@ -526,9 +531,9 @@ Replica *replica_new_from_entry (Slapi_Entry *e, char *errortext, PRBool is_add_ + void replica_destroy(void **arg); + int replica_subentry_update(Slapi_DN *repl_root, ReplicaId rid); + int replica_subentry_check(Slapi_DN *repl_root, ReplicaId rid); +-PRBool replica_get_exclusive_access(Replica *r, PRBool *isInc, PRUint64 connid, int opid, +- const char *locking_purl, +- char **current_purl); ++PRBool replica_get_exclusive_access(Replica *r, PRBool *isInc, PRUint64 connid, ++ int opid, const char *locking_purl, ++ char **current_purl); + void replica_relinquish_exclusive_access(Replica *r, PRUint64 connid, int opid); + PRBool replica_get_tombstone_reap_active(const Replica *r); + const Slapi_DN *replica_get_root(const Replica *r); +@@ -598,6 +603,8 @@ void replica_update_state (time_t when, void *arg); + void replica_reset_csn_pl(Replica *r); + PRUint64 replica_get_protocol_timeout(Replica *r); + void replica_set_protocol_timeout(Replica *r, PRUint64 timeout); ++PRUint64 replica_get_release_timeout(Replica *r); ++void replica_set_release_timeout(Replica *r, PRUint64 timeout); + void replica_set_groupdn_checkinterval(Replica *r, int timeout); + PRUint64 replica_get_backoff_min(Replica *r); + PRUint64 replica_get_backoff_max(Replica *r); +@@ -609,6 +616,7 @@ void replica_decr_agmt_count(Replica *r); + PRUint64 replica_get_precise_purging(Replica *r); + void replica_set_precise_purging(Replica *r, PRUint64 on_off); + PRBool ignore_error_and_keep_going(int error); ++void replica_check_release_timeout(Replica *r, Slapi_PBlock *pb); + + /* The functions below handles the state flag */ + /* Current internal state flags */ +diff --git a/ldap/servers/plugins/replication/repl5_inc_protocol.c b/ldap/servers/plugins/replication/repl5_inc_protocol.c +index 927f835..d6fb898 100644 +--- 
a/ldap/servers/plugins/replication/repl5_inc_protocol.c ++++ b/ldap/servers/plugins/replication/repl5_inc_protocol.c +@@ -36,6 +36,11 @@ Perhaps these events should be properties of the main protocol. + #include "repl5_prot_private.h" + #include "cl5_api.h" + ++#include "repl5.h" ++#include "repl5_prot_private.h" ++#include "cl5_api.h" ++#include "slapi-plugin.h" ++ + extern int slapi_log_urp; + + /*** from proto-slap.h ***/ +@@ -82,6 +87,7 @@ typedef struct result_data + int flowcontrol_detection; + int result; /* The UPDATE_TRANSIENT_ERROR etc */ + int WaitForAsyncResults; ++ time_t abort_time; + } result_data; + + /* Various states the incremental protocol can pass through */ +@@ -121,6 +127,7 @@ typedef struct result_data + #define EXAMINE_RUV_PARAM_ERROR 405 + + #define MAX_CHANGES_PER_SESSION 10000 ++ + /* + * Maximum time to wait between replication sessions. If we + * don't see any updates for a period equal to this interval, +@@ -240,19 +247,21 @@ repl5_inc_result_threadmain(void *param) + Repl_Connection *conn = rd->prp->conn; + int finished = 0; + int message_id = 0; ++ int yield_session = 0; + + slapi_log_error(SLAPI_LOG_REPL, NULL, "repl5_inc_result_threadmain starting\n"); + while (!finished) + { ++ LDAPControl **returned_controls = NULL; + repl5_inc_operation *op = NULL; +- int connection_error = 0; ++ ReplicaId replica_id = 0; + char *csn_str = NULL; + char *uniqueid = NULL; +- ReplicaId replica_id = 0; +- int operation_code = 0; + char *ldap_error_string = NULL; + time_t time_now = 0; + time_t start_time = time( NULL ); ++ int connection_error = 0; ++ int operation_code = 0; + int backoff_time = 1; + + /* Read the next result */ +@@ -264,7 +273,7 @@ repl5_inc_result_threadmain(void *param) + + while (!finished) + { +- conres = conn_read_result_ex(conn, NULL, NULL, NULL, LDAP_RES_ANY, &message_id, 0); ++ conres = conn_read_result_ex(conn, NULL, NULL, &returned_controls, LDAP_RES_ANY, &message_id, 0); + slapi_log_error(SLAPI_LOG_REPL, NULL, 
"repl5_inc_result_threadmain: read result for message_id %d\n", message_id); + /* Timeout here means that we didn't block, not a real timeout */ + if (CONN_TIMEOUT == conres) +@@ -292,9 +301,19 @@ repl5_inc_result_threadmain(void *param) + finished = 1; + } + PR_Unlock(rd->lock); +- } else +- { +- /* Something other than a timeout, so we exit the loop */ ++ } else { ++ /* ++ * Something other than a timeout, so we exit the loop. ++ * First check if we were told to abort the session ++ */; ++ Replica *r = (Replica*)object_get_data(rd->prp->replica_object); ++ if (replica_get_release_timeout(r) && ++ slapi_control_present(returned_controls, ++ REPL_ABORT_SESSION_OID, ++ NULL, NULL)) ++ { ++ yield_session = 1; ++ } + break; + } + } +@@ -318,21 +337,29 @@ repl5_inc_result_threadmain(void *param) + } + + conn_get_error_ex(conn, &operation_code, &connection_error, &ldap_error_string); +- slapi_log_error(SLAPI_LOG_REPL, NULL, "repl5_inc_result_threadmain: result %d, %d, %d, %d, %s\n", operation_code,connection_error,conres,message_id,ldap_error_string); +- return_value = repl5_inc_update_from_op_result(rd->prp, conres, connection_error, csn_str, uniqueid, replica_id, &should_finish, &(rd->num_changes_sent)); ++ slapi_log_error(SLAPI_LOG_REPL, NULL, ++ "repl5_inc_result_threadmain: result %d, %d, %d, %d, %s\n", ++ operation_code,connection_error,conres,message_id,ldap_error_string); ++ return_value = repl5_inc_update_from_op_result(rd->prp, conres, connection_error, ++ csn_str, uniqueid, replica_id, &should_finish, ++ &(rd->num_changes_sent)); + if (return_value || should_finish) + { +- slapi_log_error(SLAPI_LOG_REPL, NULL, "repl5_inc_result_threadmain: got op result %d should finish %d\n", return_value, should_finish); ++ slapi_log_error(SLAPI_LOG_REPL, NULL, ++ "repl5_inc_result_threadmain: got op result %d should finish %d\n", ++ return_value, should_finish); + /* If so then we need to take steps to abort the update process */ + PR_Lock(rd->lock); + rd->result = 
return_value; +- rd->abort = 1; ++ rd->abort = ABORT_SESSION; + PR_Unlock(rd->lock); +- /* We also need to log the error, including details stored from when the operation was sent */ +- /* we cannot finish yet - we still need to waitfor the pending results, then +- the main repl code will shut down this thread */ +- /* we can finish if we have disconnected - in that case, there will be nothing +- to read */ ++ /* ++ * We also need to log the error, including details stored from ++ * when the operation was sent. We cannot finish yet - we still ++ * need to wait for the pending results, then the main repl code ++ * will shut down this thread. We can finish if we have ++ * disconnected - in that case, there will be nothing to read ++ */ + if (return_value == UPDATE_CONNECTION_LOST) { + finished = 1; + } +@@ -341,8 +368,16 @@ repl5_inc_result_threadmain(void *param) + rd->result = return_value; + } + } ++ + /* Should we stop ? */ + PR_Lock(rd->lock); ++ if (!finished && yield_session && rd->abort != SESSION_ABORTED && rd->abort_time == 0) { ++ rd->abort_time = time( NULL ); ++ rd->abort = SESSION_ABORTED; /* only set the abort time once */ ++ slapi_log_error(SLAPI_LOG_REPL, "repl5_inc_result_threadmain", ++ "Abort control detected, setting abort time...(%s)\n", ++ agmt_get_long_name(rd->prp->agmt)); ++ } + if (rd->stop_result_thread) + { + finished = 1; +@@ -468,7 +503,8 @@ repl5_inc_waitfor_async_results(result_data *rd) + if (rd->last_message_id_received >= rd->last_message_id_sent) { + /* If so then we're done */ + done = 1; +- } else if (rd->abort && (rd->result == UPDATE_CONNECTION_LOST)) { ++ } else if (rd->abort && (rd->result == UPDATE_CONNECTION_LOST)) ++ { + done = 1; /* no connection == no more results */ + } + /* +@@ -846,10 +882,10 @@ repl5_inc_run(Private_Repl_Protocol *prp) + if (!busywaittime){ + busywaittime = repl5_get_backoff_min(prp); + } +- prp_priv->backoff = backoff_new(BACKOFF_FIXED, busywaittime, busywaittime); ++ prp_priv->backoff = 
backoff_new(BACKOFF_FIXED, busywaittime , busywaittime); + } else { + prp_priv->backoff = backoff_new(BACKOFF_EXPONENTIAL, repl5_get_backoff_min(prp), +- repl5_get_backoff_max(prp)); ++ repl5_get_backoff_max(prp)); + } + next_state = STATE_BACKOFF; + backoff_reset(prp_priv->backoff, repl5_inc_backoff_expired, (void *)prp); +@@ -1055,6 +1091,7 @@ repl5_inc_run(Private_Repl_Protocol *prp) + } else if (rc == UPDATE_YIELD){ + dev_debug("repl5_inc_run(STATE_SENDING_UPDATES) -> send_updates = UPDATE_YIELD -> STATE_BACKOFF_START"); + agmt_set_last_update_status(prp->agmt, 0, 0, "Incremental update succeeded and yielded"); ++ use_busy_backoff_timer = PR_TRUE; + next_state = STATE_BACKOFF_START; + } else if (rc == UPDATE_TRANSIENT_ERROR){ + dev_debug("repl5_inc_run(STATE_SENDING_UPDATES) -> send_updates = UPDATE_TRANSIENT_ERROR -> STATE_BACKOFF_START"); +@@ -1099,6 +1136,7 @@ repl5_inc_run(Private_Repl_Protocol *prp) + ruv_destroy(&ruv); ruv = NULL; + } + agmt_update_done(prp->agmt, 0); ++ + /* If timed out, close the connection after released the replica */ + release_replica(prp); + if (rc == UPDATE_TIMEOUT) { +@@ -1681,12 +1719,14 @@ send_updates(Private_Repl_Protocol *prp, RUV *remote_update_vector, PRUint32 *nu + } + else + { +- int finished = 0; + ConnResult replay_crc; +- char csn_str[CSN_STRSIZE]; ++ Replica *replica = (Replica*) object_get_data(prp->replica_object); + PRBool subentry_update_needed = PR_FALSE; ++ PRUint64 release_timeout = replica_get_release_timeout(replica); ++ char csn_str[CSN_STRSIZE]; + int skipped_updates = 0; + int fractional_repl; ++ int finished = 0; + #define FRACTIONAL_SKIPPED_THRESHOLD 100 + + /* Start the results reading thread */ +@@ -1906,7 +1946,20 @@ send_updates(Private_Repl_Protocol *prp, RUV *remote_update_vector, PRUint32 *nu + } + PR_Lock(rd->lock); + /* See if the result thread has hit a problem */ +- if (!finished && rd->abort) ++ ++ if(!finished && rd->abort_time){ ++ time_t current_time = time ( NULL ); ++ if ((current_time 
- rd->abort_time) >= release_timeout){ ++ rd->result = UPDATE_YIELD; ++ return_value = UPDATE_YIELD; ++ finished = 1; ++ slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name, ++ "Aborting send_updates...(%s)\n", ++ agmt_get_long_name(rd->prp->agmt)); ++ } ++ } ++ ++ if (!finished && rd->abort == ABORT_SESSION) + { + return_value = rd->result; + finished = 1; +@@ -1916,10 +1969,9 @@ send_updates(Private_Repl_Protocol *prp, RUV *remote_update_vector, PRUint32 *nu + + if (fractional_repl && subentry_update_needed) + { +- Replica *replica; + ReplicaId rid = -1; /* Used to create the replica keep alive subentry */ + Slapi_DN *replarea_sdn = NULL; +- replica = (Replica*) object_get_data(prp->replica_object); ++ + if (replica) + { + rid = replica_get_rid(replica); +@@ -1945,7 +1997,7 @@ send_updates(Private_Repl_Protocol *prp, RUV *remote_update_vector, PRUint32 *nu + * If we already have an error, there is no need to check the + * async result thread anymore. + */ +- if (return_value == UPDATE_NO_MORE_UPDATES) ++ if (return_value == UPDATE_NO_MORE_UPDATES || return_value == UPDATE_YIELD) + { + /* + * We need to double check that an error hasn't popped up from +diff --git a/ldap/servers/plugins/replication/repl5_plugins.c b/ldap/servers/plugins/replication/repl5_plugins.c +index bb43b9b..9f38d05 100644 +--- a/ldap/servers/plugins/replication/repl5_plugins.c ++++ b/ldap/servers/plugins/replication/repl5_plugins.c +@@ -1077,6 +1077,8 @@ write_changelog_and_ruv (Slapi_PBlock *pb) + r = (Replica*)object_get_data (repl_obj); + PR_ASSERT (r); + ++ replica_check_release_timeout(r, pb); ++ + if (replica_is_flag_set (r, REPLICA_LOG_CHANGES) && + (cl5GetState () == CL5_STATE_OPEN)) + { +@@ -1365,7 +1367,6 @@ process_postop (Slapi_PBlock *pb) + return rc; + } + +- + /* + * Cancel an operation CSN. This removes it from any CSN pending lists. 
+ * This function is called when a previously-generated CSN will not +diff --git a/ldap/servers/plugins/replication/repl5_replica.c b/ldap/servers/plugins/replication/repl5_replica.c +index c7cf25f..6d2452a 100644 +--- a/ldap/servers/plugins/replication/repl5_replica.c ++++ b/ldap/servers/plugins/replication/repl5_replica.c +@@ -23,8 +23,8 @@ + + #define RUV_SAVE_INTERVAL (30 * 1000) /* 30 seconds */ + +-#define REPLICA_RDN "cn=replica" +-#define CHANGELOG_RDN "cn=legacy changelog" ++#define REPLICA_RDN "cn=replica" ++#define CHANGELOG_RDN "cn=legacy changelog" + + /* + * A replica is a locally-held copy of a portion of the DIT. +@@ -68,6 +68,8 @@ struct replica { + Slapi_Counter *backoff_max; /* backoff retry maximum */ + Slapi_Counter *precise_purging; /* Enable precise tombstone purging */ + PRUint64 agmt_count; /* Number of agmts */ ++ Slapi_Counter *release_timeout; /* The amount of time to wait before releasing active replica */ ++ PRUint64 abort_session; /* Abort the current replica session */ + }; + + +@@ -201,6 +203,7 @@ replica_new_from_entry (Slapi_Entry *e, char *errortext, PRBool is_add_operation + + /* init the slapi_counter/atomic settings */ + r->protocol_timeout = slapi_counter_new(); ++ r->release_timeout = slapi_counter_new(); + r->backoff_min = slapi_counter_new(); + r->backoff_max = slapi_counter_new(); + +@@ -408,6 +411,7 @@ replica_destroy(void **arg) + } + + slapi_counter_destroy(&r->protocol_timeout); ++ slapi_counter_destroy(&r->release_timeout); + slapi_counter_destroy(&r->backoff_min); + slapi_counter_destroy(&r->backoff_max); + +@@ -585,8 +589,7 @@ replica_subentry_update(Slapi_DN *repl_root, ReplicaId rid) + */ + PRBool + replica_get_exclusive_access(Replica *r, PRBool *isInc, PRUint64 connid, int opid, +- const char *locking_purl, +- char **current_purl) ++ const char *locking_purl, char **current_purl) + { + PRBool rval = PR_TRUE; + +@@ -609,6 +612,15 @@ replica_get_exclusive_access(Replica *r, PRBool *isInc, PRUint64 connid, int opi 
+ { + *current_purl = slapi_ch_strdup(r->locking_purl); + } ++ if (!(r->repl_state_flags & REPLICA_TOTAL_IN_PROGRESS) && ++ replica_get_release_timeout(r)) ++ { ++ /* ++ * We are not doing a total update, so abort the current session ++ * so other replicas can acquire this server. ++ */ ++ r->abort_session = ABORT_SESSION; ++ } + } + else + { +@@ -617,14 +629,17 @@ replica_get_exclusive_access(Replica *r, PRBool *isInc, PRUint64 connid, int opi + connid, opid, + slapi_sdn_get_dn(r->repl_root)); + r->repl_state_flags |= REPLICA_IN_USE; ++ r->abort_session = SESSION_ACQUIRED; + if (isInc && *isInc) + { + r->repl_state_flags |= REPLICA_INCREMENTAL_IN_PROGRESS; + } + else + { +- /* if connid or opid != 0, it's a total update */ +- /* Both set to 0 means we're disabling replication */ ++ /* ++ * If connid or opid != 0, it's a total update. ++ * Both set to 0 means we're disabling replication ++ */ + if (connid || opid) + { + r->repl_state_flags |= REPLICA_TOTAL_IN_PROGRESS; +@@ -652,13 +667,13 @@ replica_relinquish_exclusive_access(Replica *r, PRUint64 connid, int opid) + /* check to see if the replica is in use and log a warning if not */ + if (!(r->repl_state_flags & REPLICA_IN_USE)) + { +- slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name, ++ slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name, + "conn=%" NSPRIu64 " op=%d repl=\"%s\": " + "Replica not in use\n", + connid, opid, + slapi_sdn_get_dn(r->repl_root)); + } else { +- slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name, ++ slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name, + "conn=%" NSPRIu64 " op=%d repl=\"%s\": " + "Released replica held by locking_purl=%s\n", + connid, opid, +@@ -970,6 +985,24 @@ replica_get_protocol_timeout(Replica *r) + } + } + ++PRUint64 ++replica_get_release_timeout(Replica *r) ++{ ++ if(r){ ++ return slapi_counter_get_value(r->release_timeout); ++ } else { ++ return 0; ++ } ++} ++ ++void ++replica_set_release_timeout(Replica *r, PRUint64 limit) ++{ ++ if(r){ ++ 
slapi_counter_set_value(r->release_timeout, limit); ++ } ++} ++ + void + replica_set_protocol_timeout(Replica *r, PRUint64 timeout) + { +@@ -977,6 +1010,7 @@ replica_set_protocol_timeout(Replica *r, PRUint64 timeout) + slapi_counter_set_value(r->protocol_timeout, timeout); + } + } ++ + void + replica_set_groupdn_checkinterval(Replica *r, int interval) + { +@@ -1064,11 +1098,7 @@ replica_get_legacy_purl (const Replica *r) + char *purl; + + replica_lock(r->repl_lock); +- +- PR_ASSERT (r->legacy_consumer); +- + purl = slapi_ch_strdup(r->legacy_purl); +- + replica_unlock(r->repl_lock); + + return purl; +@@ -1924,6 +1954,7 @@ _replica_init_from_config (Replica *r, Slapi_Entry *e, char *errortext) + int backoff_min; + int backoff_max; + int ptimeout = 0; ++ int release_timeout = 0; + int rc; + + PR_ASSERT (r && e); +@@ -2008,6 +2039,14 @@ _replica_init_from_config (Replica *r, Slapi_Entry *e, char *errortext) + slapi_counter_set_value(r->protocol_timeout, ptimeout); + } + ++ /* Get the release timeout */ ++ release_timeout = slapi_entry_attr_get_int(e, type_replicaReleaseTimeout); ++ if(release_timeout <= 0){ ++ slapi_counter_set_value(r->release_timeout, 0); ++ } else { ++ slapi_counter_set_value(r->release_timeout, release_timeout); ++ } ++ + /* check for precise tombstone purging */ + precise_purging = slapi_entry_attr_get_charptr(e, type_replicaPrecisePurge); + if(precise_purging){ +@@ -4029,21 +4068,21 @@ replica_disable_replication (Replica *r, Object *r_obj) + ruv_get_first_id_and_purl(repl_ruv, &junkrid, &p_locking_purl); + locking_purl = slapi_ch_strdup(p_locking_purl); + p_locking_purl = NULL; +- repl_ruv = NULL; +- while (!replica_get_exclusive_access(r, &isInc, 0, 0, "replica_disable_replication", ++ repl_ruv = NULL; ++ while (!replica_get_exclusive_access(r, &isInc, 0, 0, "replica_disable_replication", + &current_purl)) { +- if (!isInc) /* already locked, but not by inc update - break */ +- break; +- isInc = PR_FALSE; +- slapi_log_error(SLAPI_LOG_REPL,
repl_plugin_name, ++ if (!isInc) /* already locked, but not by inc update - break */ ++ break; ++ isInc = PR_FALSE; ++ slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name, + "replica_disable_replication: " + "replica %s is already locked by (%s) for incoming " + "incremental update; sleeping 100ms\n", +- slapi_sdn_get_ndn (replica_get_root (r)), ++ slapi_sdn_get_ndn (replica_get_root (r)), + current_purl ? current_purl : "unknown"); + slapi_ch_free_string(&current_purl); +- DS_Sleep(PR_MillisecondsToInterval(100)); +- } ++ DS_Sleep(PR_MillisecondsToInterval(100)); ++ } + + slapi_ch_free_string(&current_purl); + slapi_ch_free_string(&locking_purl); +@@ -4281,3 +4320,57 @@ replica_decr_agmt_count(Replica *r) + } + } + } ++ ++/* ++ * Add the "Abort Replication Session" control to the pblock ++ */ ++static void ++replica_add_session_abort_control(Slapi_PBlock *pb) ++{ ++ LDAPControl ctrl = {0}; ++ BerElement *ber; ++ struct berval *bvp; ++ int rc; ++ ++ /* Build the BER payload */ ++ if ( (ber = der_alloc()) == NULL ) { ++ slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name, ++ "add_session_abort_control: Failed to create ber\n"); ++ return; ++ } ++ rc = ber_printf( ber, "{}"); ++ if (rc != -1) { ++ rc = ber_flatten( ber, &bvp ); ++ } ++ ber_free( ber, 1 ); ++ if ( rc == -1 ) { ++ slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name, ++ "add_session_abort_control: Failed to flatten ber\n"); ++ return; ++ } ++ ++ ctrl.ldctl_oid = slapi_ch_strdup( REPL_ABORT_SESSION_OID ); ++ ctrl.ldctl_value = *bvp; ++ bvp->bv_val = NULL; ++ ber_bvfree( bvp ); ++ slapi_pblock_set(pb, SLAPI_ADD_RESCONTROL, &ctrl); ++ ++ slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name, ++ "add_session_abort_control: abort control successfully added to result\n"); ++} ++ ++/* ++ * Check if we have exceeded the failed replica acquire limit, ++ * if so, end the replication session.
++ */ ++void ++replica_check_release_timeout(Replica *r, Slapi_PBlock *pb) ++{ ++ replica_lock(r->repl_lock); ++ if(r->abort_session == ABORT_SESSION){ ++ /* Need to abort this session (just send the control once) */ ++ replica_add_session_abort_control(pb); ++ r->abort_session = SESSION_ABORTED; ++ } ++ replica_unlock(r->repl_lock); ++} +diff --git a/ldap/servers/plugins/replication/repl5_replica_config.c b/ldap/servers/plugins/replication/repl5_replica_config.c +index 4d7135c..71b3c92 100644 +--- a/ldap/servers/plugins/replication/repl5_replica_config.c ++++ b/ldap/servers/plugins/replication/repl5_replica_config.c +@@ -406,6 +406,11 @@ replica_config_modify (Slapi_PBlock *pb, Slapi_Entry* entryBefore, Slapi_Entry* + if (apply_mods) + replica_set_precise_purging(r, 0); + } ++ else if (strcasecmp (config_attr, type_replicaReleaseTimeout) == 0 ) ++ { ++ if (apply_mods) ++ replica_set_release_timeout(r, 0); ++ } + else + { + *returncode = LDAP_UNWILLING_TO_PERFORM; +@@ -592,6 +597,23 @@ replica_config_modify (Slapi_PBlock *pb, Slapi_Entry* entryBefore, Slapi_Entry* + } + } + } ++ else if (strcasecmp (config_attr, type_replicaReleaseTimeout) == 0 ) ++ { ++ if (apply_mods) ++ { ++ PRUint64 val = atoll(config_attr_value); ++ ++ if(val < 0){ ++ *returncode = LDAP_UNWILLING_TO_PERFORM; ++ PR_snprintf (errortext, SLAPI_DSE_RETURNTEXT_SIZE, ++ "attribute %s value (%s) is invalid, must be a number zero or greater.\n", ++ config_attr, config_attr_value); ++ slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, "replica_config_modify: %s\n", errortext); ++ break; ++ } ++ replica_set_release_timeout(r, val); ++ } ++ } + else + { + *returncode = LDAP_UNWILLING_TO_PERFORM; +diff --git a/ldap/servers/plugins/replication/repl_globals.c b/ldap/servers/plugins/replication/repl_globals.c +index 331f839..8b891fb 100644 +--- a/ldap/servers/plugins/replication/repl_globals.c ++++ b/ldap/servers/plugins/replication/repl_globals.c +@@ -87,6 +87,7 @@ const char *type_ruvElementUpdatetime = 
"nsruvReplicaLastModified"; + const char *type_replicaCleanRUV = "nsds5ReplicaCleanRUV"; + const char *type_replicaAbortCleanRUV = "nsds5ReplicaAbortCleanRUV"; + const char *type_replicaProtocolTimeout = "nsds5ReplicaProtocolTimeout"; ++const char *type_replicaReleaseTimeout = "nsds5ReplicaReleaseTimeout"; + const char *type_replicaBackoffMin = "nsds5ReplicaBackoffMin"; + const char *type_replicaBackoffMax = "nsds5ReplicaBackoffMax"; + const char *type_replicaPrecisePurge = "nsds5ReplicaPreciseTombstonePurging"; +-- +2.4.11 + diff --git a/SOURCES/0099-Ticket-48636-Fix-config-validation-check.patch b/SOURCES/0099-Ticket-48636-Fix-config-validation-check.patch new file mode 100644 index 0000000..cc60631 --- /dev/null +++ b/SOURCES/0099-Ticket-48636-Fix-config-validation-check.patch @@ -0,0 +1,52 @@ +From f9a4b8153a1b46da5052ffda60723596048b20fb Mon Sep 17 00:00:00 2001 +From: Mark Reynolds +Date: Tue, 14 Jun 2016 17:19:48 -0400 +Subject: [PATCH 99/99] Ticket 48636 - Fix config validation check + +Bug Description: We were previous checking if an unsigfned int was less than zero + +Fix Description: Improve config validation by using long instead of PRUint64 + +https://fedorahosted.org/389/ticket/48636 + +Reviewed by: nhosoi(Thanks!) 
+ +(cherry picked from commit 43d5ac680f7781f95205db94e5ff2958d39b78a4) +(cherry picked from commit b8239e0da865f33cae930088dd2a746a49f2d32a) +--- + ldap/servers/plugins/replication/repl5_replica_config.c | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +diff --git a/ldap/servers/plugins/replication/repl5_replica_config.c b/ldap/servers/plugins/replication/repl5_replica_config.c +index 71b3c92..866a712 100644 +--- a/ldap/servers/plugins/replication/repl5_replica_config.c ++++ b/ldap/servers/plugins/replication/repl5_replica_config.c +@@ -601,17 +601,20 @@ replica_config_modify (Slapi_PBlock *pb, Slapi_Entry* entryBefore, Slapi_Entry* + { + if (apply_mods) + { +- PRUint64 val = atoll(config_attr_value); ++ long val = atol(config_attr_value); + +- if(val < 0){ ++ if (val < 0){ + *returncode = LDAP_UNWILLING_TO_PERFORM; +- PR_snprintf (errortext, SLAPI_DSE_RETURNTEXT_SIZE, ++ PR_snprintf(errortext, SLAPI_DSE_RETURNTEXT_SIZE, + "attribute %s value (%s) is invalid, must be a number zero or greater.\n", + config_attr, config_attr_value); +- slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, "replica_config_modify: %s\n", errortext); ++ slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, ++ "replica_config_modify: %s\n", errortext); + break; ++ } else { ++ /* Set the timeout */ ++ replica_set_release_timeout(r, val); + } +- replica_set_release_timeout(r, val); + } + } + else +-- +2.4.11 + diff --git a/SPECS/389-ds-base.spec b/SPECS/389-ds-base.spec index b44a6b1..5adf500 100644 --- a/SPECS/389-ds-base.spec +++ b/SPECS/389-ds-base.spec @@ -34,7 +34,7 @@ Summary: 389 Directory Server (base) Name: 389-ds-base Version: 1.3.4.0 -Release: %{?relprefix}32%{?prerel}%{?dist} +Release: %{?relprefix}33%{?prerel}%{?dist} License: GPLv3+ URL: http://port389.org/ Group: System Environment/Daemons @@ -217,6 +217,11 @@ Patch90: 0091-Ticket-48492-heap-corruption-at-schema-replication.patch Patch91: 0092-Ticket-48808-Paged-results-search-returns-the-blank-.patch 
Patch92: 0093-Ticket-48808-Add-test-case.patch Patch93: 0094-Ticket-48862-At-startup-DES-to-AES-password-conversi.patch +Patch94: 0095-Ticket-48766-Replication-changelog-can-incorrectly-s.patch +Patch95: 0096-Ticket-47788-Supplier-can-skip-a-failing-update-alth.patch +Patch96: 0097-Ticket-47788-Only-check-postop-result-if-its-a-repli.patch +Patch97: 0098-Ticket-48636-Improve-replication-convergence.patch +Patch98: 0099-Ticket-48636-Fix-config-validation-check.patch %description 389 Directory Server is an LDAPv3 compliant server. The base package includes @@ -374,6 +379,11 @@ cp %{SOURCE2} README.devel %patch91 -p1 %patch92 -p1 %patch93 -p1 +%patch94 -p1 +%patch95 -p1 +%patch96 -p1 +%patch97 -p1 +%patch98 -p1 %build %if %{use_nunc_stans} @@ -568,6 +578,12 @@ fi %endif %changelog +* Thu Jun 30 2016 Noriko Hosoi - 1.3.4.0-33 +- release 1.3.4.0-33 +- Resolves: bug 1351323 - Improve MMR replication convergence (DS 48636) +- Resolves: bug 1351447 - Supplier can skip a failing update, although it should retry. (DS 47788) +- Resolves: bug 1350707 - Replication changelog can incorrectly skip over updates (DS 48766) + * Thu Jun 9 2016 Noriko Hosoi - 1.3.4.0-32 - release 1.3.4.0-32 - Resolves: bug 1344293 - At startup DES to AES password conversion causes timeout in start script (DS 48862)