andykimpe / rpms / 389-ds-base

Forked from rpms/389-ds-base 5 months ago
Clone
dc8c34
From 2cfe91947ec8d40e28dfe2e1fd5bb1b351ca75f1 Mon Sep 17 00:00:00 2001
dc8c34
From: Rich Megginson <rmeggins@redhat.com>
dc8c34
Date: Wed, 6 Nov 2013 14:22:31 -0700
dc8c34
Subject: [PATCH 147/225] Ticket #47585 Replication Failures related to skipped
dc8c34
 entries due to cleaned rids
dc8c34
dc8c34
https://fedorahosted.org/389/ticket/47585
dc8c34
Reviewed by: nhosoi (Thanks!)
dc8c34
Branch: 389-ds-base-1.2.11
dc8c34
Fix Description: If a change was found in the changelog buffer that is
dc8c34
skipped due to having an unknown replica ID (rid), the entire buffer was
dc8c34
marked as CLC_STATE_NEW_RID.  When the buffer is exhausted and the iterator
dc8c34
code goes to read in the new buffer, it would not read in the new buffer
dc8c34
because it only loads a new buffer if the current buffer state is
dc8c34
CLC_STATE_READY.  I don't know why the entire buffer would be marked as
dc8c34
CLC_STATE_NEW_RID and stop iteration.  It seems to me that just the update
dc8c34
should be skipped, but new buffers should be loaded in order to keep sending
dc8c34
non-skipped updates to the consumer.
dc8c34
It is possible for a CSN with an unknown RID to get into the changelog if
dc8c34
the server with that RID had been removed by cleanruv/cleanallruv.  In that
dc8c34
case, the CSN should be skipped.  It is assumed that the change was already
dc8c34
sent - cleanallruv is supposed to wait until all known changes have been
dc8c34
seen before removing the RID from the RUV - so it is safe to skip it.
dc8c34
Added additional debugging, so that we can better tell why changelog entries
dc8c34
were skipped.
dc8c34
Platforms tested: RHEL6 x86_64
dc8c34
Flag Day: no
dc8c34
Doc impact: no
dc8c34
(cherry picked from commit cf08f1274404e4796966011a98a6a0acbbfd6070)
dc8c34
(cherry picked from commit 30bb98fb693ea1aac9774bdc43b923eacd72570a)
dc8c34
(cherry picked from commit fc70e4ac6accaa14d140e333829e98897f6ff164)
dc8c34
(cherry picked from commit 7eefa121784774a2c1535404f0b95b78970b43c2)
dc8c34
(cherry picked from commit c783d159a3baa005f0a5f007d886961e2128bdcc)
dc8c34
---
dc8c34
 ldap/servers/plugins/replication/cl5_clcache.c | 48 ++++++++++++++++++++++----
dc8c34
 1 file changed, 42 insertions(+), 6 deletions(-)
dc8c34
dc8c34
diff --git a/ldap/servers/plugins/replication/cl5_clcache.c b/ldap/servers/plugins/replication/cl5_clcache.c
dc8c34
index 7a6a446..8218312 100644
dc8c34
--- a/ldap/servers/plugins/replication/cl5_clcache.c
dc8c34
+++ b/ldap/servers/plugins/replication/cl5_clcache.c
dc8c34
@@ -120,6 +120,11 @@ struct clc_buffer {
dc8c34
 	int		 	 buf_load_cnt;		/* number of loads for session */
dc8c34
 	int		 	 buf_record_cnt;	/* number of changes for session */
dc8c34
 	int		 	 buf_record_skipped;	/* number of changes skipped */
dc8c34
+	int		 	 buf_skipped_new_rid;	/* number of changes skipped due to new_rid */
dc8c34
+	int		 	 buf_skipped_csn_gt_cons_maxcsn;	/* number of changes skipped because the csn is less than or equal to the consumer maxcsn */
dc8c34
+	int		 	 buf_skipped_up_to_date;	/* number of changes skipped due to consumer being up-to-date for the given rid */
dc8c34
+	int		 	 buf_skipped_csn_gt_ruv;	/* number of changes skipped because their precedents are not covered by the local RUV snapshot */
dc8c34
+	int		 	 buf_skipped_csn_covered;	/* number of changes skipped due to CSNs already covered by consumer RUV */
dc8c34
 
dc8c34
 	/*
dc8c34
 	 * fields that should be accessed via bl_lock or pl_lock
dc8c34
@@ -252,6 +257,11 @@ clcache_get_buffer ( CLC_Buffer **buf, DB *db, ReplicaId consumer_rid, const RUV
dc8c34
 		(*buf)->buf_record_skipped = 0;
dc8c34
 		(*buf)->buf_cursor = NULL;
dc8c34
 		(*buf)->buf_num_cscbs = 0;
dc8c34
+		(*buf)->buf_skipped_new_rid = 0;
dc8c34
+		(*buf)->buf_skipped_csn_gt_cons_maxcsn = 0;
dc8c34
+		(*buf)->buf_skipped_up_to_date = 0;
dc8c34
+		(*buf)->buf_skipped_csn_gt_ruv = 0;
dc8c34
+		(*buf)->buf_skipped_csn_covered = 0;
dc8c34
 	}
dc8c34
 	else {
dc8c34
 		*buf = clcache_new_buffer ( consumer_rid );
dc8c34
@@ -287,11 +297,16 @@ clcache_return_buffer ( CLC_Buffer **buf )
dc8c34
 	int i;
dc8c34
 
dc8c34
 	slapi_log_error ( SLAPI_LOG_REPL, (*buf)->buf_agmt_name,
dc8c34
-			"session end: state=%d load=%d sent=%d skipped=%d\n",
dc8c34
-			 (*buf)->buf_state,
dc8c34
-			 (*buf)->buf_load_cnt,
dc8c34
-			 (*buf)->buf_record_cnt - (*buf)->buf_record_skipped,
dc8c34
-			 (*buf)->buf_record_skipped );
dc8c34
+			  "session end: state=%d load=%d sent=%d skipped=%d skipped_new_rid=%d "
dc8c34
+			  "skipped_csn_gt_cons_maxcsn=%d skipped_up_to_date=%d "
dc8c34
+			  "skipped_csn_gt_ruv=%d skipped_csn_covered=%d\n",
dc8c34
+			  (*buf)->buf_state,
dc8c34
+			  (*buf)->buf_load_cnt,
dc8c34
+			  (*buf)->buf_record_cnt - (*buf)->buf_record_skipped,
dc8c34
+			  (*buf)->buf_record_skipped, (*buf)->buf_skipped_new_rid,
dc8c34
+			  (*buf)->buf_skipped_csn_gt_cons_maxcsn,
dc8c34
+			  (*buf)->buf_skipped_up_to_date, (*buf)->buf_skipped_csn_gt_ruv,
dc8c34
+			  (*buf)->buf_skipped_csn_covered);
dc8c34
 
dc8c34
 	for ( i = 0; i < (*buf)->buf_num_cscbs; i++ ) {
dc8c34
 		clcache_free_cscb ( &(*buf)->buf_cscbs[i] );
dc8c34
@@ -676,6 +691,8 @@ clcache_skip_change ( CLC_Buffer *buf )
dc8c34
 	ReplicaId rid;
dc8c34
 	int skip = 1;
dc8c34
 	int i;
dc8c34
+	char buf_cur_csn_str[CSN_STRSIZE];
dc8c34
+	char oth_csn_str[CSN_STRSIZE];
dc8c34
 
dc8c34
 	do {
dc8c34
 
dc8c34
@@ -697,6 +714,14 @@ clcache_skip_change ( CLC_Buffer *buf )
dc8c34
 				 *  The consumer must have been "restored" and needs this newer update.
dc8c34
 				 */
dc8c34
 				skip = 0;
dc8c34
+			} else if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) {
dc8c34
+				csn_as_string(buf->buf_current_csn, 0, buf_cur_csn_str);
dc8c34
+				csn_as_string(cons_maxcsn, 0, oth_csn_str);
dc8c34
+				slapi_log_error(SLAPI_LOG_REPL, buf->buf_agmt_name,
dc8c34
+					"Skipping update because the changelog buffer current csn [%s] is "
dc8c34
+				        "less than or equal to the consumer max csn [%s]\n",
dc8c34
+				        buf_cur_csn_str, oth_csn_str);
dc8c34
+				buf->buf_skipped_csn_gt_cons_maxcsn++;
dc8c34
 			}
dc8c34
 			csn_free(&cons_maxcsn);
dc8c34
 			break;
dc8c34
@@ -714,7 +739,14 @@ clcache_skip_change ( CLC_Buffer *buf )
dc8c34
 
dc8c34
 		/* Skip CSN whose RID is unknown to the local RUV snapshot */
dc8c34
 		if ( i >= buf->buf_num_cscbs ) {
dc8c34
-			buf->buf_state = CLC_STATE_NEW_RID;
dc8c34
+			if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) {
dc8c34
+				csn_as_string(buf->buf_current_csn, 0, buf_cur_csn_str);
dc8c34
+				slapi_log_error(SLAPI_LOG_REPL, buf->buf_agmt_name,
dc8c34
+					"Skipping update because the changelog buffer current csn [%s] rid "
dc8c34
+				        "[%d] is not in the list of changelog csn buffers (length %d)\n",
dc8c34
+				        buf_cur_csn_str, rid, buf->buf_num_cscbs);
dc8c34
+			}
dc8c34
+			buf->buf_skipped_new_rid++;
dc8c34
 			break;
dc8c34
 		}
dc8c34
 
dc8c34
@@ -722,17 +754,20 @@ clcache_skip_change ( CLC_Buffer *buf )
dc8c34
 
dc8c34
 		/* Skip if the consumer is already up-to-date for the RID */
dc8c34
 		if ( cscb->state == CLC_STATE_UP_TO_DATE ) {
dc8c34
+			buf->buf_skipped_up_to_date++;
dc8c34
 			break;
dc8c34
 		}
dc8c34
 
dc8c34
 		/* Skip CSN whose preceedents are not covered by local RUV snapshot */
dc8c34
 		if ( cscb->state == CLC_STATE_CSN_GT_RUV ) {
dc8c34
+			buf->buf_skipped_csn_gt_ruv++;
dc8c34
 			break;
dc8c34
 		}
dc8c34
 
dc8c34
 		/* Skip CSNs already covered by consumer RUV */
dc8c34
 		if ( cscb->consumer_maxcsn &&
dc8c34
 			 csn_compare ( buf->buf_current_csn, cscb->consumer_maxcsn ) <= 0 ) {
dc8c34
+			buf->buf_skipped_csn_covered++;
dc8c34
 				break;
dc8c34
 		}
dc8c34
 
dc8c34
@@ -762,6 +797,7 @@ clcache_skip_change ( CLC_Buffer *buf )
dc8c34
 
dc8c34
 		/* Skip CSNs not covered by local RUV snapshot */
dc8c34
 		cscb->state = CLC_STATE_CSN_GT_RUV;
dc8c34
+		buf->buf_skipped_csn_gt_ruv++;
dc8c34
 
dc8c34
 	} while (0);
dc8c34
 
dc8c34
-- 
dc8c34
1.8.1.4
dc8c34