andykimpe / rpms / 389-ds-base

Forked from rpms/389-ds-base 5 months ago
Clone
Blob Blame History Raw
From 84c69dee99ae60b9a571b41bb84514a15eec7b74 Mon Sep 17 00:00:00 2001
From: Rich Megginson <rmeggins@redhat.com>
Date: Wed, 15 May 2013 19:39:24 -0600
Subject: [PATCH 60/99] Ticket #47362 - ipa upgrade selinuxusermap data not
 replicating

https://fedorahosted.org/389/ticket/47362
Reviewed by: nhosoi (Thanks!)
Branch: 389-ds-base-1.2.11
Fix Description: When nsslapd-port is set to 0, this causes the
replica purl to be "ldap://hostname:0".  At startup, the MMR code looks to
see if this replica purl is in the RUV, by doing a string comparison of this
purl with the ruv replica purl.  If it is not there, the MMR code wipes out
this ruv element.  Later the code in replica_check_for_data_reload() uses
this RUV to see if it needs to reinit the changelog.  Since the RUV doesn't
match the changelog RUV any more, the changelog is erased, which erases
any changes that were made in the meantime.  The missing RUV element causes
the supplier to attempt to send over changes which may already exist on the
consumer.  If one of these is an ADD, the urp code will correctly flag this
as an attempt to add an entry that already exists, and will turn this into
a replConflict entry.  A subsequent attempt to replicate the same ADD will
cause an error in the urp code which will cause it to return err=53.
Replication will then become stuck on this update - it will keep trying to
send it over and over again, and will not be able to proceed.
The only workaround is to reinitialize the replica, which gets the database
RUV and changelog back into a consistent state.
I've also added extra RUV debug logging that is emitted at the REPL log level.
Platforms tested: RHEL6 x86_64
Flag Day: no
Doc impact: no
(cherry picked from commit 0c194eb79aa381bf4e4cd05082956218512115a4)
---
 .../plugins/replication/repl5_inc_protocol.c       | 38 ++++++++++++++++++++++
 ldap/servers/plugins/replication/repl5_ruv.c       | 32 ++++++++++++++++++
 2 files changed, 70 insertions(+)

diff --git a/ldap/servers/plugins/replication/repl5_inc_protocol.c b/ldap/servers/plugins/replication/repl5_inc_protocol.c
index 743be57..82b121c 100644
--- a/ldap/servers/plugins/replication/repl5_inc_protocol.c
+++ b/ldap/servers/plugins/replication/repl5_inc_protocol.c
@@ -1931,6 +1931,44 @@ repl5_inc_stop(Private_Repl_Protocol *prp)
 				agmt_get_long_name(prp->agmt),
 				PR_IntervalToSeconds(now-start));
 	}
+	if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) {
+		if (NULL == prp->replica_object) {
+			slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name,
+					"%s: repl5_inc_stop: protocol replica_object is NULL\n",
+					agmt_get_long_name(prp->agmt));
+		} else {
+			Replica *replica;
+			object_acquire(prp->replica_object);
+			replica = object_get_data(prp->replica_object);
+			if (NULL == replica) {
+				slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name,
+						"%s: repl5_inc_stop: replica is NULL\n",
+						agmt_get_long_name(prp->agmt));
+			} else {
+				Object *ruv_obj = replica_get_ruv(replica);
+				if (NULL == ruv_obj) {
+					slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name,
+							"%s: repl5_inc_stop: ruv_obj is NULL\n",
+							agmt_get_long_name(prp->agmt));
+				} else {
+					RUV *ruv;
+					object_acquire(ruv_obj);
+					ruv = (RUV*)object_get_data (ruv_obj);
+					if (NULL == ruv) {
+						slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name,
+								"%s: repl5_inc_stop: ruv is NULL\n",
+								agmt_get_long_name(prp->agmt));
+
+					} else {
+						ruv_dump(ruv, "Database RUV", NULL);
+					}
+					object_release(ruv_obj);
+				}
+			}
+			object_release(prp->replica_object);
+		}
+
+	}
 	return return_value;
 }
 
diff --git a/ldap/servers/plugins/replication/repl5_ruv.c b/ldap/servers/plugins/replication/repl5_ruv.c
index b52dd49..8fbd89c 100644
--- a/ldap/servers/plugins/replication/repl5_ruv.c
+++ b/ldap/servers/plugins/replication/repl5_ruv.c
@@ -208,6 +208,9 @@ ruv_init_from_slapi_attr_and_check_purl(Slapi_Attr *attr, RUV **ruv, ReplicaId *
 			Slapi_Value *value;
 			const struct berval *bval;
 			const char *purl = NULL;
+			char *localhost = get_localhost_DNS();
+			size_t localhostlen = localhost ? strlen(localhost) : 0;
+			int port = config_get_port();
 
 			return_value = RUV_SUCCESS;
 
@@ -236,16 +239,30 @@ ruv_init_from_slapi_attr_and_check_purl(Slapi_Attr *attr, RUV **ruv, ReplicaId *
 						RUVElement *ruve = get_ruvelement_from_berval(bval);
 						if (NULL != ruve)
 						{
+							char *ptr;
 							/* Is the local purl already in the ruv ? */
 							if ( (*contain_purl==0) && ruve->replica_purl && purl && (strncmp(ruve->replica_purl, purl, strlen(purl))==0) )
 							{
 								*contain_purl = ruve->rid;
 							}
+							/* ticket 47362 - nsslapd-port: 0 causes replication to break */
+							else if ((*contain_purl==0) && ruve->replica_purl && (port == 0) && localhost &&
+								 (ptr = strstr(ruve->replica_purl, localhost)) && (ptr != ruve->replica_purl) &&
+								 (*(ptr - 1) == '/') && (*(ptr+localhostlen) == ':'))
+							{
+								/* same hostname, but port number may have been temporarily set to 0
+								 * just allow it with whatever port number is already in the replica_purl
+								 * do not reset the port number, do not tell the configure_ruv code that there
+								 * is anything wrong
+								 */
+								*contain_purl = ruve->rid;
+							}
 							dl_add ((*ruv)->elements, ruve);
 						}
 					}
 				}
 			}
+			slapi_ch_free_string(&localhost);
 		}
 	}
 	return return_value;
@@ -1279,6 +1296,11 @@ ruv_compare_ruv(const RUV *ruv1, const char *ruv1name, const RUV *ruv2, const ch
     const char *ruvbnames[] = {ruv2name, ruv1name};
     const int nitems = 2;
 
+    if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) {
+	ruv_dump(ruv1, (char *)ruv1name, NULL);
+	ruv_dump(ruv2, (char *)ruv2name, NULL);
+    }
+
     /* compare replica generations first */
     if (ruv1->replGen == NULL || ruv2->replGen == NULL) {
         slapi_log_error(loglevel, repl_plugin_name,
@@ -1335,7 +1357,17 @@ ruv_compare_ruv(const RUV *ruv1, const char *ruv1name, const RUV *ruv2, const ch
                                     "than the max CSN [%s] from RUV [%s] for element [%s]\n",
                                     csnstrb, ruvbname, csnstra, ruvaname, ruvelem);
                     rc = RUV_COMP_CSN_DIFFERS;
+                } else {
+                    csn_as_string(replicaa->csn, PR_FALSE, csnstra);
+                    slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name,
+                                    "ruv_compare_ruv: the max CSN [%s] from RUV [%s] is less than "
+                                    "or equal to the max CSN [%s] from RUV [%s] for element [%s]\n",
+                                    csnstrb, ruvbname, csnstra, ruvaname, ruvelem);
                 }
+            } else {
+                slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name,
+                                "ruv_compare_ruv: RUV [%s] has an empty CSN\n",
+                                ruvbname);
             }
         }
     }
-- 
1.8.1.4