Blame SOURCES/0097-Ticket-47759-Crash-in-replication-when-server-is-und.patch

cc3dff
From 5b6deac35adbae20d0821a4530d30f0908ad7478 Mon Sep 17 00:00:00 2001
cc3dff
From: Mark Reynolds <mreynolds@redhat.com>
cc3dff
Date: Mon, 31 Mar 2014 15:17:59 -0400
cc3dff
Subject: [PATCH] Ticket 47759 - Crash in replication when server is under
cc3dff
 write load
cc3dff
cc3dff
Bug Description:  When the server is under alot of load, a race condition allows
cc3dff
                  a replication connection LDAP struct to be freed(unbind) while
cc3dff
                  it is being used by another thread.  This leads to a crash.
cc3dff
cc3dff
Fix Description:  Extend the connection lock to also cover ldap client interaction
cc3dff
                  (e.g. conn->ld struct).
cc3dff
cc3dff
https://fedorahosted.org/389/ticket/47759
cc3dff
cc3dff
Reviewed by: nhosoi & rmeggins(Thanks!!)
cc3dff
(cherry picked from commit 9940ca29ca258891c52640a23adc2851afe59d0e)
cc3dff
(cherry picked from commit 0e576c85c34826c4d63d9578db55f8179b4a1a60)
cc3dff
(cherry picked from commit 2a80b7152823ca16628c2da48614166b8d2104a4)
cc3dff
---
cc3dff
 .../servers/plugins/replication/repl5_connection.c | 89 ++++++++++++----------
cc3dff
 ldap/servers/slapd/ldaputil.c                      | 39 +++++-----
cc3dff
 2 files changed, 69 insertions(+), 59 deletions(-)
cc3dff
cc3dff
diff --git a/ldap/servers/plugins/replication/repl5_connection.c b/ldap/servers/plugins/replication/repl5_connection.c
cc3dff
index 668abda..17d1d9c 100644
cc3dff
--- a/ldap/servers/plugins/replication/repl5_connection.c
cc3dff
+++ b/ldap/servers/plugins/replication/repl5_connection.c
cc3dff
@@ -138,6 +138,7 @@ static void repl5_debug_timeout_callback(time_t when, void *arg);
cc3dff
 
cc3dff
 /* Forward declarations */
cc3dff
 static void close_connection_internal(Repl_Connection *conn);
cc3dff
+static void conn_delete_internal(Repl_Connection *conn);
cc3dff
 
cc3dff
 /*
cc3dff
  * Create a new connection object. Returns a pointer to the object, or
cc3dff
@@ -182,11 +183,22 @@ conn_new(Repl_Agmt *agmt)
cc3dff
 	rpc->plain = NULL;
cc3dff
 	return rpc;
cc3dff
 loser:
cc3dff
-	conn_delete(rpc);
cc3dff
+	conn_delete_internal(rpc);
cc3dff
 	slapi_ch_free((void**)&rpc;;
cc3dff
 	return NULL;
cc3dff
 }
cc3dff
 
cc3dff
+static PRBool
cc3dff
+conn_connected_locked(Repl_Connection *conn, int locked)
cc3dff
+{
cc3dff
+	PRBool return_value;
cc3dff
+
cc3dff
+	if(!locked) PR_Lock(conn->lock);
cc3dff
+	return_value = STATE_CONNECTED == conn->state;
cc3dff
+	if(!locked) PR_Unlock(conn->lock);
cc3dff
+
cc3dff
+	return return_value;
cc3dff
+}
cc3dff
 
cc3dff
 /*
cc3dff
  * Return PR_TRUE if the connection is in the connected state
cc3dff
@@ -194,14 +206,9 @@ loser:
cc3dff
 static PRBool
cc3dff
 conn_connected(Repl_Connection *conn)
cc3dff
 {
cc3dff
-	PRBool return_value;
cc3dff
-	PR_Lock(conn->lock);
cc3dff
-	return_value = STATE_CONNECTED == conn->state;
cc3dff
-	PR_Unlock(conn->lock);
cc3dff
-	return return_value;
cc3dff
+	return conn_connected_locked(conn, 1);
cc3dff
 }
cc3dff
 
cc3dff
-
cc3dff
 /*
cc3dff
  * Destroy a connection object.
cc3dff
  */
cc3dff
@@ -243,7 +250,6 @@ conn_delete(Repl_Connection *conn)
cc3dff
 		if (slapi_eq_cancel(conn->linger_event) == 1)
cc3dff
 		{
cc3dff
 			/* Event was found and cancelled. Destroy the connection object. */
cc3dff
-			PR_Unlock(conn->lock);
cc3dff
 			destroy_it = PR_TRUE;
cc3dff
 		}
cc3dff
 		else
cc3dff
@@ -254,16 +260,15 @@ conn_delete(Repl_Connection *conn)
cc3dff
 			 * off, so arrange for the event to destroy the object .
cc3dff
 			 */
cc3dff
 			conn->delete_after_linger = PR_TRUE;
cc3dff
-			PR_Unlock(conn->lock);
cc3dff
 		}
cc3dff
 	}
cc3dff
 	if (destroy_it)
cc3dff
 	{
cc3dff
 		conn_delete_internal(conn);
cc3dff
 	}
cc3dff
+	PR_Unlock(conn->lock);
cc3dff
 }
cc3dff
 
cc3dff
-
cc3dff
 /*
cc3dff
  * Return the last operation type processed by the connection
cc3dff
  * object, and the LDAP error encountered.
cc3dff
@@ -327,17 +332,18 @@ conn_read_result_ex(Repl_Connection *conn, char **retoidp, struct berval **retda
cc3dff
 			while (!slapi_is_shutting_down())
cc3dff
 			{
cc3dff
 				/* we have to make sure the update sending thread does not
cc3dff
-				   attempt to call conn_disconnect while we are reading
cc3dff
+				   attempt to close connection while we are reading
cc3dff
 				   results - so lock the conn while we get the results */
cc3dff
 				PR_Lock(conn->lock);
cc3dff
+
cc3dff
 				if ((STATE_CONNECTED != conn->state) || !conn->ld) {
cc3dff
 					rc = -1;
cc3dff
 					return_value = CONN_NOT_CONNECTED;
cc3dff
 					PR_Unlock(conn->lock);
cc3dff
 					break;
cc3dff
 				}
cc3dff
-
cc3dff
 				rc = ldap_result(conn->ld, send_msgid, 1, &local_timeout, &res;;
cc3dff
+
cc3dff
 				PR_Unlock(conn->lock);
cc3dff
 
cc3dff
 				if (0 != rc)
cc3dff
@@ -661,8 +667,10 @@ perform_operation(Repl_Connection *conn, int optype, const char *dn,
cc3dff
 	server_controls[1] = update_control;
cc3dff
 	server_controls[2] = NULL;
cc3dff
 
cc3dff
-	/* lock the conn to prevent the result reader thread
cc3dff
-	   from closing the connection out from under us */
cc3dff
+	/*
cc3dff
+	 * Lock the conn to prevent the result reader thread
cc3dff
+	 * from closing the connection out from under us.
cc3dff
+	 */
cc3dff
 	PR_Lock(conn->lock);
cc3dff
 	if (STATE_CONNECTED == conn->state)
cc3dff
 	{
cc3dff
@@ -804,7 +812,6 @@ conn_send_rename(Repl_Connection *conn, const char *dn,
cc3dff
 		NULL /* extop OID */, NULL /* extop payload */, message_id);
cc3dff
 }
cc3dff
 
cc3dff
-
cc3dff
 /*
cc3dff
  * Send an LDAP extended operation.
cc3dff
  */
cc3dff
@@ -818,7 +825,6 @@ conn_send_extended_operation(Repl_Connection *conn, const char *extop_oid,
cc3dff
 		update_control, extop_oid, payload, message_id);
cc3dff
 }
cc3dff
 
cc3dff
-
cc3dff
 /*
cc3dff
  * Synchronously read an entry and return a specific attribute's values.
cc3dff
  * Returns CONN_OPERATION_SUCCESS if successful. Returns
cc3dff
@@ -838,6 +844,8 @@ conn_read_entry_attribute(Repl_Connection *conn, const char *dn,
cc3dff
 	LDAPMessage *res = NULL;
cc3dff
 	char *attrs[2];
cc3dff
 
cc3dff
+	PR_Lock(conn->lock);
cc3dff
+
cc3dff
 	PR_ASSERT(NULL != type);
cc3dff
 	if (conn_connected(conn))
cc3dff
 	{
cc3dff
@@ -860,7 +868,7 @@ conn_read_entry_attribute(Repl_Connection *conn, const char *dn,
cc3dff
 		}
cc3dff
 		else if (IS_DISCONNECT_ERROR(ldap_rc))
cc3dff
 		{
cc3dff
-			conn_disconnect(conn);
cc3dff
+			close_connection_internal(conn);
cc3dff
 			return_value = CONN_NOT_CONNECTED;
cc3dff
 		}
cc3dff
 		else
cc3dff
@@ -878,10 +886,11 @@ conn_read_entry_attribute(Repl_Connection *conn, const char *dn,
cc3dff
 	{
cc3dff
 		return_value = CONN_NOT_CONNECTED;
cc3dff
 	}
cc3dff
+	PR_Unlock(conn->lock);
cc3dff
+
cc3dff
 	return return_value;
cc3dff
 }
cc3dff
 
cc3dff
-
cc3dff
 /*
cc3dff
  * Return an pointer to a string describing the connection's status.
cc3dff
 */
cc3dff
@@ -892,8 +901,6 @@ conn_get_status(Repl_Connection *conn)
cc3dff
 	return conn->status;
cc3dff
 }
cc3dff
 
cc3dff
-
cc3dff
-
cc3dff
 /*
cc3dff
  * Cancel any outstanding linger timer. Should be called when
cc3dff
  * a replication session is beginning.
cc3dff
@@ -925,7 +932,6 @@ conn_cancel_linger(Repl_Connection *conn)
cc3dff
 	PR_Unlock(conn->lock);
cc3dff
 }
cc3dff
 
cc3dff
-
cc3dff
 /*
cc3dff
  * Called when our linger timeout timer expires. This means
cc3dff
  * we should check to see if perhaps the connection's become
cc3dff
@@ -957,7 +963,6 @@ linger_timeout(time_t event_time, void *arg)
cc3dff
 	}
cc3dff
 }
cc3dff
 
cc3dff
-
cc3dff
 /*
cc3dff
  * Indicate that a session is ending. The linger timer starts when
cc3dff
  * this function is called.
cc3dff
@@ -995,8 +1000,6 @@ conn_start_linger(Repl_Connection *conn)
cc3dff
 	PR_Unlock(conn->lock);
cc3dff
 }
cc3dff
 
cc3dff
-
cc3dff
-
cc3dff
 /*
cc3dff
  * If no connection is currently active, opens a connection and binds to
cc3dff
  * the remote server. If a connection is open (e.g. lingering) then
cc3dff
@@ -1015,10 +1018,14 @@ conn_connect(Repl_Connection *conn)
cc3dff
 	ConnResult return_value = CONN_OPERATION_SUCCESS;
cc3dff
 	int pw_ret = 1;
cc3dff
 
cc3dff
-	/** Connection already open just return SUCCESS **/
cc3dff
-	if(conn->state == STATE_CONNECTED) goto done;
cc3dff
-
cc3dff
 	PR_Lock(conn->lock);
cc3dff
+
cc3dff
+	/* Connection already open, just return SUCCESS */
cc3dff
+	if(conn->state == STATE_CONNECTED){
cc3dff
+		PR_Unlock(conn->lock);
cc3dff
+		return return_value;
cc3dff
+	}
cc3dff
+
cc3dff
 	if (conn->flag_agmt_changed) {
cc3dff
 		/* So far we cannot change Hostname and Port */
cc3dff
 		/* slapi_ch_free((void **)&conn->hostname); */
cc3dff
@@ -1033,7 +1040,6 @@ conn_connect(Repl_Connection *conn)
cc3dff
 		conn->port = agmt_get_port(conn->agmt); /* port could be updated */
cc3dff
 		slapi_ch_free((void **)&conn->plain);
cc3dff
 	}
cc3dff
-	PR_Unlock(conn->lock);
cc3dff
 
cc3dff
 	creds = agmt_get_credentials(conn->agmt);
cc3dff
 
cc3dff
@@ -1174,6 +1180,7 @@ done:
cc3dff
 	{
cc3dff
 		close_connection_internal(conn);
cc3dff
 	}
cc3dff
+	PR_Unlock(conn->lock);
cc3dff
 
cc3dff
 	return return_value;
cc3dff
 }
cc3dff
@@ -1209,7 +1216,6 @@ conn_disconnect(Repl_Connection *conn)
cc3dff
 	PR_Unlock(conn->lock);
cc3dff
 }
cc3dff
 
cc3dff
-
cc3dff
 /*
cc3dff
  * Determine if the remote replica supports DS 5.0 replication.
cc3dff
  * Return codes:
cc3dff
@@ -1226,6 +1232,7 @@ conn_replica_supports_ds5_repl(Repl_Connection *conn)
cc3dff
 	ConnResult return_value;
cc3dff
 	int ldap_rc;
cc3dff
 
cc3dff
+	PR_Lock(conn->lock);
cc3dff
 	if (conn_connected(conn))
cc3dff
 	{
cc3dff
 		if (conn->supports_ds50_repl == -1) {
cc3dff
@@ -1273,7 +1280,7 @@ conn_replica_supports_ds5_repl(Repl_Connection *conn)
cc3dff
 				if (IS_DISCONNECT_ERROR(ldap_rc))
cc3dff
 				{
cc3dff
 					conn->last_ldap_error = ldap_rc;	/* specific reason */
cc3dff
-					conn_disconnect(conn);
cc3dff
+					close_connection_internal(conn);
cc3dff
 					return_value = CONN_NOT_CONNECTED;
cc3dff
 				}
cc3dff
 				else
cc3dff
@@ -1293,10 +1300,11 @@ conn_replica_supports_ds5_repl(Repl_Connection *conn)
cc3dff
 		/* Not connected */
cc3dff
 		return_value = CONN_NOT_CONNECTED;
cc3dff
 	}
cc3dff
+	PR_Unlock(conn->lock);
cc3dff
+
cc3dff
 	return return_value;
cc3dff
 }
cc3dff
 
cc3dff
-
cc3dff
 /*
cc3dff
  * Determine if the remote replica supports DS 7.1 replication.
cc3dff
  * Return codes:
cc3dff
@@ -1313,6 +1321,7 @@ conn_replica_supports_ds71_repl(Repl_Connection *conn)
cc3dff
 	ConnResult return_value;
cc3dff
 	int ldap_rc;
cc3dff
 
cc3dff
+	PR_Lock(conn->lock);
cc3dff
 	if (conn_connected(conn))
cc3dff
 	{
cc3dff
 		if (conn->supports_ds71_repl == -1) {
cc3dff
@@ -1344,7 +1353,7 @@ conn_replica_supports_ds71_repl(Repl_Connection *conn)
cc3dff
 				if (IS_DISCONNECT_ERROR(ldap_rc))
cc3dff
 				{
cc3dff
 					conn->last_ldap_error = ldap_rc;	/* specific reason */
cc3dff
-					conn_disconnect(conn);
cc3dff
+					close_connection_internal(conn);
cc3dff
 					return_value = CONN_NOT_CONNECTED;
cc3dff
 				}
cc3dff
 				else
cc3dff
@@ -1364,6 +1373,8 @@ conn_replica_supports_ds71_repl(Repl_Connection *conn)
cc3dff
 		/* Not connected */
cc3dff
 		return_value = CONN_NOT_CONNECTED;
cc3dff
 	}
cc3dff
+	PR_Unlock(conn->lock);
cc3dff
+
cc3dff
 	return return_value;
cc3dff
 }
cc3dff
 
cc3dff
@@ -1383,6 +1394,7 @@ conn_replica_supports_ds90_repl(Repl_Connection *conn)
cc3dff
 	ConnResult return_value;
cc3dff
 	int ldap_rc;
cc3dff
 
cc3dff
+	PR_Lock(conn->lock);
cc3dff
 	if (conn_connected(conn))
cc3dff
 	{
cc3dff
 		if (conn->supports_ds90_repl == -1) {
cc3dff
@@ -1414,7 +1426,7 @@ conn_replica_supports_ds90_repl(Repl_Connection *conn)
cc3dff
 				if (IS_DISCONNECT_ERROR(ldap_rc))
cc3dff
 				{
cc3dff
 					conn->last_ldap_error = ldap_rc;        /* specific reason */
cc3dff
-					conn_disconnect(conn);
cc3dff
+					close_connection_internal(conn);
cc3dff
 					return_value = CONN_NOT_CONNECTED;
cc3dff
 				}
cc3dff
 				else
cc3dff
@@ -1423,7 +1435,7 @@ conn_replica_supports_ds90_repl(Repl_Connection *conn)
cc3dff
 				}
cc3dff
 			}
cc3dff
 			if (NULL != res)
cc3dff
-                                ldap_msgfree(res);
cc3dff
+				ldap_msgfree(res);
cc3dff
 		}
cc3dff
 		else
cc3dff
 		{
cc3dff
@@ -1435,6 +1447,8 @@ conn_replica_supports_ds90_repl(Repl_Connection *conn)
cc3dff
 		/* Not connected */
cc3dff
 		return_value = CONN_NOT_CONNECTED;
cc3dff
 	}
cc3dff
+	PR_Unlock(conn->lock);
cc3dff
+
cc3dff
 	return return_value;
cc3dff
 }
cc3dff
 
cc3dff
@@ -1452,7 +1466,6 @@ conn_replica_is_readonly(Repl_Connection *conn)
cc3dff
 	}
cc3dff
 }
cc3dff
 
cc3dff
-
cc3dff
 /*
cc3dff
  * Return 1 if "value" is a value of attribute type "type" in entry "entry".
cc3dff
  * Otherwise, return 0.
cc3dff
@@ -1501,9 +1514,6 @@ attribute_string_value_present(LDAP *ld, LDAPMessage *entry, const char *type,
cc3dff
 	return return_value;
cc3dff
 }
cc3dff
 
cc3dff
-
cc3dff
-
cc3dff
-
cc3dff
 /*
cc3dff
  * Read the remote server's schema entry, then read the local schema entry,
cc3dff
  * and compare the nsschemacsn attribute. If the local csn is newer, or
cc3dff
@@ -1533,7 +1543,7 @@ conn_push_schema(Repl_Connection *conn, CSN **remotecsn)
cc3dff
 		return_value = CONN_OPERATION_FAILED;
cc3dff
 		slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, "NULL remote CSN\n");
cc3dff
 	}
cc3dff
-	else if (!conn_connected(conn))
cc3dff
+	else if (!conn_connected_locked(conn, 0 /* not locked */))
cc3dff
 	{
cc3dff
 		return_value = CONN_NOT_CONNECTED;
cc3dff
 		slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name,
cc3dff
@@ -1699,6 +1709,7 @@ conn_push_schema(Repl_Connection *conn, CSN **remotecsn)
cc3dff
 	{
cc3dff
 		csn_free(&localcsn);
cc3dff
 	}
cc3dff
+
cc3dff
 	return return_value;
cc3dff
 }
cc3dff
 
cc3dff
diff --git a/ldap/servers/slapd/ldaputil.c b/ldap/servers/slapd/ldaputil.c
cc3dff
index edc8267..08601bd 100644
cc3dff
--- a/ldap/servers/slapd/ldaputil.c
cc3dff
+++ b/ldap/servers/slapd/ldaputil.c
cc3dff
@@ -1011,8 +1011,8 @@ slapi_ldap_bind(
cc3dff
        than the currently unused clientctrls */
cc3dff
     ldap_get_option(ld, LDAP_OPT_CLIENT_CONTROLS, &clientctrls);
cc3dff
     if (clientctrls && clientctrls[0] &&
cc3dff
-	slapi_control_present(clientctrls, START_TLS_OID, NULL, NULL)) {
cc3dff
-	secure = 2;
cc3dff
+        slapi_control_present(clientctrls, START_TLS_OID, NULL, NULL)) {
cc3dff
+        secure = 2;
cc3dff
     } else {
cc3dff
 #if defined(USE_OPENLDAP)
cc3dff
 	/* openldap doesn't have a SSL/TLS yes/no flag - so grab the
cc3dff
@@ -1051,12 +1051,12 @@ slapi_ldap_bind(
cc3dff
 	    slapi_log_error(SLAPI_LOG_SHELL, "slapi_ldap_bind",
cc3dff
 			    "Set up conn to use client auth\n");
cc3dff
 	}
cc3dff
-	bvcreds.bv_val = NULL; /* ignore username and passed in creds */
cc3dff
-	bvcreds.bv_len = 0; /* for external auth */
cc3dff
-	bindid = NULL;
cc3dff
+        bvcreds.bv_val = NULL; /* ignore username and passed in creds */
cc3dff
+        bvcreds.bv_len = 0; /* for external auth */
cc3dff
+        bindid = NULL;
cc3dff
     } else { /* other type of auth */
cc3dff
-	bvcreds.bv_val = (char *)creds;
cc3dff
-	bvcreds.bv_len = creds ? strlen(creds) : 0;
cc3dff
+        bvcreds.bv_val = (char *)creds;
cc3dff
+        bvcreds.bv_len = creds ? strlen(creds) : 0;
cc3dff
     }
cc3dff
 
cc3dff
     if (secure == 2) { /* send start tls */
cc3dff
@@ -1084,31 +1084,29 @@ slapi_ldap_bind(
cc3dff
 			bindid, creds);
cc3dff
 	if ((rc = ldap_sasl_bind(ld, bindid, mech, &bvcreds, serverctrls,
cc3dff
 	                         NULL /* clientctrls */, &mymsgid))) {
cc3dff
-	    char *myhostname = NULL;
cc3dff
-	    char *copy = NULL;
cc3dff
+	    char *hostname = NULL;
cc3dff
+	    char *host_port = NULL;
cc3dff
 	    char *ptr = NULL;
cc3dff
 	    int myerrno = errno;
cc3dff
 	    int gaierr = 0;
cc3dff
 
cc3dff
-	    ldap_get_option(ld, LDAP_OPT_HOST_NAME, &myhostname);
cc3dff
-	    if (myhostname) {
cc3dff
-	        ptr = strchr(myhostname, ':');
cc3dff
+	    ldap_get_option(ld, LDAP_OPT_HOST_NAME, &host_port);
cc3dff
+	    if (host_port) {
cc3dff
+	        ptr = strchr(host_port, ':');
cc3dff
 	        if (ptr) {
cc3dff
-	            copy = slapi_ch_strdup(myhostname);
cc3dff
-	            *(copy + (ptr - myhostname)) = '\0';
cc3dff
-	            slapi_ch_free_string(&myhostname);
cc3dff
-	            myhostname = copy;
cc3dff
+	            hostname = slapi_ch_strdup(host_port);
cc3dff
+	            *(hostname + (ptr - host_port)) = '\0';
cc3dff
 	        }
cc3dff
 	    }
cc3dff
-
cc3dff
 	    if (0 == myerrno) {
cc3dff
 	        struct addrinfo *result = NULL;
cc3dff
-	        gaierr = getaddrinfo(myhostname, NULL, NULL, &result);
cc3dff
+	        gaierr = getaddrinfo(hostname, NULL, NULL, &result);
cc3dff
 	        myerrno = errno;
cc3dff
 	        if (result) {
cc3dff
 	            freeaddrinfo(result);
cc3dff
 	        }
cc3dff
 	    }
cc3dff
+
cc3dff
 	    slapi_log_error(SLAPI_LOG_FATAL, "slapi_ldap_bind",
cc3dff
 			    "Error: could not send bind request for id "
cc3dff
 			    "[%s] authentication mechanism [%s]: error %d (%s), system error %d (%s), "
cc3dff
@@ -1119,8 +1117,9 @@ slapi_ldap_bind(
cc3dff
 			    PR_GetError(), slapd_pr_strerror(PR_GetError()),
cc3dff
 			    myerrno ? myerrno : gaierr,
cc3dff
 			    myerrno ? slapd_system_strerror(myerrno) : gai_strerror(gaierr),
cc3dff
-			    myhostname ? myhostname : "unknown host");
cc3dff
-	    slapi_ch_free_string(&myhostname);
cc3dff
+			    host_port ? host_port : "unknown host");
cc3dff
+	    slapi_ch_free_string(&hostname);
cc3dff
+	    slapi_ch_free_string(&host_port);
cc3dff
 	    goto done;
cc3dff
 	}
cc3dff
 
cc3dff
-- 
cc3dff
1.8.1.4
cc3dff