Blob Blame History Raw
From 5b6deac35adbae20d0821a4530d30f0908ad7478 Mon Sep 17 00:00:00 2001
From: Mark Reynolds <mreynolds@redhat.com>
Date: Mon, 31 Mar 2014 15:17:59 -0400
Subject: [PATCH] Ticket 47759 - Crash in replication when server is under
 write load

Bug Description:  When the server is under alot of load, a race condition allows
                  a replication connection LDAP struct to be freed(unbind) while
                  it is being used by another thread.  This leads to a crash.

Fix Description:  Extend the connection lock to also cover ldap client interaction
                  (e.g. conn->ld struct).

https://fedorahosted.org/389/ticket/47759

Reviewed by: nhosoi & rmeggins(Thanks!!)
(cherry picked from commit 9940ca29ca258891c52640a23adc2851afe59d0e)
(cherry picked from commit 0e576c85c34826c4d63d9578db55f8179b4a1a60)
(cherry picked from commit 2a80b7152823ca16628c2da48614166b8d2104a4)
---
 .../servers/plugins/replication/repl5_connection.c | 89 ++++++++++++----------
 ldap/servers/slapd/ldaputil.c                      | 39 +++++-----
 2 files changed, 69 insertions(+), 59 deletions(-)

diff --git a/ldap/servers/plugins/replication/repl5_connection.c b/ldap/servers/plugins/replication/repl5_connection.c
index 668abda..17d1d9c 100644
--- a/ldap/servers/plugins/replication/repl5_connection.c
+++ b/ldap/servers/plugins/replication/repl5_connection.c
@@ -138,6 +138,7 @@ static void repl5_debug_timeout_callback(time_t when, void *arg);
 
 /* Forward declarations */
 static void close_connection_internal(Repl_Connection *conn);
+static void conn_delete_internal(Repl_Connection *conn);
 
 /*
  * Create a new connection object. Returns a pointer to the object, or
@@ -182,11 +183,22 @@ conn_new(Repl_Agmt *agmt)
 	rpc->plain = NULL;
 	return rpc;
 loser:
-	conn_delete(rpc);
+	conn_delete_internal(rpc);
 	slapi_ch_free((void**)&rpc);
 	return NULL;
 }
 
+static PRBool
+conn_connected_locked(Repl_Connection *conn, int locked)
+{
+	PRBool return_value;
+
+	if(!locked) PR_Lock(conn->lock);
+	return_value = STATE_CONNECTED == conn->state;
+	if(!locked) PR_Unlock(conn->lock);
+
+	return return_value;
+}
 
 /*
  * Return PR_TRUE if the connection is in the connected state
@@ -194,14 +206,9 @@ loser:
 static PRBool
 conn_connected(Repl_Connection *conn)
 {
-	PRBool return_value;
-	PR_Lock(conn->lock);
-	return_value = STATE_CONNECTED == conn->state;
-	PR_Unlock(conn->lock);
-	return return_value;
+	return conn_connected_locked(conn, 1);
 }
 
-
 /*
  * Destroy a connection object.
  */
@@ -243,7 +250,6 @@ conn_delete(Repl_Connection *conn)
 		if (slapi_eq_cancel(conn->linger_event) == 1)
 		{
 			/* Event was found and cancelled. Destroy the connection object. */
-			PR_Unlock(conn->lock);
 			destroy_it = PR_TRUE;
 		}
 		else
@@ -254,16 +260,15 @@ conn_delete(Repl_Connection *conn)
 			 * off, so arrange for the event to destroy the object .
 			 */
 			conn->delete_after_linger = PR_TRUE;
-			PR_Unlock(conn->lock);
 		}
 	}
 	if (destroy_it)
 	{
 		conn_delete_internal(conn);
 	}
+	PR_Unlock(conn->lock);
 }
 
-
 /*
  * Return the last operation type processed by the connection
  * object, and the LDAP error encountered.
@@ -327,17 +332,18 @@ conn_read_result_ex(Repl_Connection *conn, char **retoidp, struct berval **retda
 			while (!slapi_is_shutting_down())
 			{
 				/* we have to make sure the update sending thread does not
-				   attempt to call conn_disconnect while we are reading
+				   attempt to close connection while we are reading
 				   results - so lock the conn while we get the results */
 				PR_Lock(conn->lock);
+
 				if ((STATE_CONNECTED != conn->state) || !conn->ld) {
 					rc = -1;
 					return_value = CONN_NOT_CONNECTED;
 					PR_Unlock(conn->lock);
 					break;
 				}
-
 				rc = ldap_result(conn->ld, send_msgid, 1, &local_timeout, &res);
+
 				PR_Unlock(conn->lock);
 
 				if (0 != rc)
@@ -661,8 +667,10 @@ perform_operation(Repl_Connection *conn, int optype, const char *dn,
 	server_controls[1] = update_control;
 	server_controls[2] = NULL;
 
-	/* lock the conn to prevent the result reader thread
-	   from closing the connection out from under us */
+	/*
+	 * Lock the conn to prevent the result reader thread
+	 * from closing the connection out from under us.
+	 */
 	PR_Lock(conn->lock);
 	if (STATE_CONNECTED == conn->state)
 	{
@@ -804,7 +812,6 @@ conn_send_rename(Repl_Connection *conn, const char *dn,
 		NULL /* extop OID */, NULL /* extop payload */, message_id);
 }
 
-
 /*
  * Send an LDAP extended operation.
  */
@@ -818,7 +825,6 @@ conn_send_extended_operation(Repl_Connection *conn, const char *extop_oid,
 		update_control, extop_oid, payload, message_id);
 }
 
-
 /*
  * Synchronously read an entry and return a specific attribute's values.
  * Returns CONN_OPERATION_SUCCESS if successful. Returns
@@ -838,6 +844,8 @@ conn_read_entry_attribute(Repl_Connection *conn, const char *dn,
 	LDAPMessage *res = NULL;
 	char *attrs[2];
 
+	PR_Lock(conn->lock);
+
 	PR_ASSERT(NULL != type);
 	if (conn_connected(conn))
 	{
@@ -860,7 +868,7 @@ conn_read_entry_attribute(Repl_Connection *conn, const char *dn,
 		}
 		else if (IS_DISCONNECT_ERROR(ldap_rc))
 		{
-			conn_disconnect(conn);
+			close_connection_internal(conn);
 			return_value = CONN_NOT_CONNECTED;
 		}
 		else
@@ -878,10 +886,11 @@ conn_read_entry_attribute(Repl_Connection *conn, const char *dn,
 	{
 		return_value = CONN_NOT_CONNECTED;
 	}
+	PR_Unlock(conn->lock);
+
 	return return_value;
 }
 
-
 /*
  * Return an pointer to a string describing the connection's status.
 */
@@ -892,8 +901,6 @@ conn_get_status(Repl_Connection *conn)
 	return conn->status;
 }
 
-
-
 /*
  * Cancel any outstanding linger timer. Should be called when
  * a replication session is beginning.
@@ -925,7 +932,6 @@ conn_cancel_linger(Repl_Connection *conn)
 	PR_Unlock(conn->lock);
 }
 
-
 /*
  * Called when our linger timeout timer expires. This means
  * we should check to see if perhaps the connection's become
@@ -957,7 +963,6 @@ linger_timeout(time_t event_time, void *arg)
 	}
 }
 
-
 /*
  * Indicate that a session is ending. The linger timer starts when
  * this function is called.
@@ -995,8 +1000,6 @@ conn_start_linger(Repl_Connection *conn)
 	PR_Unlock(conn->lock);
 }
 
-
-
 /*
  * If no connection is currently active, opens a connection and binds to
  * the remote server. If a connection is open (e.g. lingering) then
@@ -1015,10 +1018,14 @@ conn_connect(Repl_Connection *conn)
 	ConnResult return_value = CONN_OPERATION_SUCCESS;
 	int pw_ret = 1;
 
-	/** Connection already open just return SUCCESS **/
-	if(conn->state == STATE_CONNECTED) goto done;
-
 	PR_Lock(conn->lock);
+
+	/* Connection already open, just return SUCCESS */
+	if(conn->state == STATE_CONNECTED){
+		PR_Unlock(conn->lock);
+		return return_value;
+	}
+
 	if (conn->flag_agmt_changed) {
 		/* So far we cannot change Hostname and Port */
 		/* slapi_ch_free((void **)&conn->hostname); */
@@ -1033,7 +1040,6 @@ conn_connect(Repl_Connection *conn)
 		conn->port = agmt_get_port(conn->agmt); /* port could be updated */
 		slapi_ch_free((void **)&conn->plain);
 	}
-	PR_Unlock(conn->lock);
 
 	creds = agmt_get_credentials(conn->agmt);
 
@@ -1174,6 +1180,7 @@ done:
 	{
 		close_connection_internal(conn);
 	}
+	PR_Unlock(conn->lock);
 
 	return return_value;
 }
@@ -1209,7 +1216,6 @@ conn_disconnect(Repl_Connection *conn)
 	PR_Unlock(conn->lock);
 }
 
-
 /*
  * Determine if the remote replica supports DS 5.0 replication.
  * Return codes:
@@ -1226,6 +1232,7 @@ conn_replica_supports_ds5_repl(Repl_Connection *conn)
 	ConnResult return_value;
 	int ldap_rc;
 
+	PR_Lock(conn->lock);
 	if (conn_connected(conn))
 	{
 		if (conn->supports_ds50_repl == -1) {
@@ -1273,7 +1280,7 @@ conn_replica_supports_ds5_repl(Repl_Connection *conn)
 				if (IS_DISCONNECT_ERROR(ldap_rc))
 				{
 					conn->last_ldap_error = ldap_rc;	/* specific reason */
-					conn_disconnect(conn);
+					close_connection_internal(conn);
 					return_value = CONN_NOT_CONNECTED;
 				}
 				else
@@ -1293,10 +1300,11 @@ conn_replica_supports_ds5_repl(Repl_Connection *conn)
 		/* Not connected */
 		return_value = CONN_NOT_CONNECTED;
 	}
+	PR_Unlock(conn->lock);
+
 	return return_value;
 }
 
-
 /*
  * Determine if the remote replica supports DS 7.1 replication.
  * Return codes:
@@ -1313,6 +1321,7 @@ conn_replica_supports_ds71_repl(Repl_Connection *conn)
 	ConnResult return_value;
 	int ldap_rc;
 
+	PR_Lock(conn->lock);
 	if (conn_connected(conn))
 	{
 		if (conn->supports_ds71_repl == -1) {
@@ -1344,7 +1353,7 @@ conn_replica_supports_ds71_repl(Repl_Connection *conn)
 				if (IS_DISCONNECT_ERROR(ldap_rc))
 				{
 					conn->last_ldap_error = ldap_rc;	/* specific reason */
-					conn_disconnect(conn);
+					close_connection_internal(conn);
 					return_value = CONN_NOT_CONNECTED;
 				}
 				else
@@ -1364,6 +1373,8 @@ conn_replica_supports_ds71_repl(Repl_Connection *conn)
 		/* Not connected */
 		return_value = CONN_NOT_CONNECTED;
 	}
+	PR_Unlock(conn->lock);
+
 	return return_value;
 }
 
@@ -1383,6 +1394,7 @@ conn_replica_supports_ds90_repl(Repl_Connection *conn)
 	ConnResult return_value;
 	int ldap_rc;
 
+	PR_Lock(conn->lock);
 	if (conn_connected(conn))
 	{
 		if (conn->supports_ds90_repl == -1) {
@@ -1414,7 +1426,7 @@ conn_replica_supports_ds90_repl(Repl_Connection *conn)
 				if (IS_DISCONNECT_ERROR(ldap_rc))
 				{
 					conn->last_ldap_error = ldap_rc;        /* specific reason */
-					conn_disconnect(conn);
+					close_connection_internal(conn);
 					return_value = CONN_NOT_CONNECTED;
 				}
 				else
@@ -1423,7 +1435,7 @@ conn_replica_supports_ds90_repl(Repl_Connection *conn)
 				}
 			}
 			if (NULL != res)
-                                ldap_msgfree(res);
+				ldap_msgfree(res);
 		}
 		else
 		{
@@ -1435,6 +1447,8 @@ conn_replica_supports_ds90_repl(Repl_Connection *conn)
 		/* Not connected */
 		return_value = CONN_NOT_CONNECTED;
 	}
+	PR_Unlock(conn->lock);
+
 	return return_value;
 }
 
@@ -1452,7 +1466,6 @@ conn_replica_is_readonly(Repl_Connection *conn)
 	}
 }
 
-
 /*
  * Return 1 if "value" is a value of attribute type "type" in entry "entry".
  * Otherwise, return 0.
@@ -1501,9 +1514,6 @@ attribute_string_value_present(LDAP *ld, LDAPMessage *entry, const char *type,
 	return return_value;
 }
 
-
-
-
 /*
  * Read the remote server's schema entry, then read the local schema entry,
  * and compare the nsschemacsn attribute. If the local csn is newer, or
@@ -1533,7 +1543,7 @@ conn_push_schema(Repl_Connection *conn, CSN **remotecsn)
 		return_value = CONN_OPERATION_FAILED;
 		slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, "NULL remote CSN\n");
 	}
-	else if (!conn_connected(conn))
+	else if (!conn_connected_locked(conn, 0 /* not locked */))
 	{
 		return_value = CONN_NOT_CONNECTED;
 		slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name,
@@ -1699,6 +1709,7 @@ conn_push_schema(Repl_Connection *conn, CSN **remotecsn)
 	{
 		csn_free(&localcsn);
 	}
+
 	return return_value;
 }
 
diff --git a/ldap/servers/slapd/ldaputil.c b/ldap/servers/slapd/ldaputil.c
index edc8267..08601bd 100644
--- a/ldap/servers/slapd/ldaputil.c
+++ b/ldap/servers/slapd/ldaputil.c
@@ -1011,8 +1011,8 @@ slapi_ldap_bind(
        than the currently unused clientctrls */
     ldap_get_option(ld, LDAP_OPT_CLIENT_CONTROLS, &clientctrls);
     if (clientctrls && clientctrls[0] &&
-	slapi_control_present(clientctrls, START_TLS_OID, NULL, NULL)) {
-	secure = 2;
+        slapi_control_present(clientctrls, START_TLS_OID, NULL, NULL)) {
+        secure = 2;
     } else {
 #if defined(USE_OPENLDAP)
 	/* openldap doesn't have a SSL/TLS yes/no flag - so grab the
@@ -1051,12 +1051,12 @@ slapi_ldap_bind(
 	    slapi_log_error(SLAPI_LOG_SHELL, "slapi_ldap_bind",
 			    "Set up conn to use client auth\n");
 	}
-	bvcreds.bv_val = NULL; /* ignore username and passed in creds */
-	bvcreds.bv_len = 0; /* for external auth */
-	bindid = NULL;
+        bvcreds.bv_val = NULL; /* ignore username and passed in creds */
+        bvcreds.bv_len = 0; /* for external auth */
+        bindid = NULL;
     } else { /* other type of auth */
-	bvcreds.bv_val = (char *)creds;
-	bvcreds.bv_len = creds ? strlen(creds) : 0;
+        bvcreds.bv_val = (char *)creds;
+        bvcreds.bv_len = creds ? strlen(creds) : 0;
     }
 
     if (secure == 2) { /* send start tls */
@@ -1084,31 +1084,29 @@ slapi_ldap_bind(
 			bindid, creds);
 	if ((rc = ldap_sasl_bind(ld, bindid, mech, &bvcreds, serverctrls,
 	                         NULL /* clientctrls */, &mymsgid))) {
-	    char *myhostname = NULL;
-	    char *copy = NULL;
+	    char *hostname = NULL;
+	    char *host_port = NULL;
 	    char *ptr = NULL;
 	    int myerrno = errno;
 	    int gaierr = 0;
 
-	    ldap_get_option(ld, LDAP_OPT_HOST_NAME, &myhostname);
-	    if (myhostname) {
-	        ptr = strchr(myhostname, ':');
+	    ldap_get_option(ld, LDAP_OPT_HOST_NAME, &host_port);
+	    if (host_port) {
+	        ptr = strchr(host_port, ':');
 	        if (ptr) {
-	            copy = slapi_ch_strdup(myhostname);
-	            *(copy + (ptr - myhostname)) = '\0';
-	            slapi_ch_free_string(&myhostname);
-	            myhostname = copy;
+	            hostname = slapi_ch_strdup(host_port);
+	            *(hostname + (ptr - host_port)) = '\0';
 	        }
 	    }
-
 	    if (0 == myerrno) {
 	        struct addrinfo *result = NULL;
-	        gaierr = getaddrinfo(myhostname, NULL, NULL, &result);
+	        gaierr = getaddrinfo(hostname, NULL, NULL, &result);
 	        myerrno = errno;
 	        if (result) {
 	            freeaddrinfo(result);
 	        }
 	    }
+
 	    slapi_log_error(SLAPI_LOG_FATAL, "slapi_ldap_bind",
 			    "Error: could not send bind request for id "
 			    "[%s] authentication mechanism [%s]: error %d (%s), system error %d (%s), "
@@ -1119,8 +1117,9 @@ slapi_ldap_bind(
 			    PR_GetError(), slapd_pr_strerror(PR_GetError()),
 			    myerrno ? myerrno : gaierr,
 			    myerrno ? slapd_system_strerror(myerrno) : gai_strerror(gaierr),
-			    myhostname ? myhostname : "unknown host");
-	    slapi_ch_free_string(&myhostname);
+			    host_port ? host_port : "unknown host");
+	    slapi_ch_free_string(&hostname);
+	    slapi_ch_free_string(&host_port);
 	    goto done;
 	}
 
-- 
1.8.1.4