From 5b6deac35adbae20d0821a4530d30f0908ad7478 Mon Sep 17 00:00:00 2001 From: Mark Reynolds Date: Mon, 31 Mar 2014 15:17:59 -0400 Subject: [PATCH] Ticket 47759 - Crash in replication when server is under write load Bug Description: When the server is under alot of load, a race condition allows a replication connection LDAP struct to be freed(unbind) while it is being used by another thread. This leads to a crash. Fix Description: Extend the connection lock to also cover ldap client interaction (e.g. conn->ld struct). https://fedorahosted.org/389/ticket/47759 Reviewed by: nhosoi & rmeggins(Thanks!!) (cherry picked from commit 9940ca29ca258891c52640a23adc2851afe59d0e) (cherry picked from commit 0e576c85c34826c4d63d9578db55f8179b4a1a60) (cherry picked from commit 2a80b7152823ca16628c2da48614166b8d2104a4) --- .../servers/plugins/replication/repl5_connection.c | 89 ++++++++++++---------- ldap/servers/slapd/ldaputil.c | 39 +++++----- 2 files changed, 69 insertions(+), 59 deletions(-) diff --git a/ldap/servers/plugins/replication/repl5_connection.c b/ldap/servers/plugins/replication/repl5_connection.c index 668abda..17d1d9c 100644 --- a/ldap/servers/plugins/replication/repl5_connection.c +++ b/ldap/servers/plugins/replication/repl5_connection.c @@ -138,6 +138,7 @@ static void repl5_debug_timeout_callback(time_t when, void *arg); /* Forward declarations */ static void close_connection_internal(Repl_Connection *conn); +static void conn_delete_internal(Repl_Connection *conn); /* * Create a new connection object. Returns a pointer to the object, or @@ -182,11 +183,22 @@ conn_new(Repl_Agmt *agmt) rpc->plain = NULL; return rpc; loser: - conn_delete(rpc); + conn_delete_internal(rpc); slapi_ch_free((void**)&rpc); return NULL; } +static PRBool +conn_connected_locked(Repl_Connection *conn, int locked) +{ + PRBool return_value; + + if(!locked) PR_Lock(conn->lock); + return_value = STATE_CONNECTED == conn->state; + if(!locked) PR_Unlock(conn->lock); + + return return_value; +} /* * Return PR_TRUE if the connection is in the connected state @@ -194,14 +206,9 @@ loser: static PRBool conn_connected(Repl_Connection *conn) { - PRBool return_value; - PR_Lock(conn->lock); - return_value = STATE_CONNECTED == conn->state; - PR_Unlock(conn->lock); - return return_value; + return conn_connected_locked(conn, 1); } - /* * Destroy a connection object. */ @@ -243,7 +250,6 @@ conn_delete(Repl_Connection *conn) if (slapi_eq_cancel(conn->linger_event) == 1) { /* Event was found and cancelled. Destroy the connection object. */ - PR_Unlock(conn->lock); destroy_it = PR_TRUE; } else @@ -254,16 +260,15 @@ conn_delete(Repl_Connection *conn) * off, so arrange for the event to destroy the object . */ conn->delete_after_linger = PR_TRUE; - PR_Unlock(conn->lock); } } if (destroy_it) { conn_delete_internal(conn); } + PR_Unlock(conn->lock); } - /* * Return the last operation type processed by the connection * object, and the LDAP error encountered. @@ -327,17 +332,18 @@ conn_read_result_ex(Repl_Connection *conn, char **retoidp, struct berval **retda while (!slapi_is_shutting_down()) { /* we have to make sure the update sending thread does not - attempt to call conn_disconnect while we are reading + attempt to close connection while we are reading results - so lock the conn while we get the results */ PR_Lock(conn->lock); + if ((STATE_CONNECTED != conn->state) || !conn->ld) { rc = -1; return_value = CONN_NOT_CONNECTED; PR_Unlock(conn->lock); break; } - rc = ldap_result(conn->ld, send_msgid, 1, &local_timeout, &res); + PR_Unlock(conn->lock); if (0 != rc) @@ -661,8 +667,10 @@ perform_operation(Repl_Connection *conn, int optype, const char *dn, server_controls[1] = update_control; server_controls[2] = NULL; - /* lock the conn to prevent the result reader thread - from closing the connection out from under us */ + /* + * Lock the conn to prevent the result reader thread + * from closing the connection out from under us. + */ PR_Lock(conn->lock); if (STATE_CONNECTED == conn->state) { @@ -804,7 +812,6 @@ conn_send_rename(Repl_Connection *conn, const char *dn, NULL /* extop OID */, NULL /* extop payload */, message_id); } - /* * Send an LDAP extended operation. */ @@ -818,7 +825,6 @@ conn_send_extended_operation(Repl_Connection *conn, const char *extop_oid, update_control, extop_oid, payload, message_id); } - /* * Synchronously read an entry and return a specific attribute's values. * Returns CONN_OPERATION_SUCCESS if successful. Returns @@ -838,6 +844,8 @@ conn_read_entry_attribute(Repl_Connection *conn, const char *dn, LDAPMessage *res = NULL; char *attrs[2]; + PR_Lock(conn->lock); + PR_ASSERT(NULL != type); if (conn_connected(conn)) { @@ -860,7 +868,7 @@ conn_read_entry_attribute(Repl_Connection *conn, const char *dn, } else if (IS_DISCONNECT_ERROR(ldap_rc)) { - conn_disconnect(conn); + close_connection_internal(conn); return_value = CONN_NOT_CONNECTED; } else @@ -878,10 +886,11 @@ conn_read_entry_attribute(Repl_Connection *conn, const char *dn, { return_value = CONN_NOT_CONNECTED; } + PR_Unlock(conn->lock); + return return_value; } - /* * Return an pointer to a string describing the connection's status. */ @@ -892,8 +901,6 @@ conn_get_status(Repl_Connection *conn) return conn->status; } - - /* * Cancel any outstanding linger timer. Should be called when * a replication session is beginning. @@ -925,7 +932,6 @@ conn_cancel_linger(Repl_Connection *conn) PR_Unlock(conn->lock); } - /* * Called when our linger timeout timer expires. This means * we should check to see if perhaps the connection's become @@ -957,7 +963,6 @@ linger_timeout(time_t event_time, void *arg) } } - /* * Indicate that a session is ending. The linger timer starts when * this function is called. @@ -995,8 +1000,6 @@ conn_start_linger(Repl_Connection *conn) PR_Unlock(conn->lock); } - - /* * If no connection is currently active, opens a connection and binds to * the remote server. If a connection is open (e.g. lingering) then @@ -1015,10 +1018,14 @@ conn_connect(Repl_Connection *conn) ConnResult return_value = CONN_OPERATION_SUCCESS; int pw_ret = 1; - /** Connection already open just return SUCCESS **/ - if(conn->state == STATE_CONNECTED) goto done; - PR_Lock(conn->lock); + + /* Connection already open, just return SUCCESS */ + if(conn->state == STATE_CONNECTED){ + PR_Unlock(conn->lock); + return return_value; + } + if (conn->flag_agmt_changed) { /* So far we cannot change Hostname and Port */ /* slapi_ch_free((void **)&conn->hostname); */ @@ -1033,7 +1040,6 @@ conn_connect(Repl_Connection *conn) conn->port = agmt_get_port(conn->agmt); /* port could be updated */ slapi_ch_free((void **)&conn->plain); } - PR_Unlock(conn->lock); creds = agmt_get_credentials(conn->agmt); @@ -1174,6 +1180,7 @@ done: { close_connection_internal(conn); } + PR_Unlock(conn->lock); return return_value; } @@ -1209,7 +1216,6 @@ conn_disconnect(Repl_Connection *conn) PR_Unlock(conn->lock); } - /* * Determine if the remote replica supports DS 5.0 replication. * Return codes: @@ -1226,6 +1232,7 @@ conn_replica_supports_ds5_repl(Repl_Connection *conn) ConnResult return_value; int ldap_rc; + PR_Lock(conn->lock); if (conn_connected(conn)) { if (conn->supports_ds50_repl == -1) { @@ -1273,7 +1280,7 @@ conn_replica_supports_ds5_repl(Repl_Connection *conn) if (IS_DISCONNECT_ERROR(ldap_rc)) { conn->last_ldap_error = ldap_rc; /* specific reason */ - conn_disconnect(conn); + close_connection_internal(conn); return_value = CONN_NOT_CONNECTED; } else @@ -1293,10 +1300,11 @@ conn_replica_supports_ds5_repl(Repl_Connection *conn) /* Not connected */ return_value = CONN_NOT_CONNECTED; } + PR_Unlock(conn->lock); + return return_value; } - /* * Determine if the remote replica supports DS 7.1 replication. * Return codes: @@ -1313,6 +1321,7 @@ conn_replica_supports_ds71_repl(Repl_Connection *conn) ConnResult return_value; int ldap_rc; + PR_Lock(conn->lock); if (conn_connected(conn)) { if (conn->supports_ds71_repl == -1) { @@ -1344,7 +1353,7 @@ conn_replica_supports_ds71_repl(Repl_Connection *conn) if (IS_DISCONNECT_ERROR(ldap_rc)) { conn->last_ldap_error = ldap_rc; /* specific reason */ - conn_disconnect(conn); + close_connection_internal(conn); return_value = CONN_NOT_CONNECTED; } else @@ -1364,6 +1373,8 @@ conn_replica_supports_ds71_repl(Repl_Connection *conn) /* Not connected */ return_value = CONN_NOT_CONNECTED; } + PR_Unlock(conn->lock); + return return_value; } @@ -1383,6 +1394,7 @@ conn_replica_supports_ds90_repl(Repl_Connection *conn) ConnResult return_value; int ldap_rc; + PR_Lock(conn->lock); if (conn_connected(conn)) { if (conn->supports_ds90_repl == -1) { @@ -1414,7 +1426,7 @@ conn_replica_supports_ds90_repl(Repl_Connection *conn) if (IS_DISCONNECT_ERROR(ldap_rc)) { conn->last_ldap_error = ldap_rc; /* specific reason */ - conn_disconnect(conn); + close_connection_internal(conn); return_value = CONN_NOT_CONNECTED; } else @@ -1423,7 +1435,7 @@ conn_replica_supports_ds90_repl(Repl_Connection *conn) } } if (NULL != res) - ldap_msgfree(res); + ldap_msgfree(res); } else { @@ -1435,6 +1447,8 @@ conn_replica_supports_ds90_repl(Repl_Connection *conn) /* Not connected */ return_value = CONN_NOT_CONNECTED; } + PR_Unlock(conn->lock); + return return_value; } @@ -1452,7 +1466,6 @@ conn_replica_is_readonly(Repl_Connection *conn) } } - /* * Return 1 if "value" is a value of attribute type "type" in entry "entry". * Otherwise, return 0. @@ -1501,9 +1514,6 @@ attribute_string_value_present(LDAP *ld, LDAPMessage *entry, const char *type, return return_value; } - - - /* * Read the remote server's schema entry, then read the local schema entry, * and compare the nsschemacsn attribute. If the local csn is newer, or @@ -1533,7 +1543,7 @@ conn_push_schema(Repl_Connection *conn, CSN **remotecsn) return_value = CONN_OPERATION_FAILED; slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, "NULL remote CSN\n"); } - else if (!conn_connected(conn)) + else if (!conn_connected_locked(conn, 0 /* not locked */)) { return_value = CONN_NOT_CONNECTED; slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name, @@ -1699,6 +1709,7 @@ conn_push_schema(Repl_Connection *conn, CSN **remotecsn) { csn_free(&localcsn); } + return return_value; } diff --git a/ldap/servers/slapd/ldaputil.c b/ldap/servers/slapd/ldaputil.c index edc8267..08601bd 100644 --- a/ldap/servers/slapd/ldaputil.c +++ b/ldap/servers/slapd/ldaputil.c @@ -1011,8 +1011,8 @@ slapi_ldap_bind( than the currently unused clientctrls */ ldap_get_option(ld, LDAP_OPT_CLIENT_CONTROLS, &clientctrls); if (clientctrls && clientctrls[0] && - slapi_control_present(clientctrls, START_TLS_OID, NULL, NULL)) { - secure = 2; + slapi_control_present(clientctrls, START_TLS_OID, NULL, NULL)) { + secure = 2; } else { #if defined(USE_OPENLDAP) /* openldap doesn't have a SSL/TLS yes/no flag - so grab the @@ -1051,12 +1051,12 @@ slapi_ldap_bind( slapi_log_error(SLAPI_LOG_SHELL, "slapi_ldap_bind", "Set up conn to use client auth\n"); } - bvcreds.bv_val = NULL; /* ignore username and passed in creds */ - bvcreds.bv_len = 0; /* for external auth */ - bindid = NULL; + bvcreds.bv_val = NULL; /* ignore username and passed in creds */ + bvcreds.bv_len = 0; /* for external auth */ + bindid = NULL; } else { /* other type of auth */ - bvcreds.bv_val = (char *)creds; - bvcreds.bv_len = creds ? strlen(creds) : 0; + bvcreds.bv_val = (char *)creds; + bvcreds.bv_len = creds ? strlen(creds) : 0; } if (secure == 2) { /* send start tls */ @@ -1084,31 +1084,29 @@ slapi_ldap_bind( bindid, creds); if ((rc = ldap_sasl_bind(ld, bindid, mech, &bvcreds, serverctrls, NULL /* clientctrls */, &mymsgid))) { - char *myhostname = NULL; - char *copy = NULL; + char *hostname = NULL; + char *host_port = NULL; char *ptr = NULL; int myerrno = errno; int gaierr = 0; - ldap_get_option(ld, LDAP_OPT_HOST_NAME, &myhostname); - if (myhostname) { - ptr = strchr(myhostname, ':'); + ldap_get_option(ld, LDAP_OPT_HOST_NAME, &host_port); + if (host_port) { + ptr = strchr(host_port, ':'); if (ptr) { - copy = slapi_ch_strdup(myhostname); - *(copy + (ptr - myhostname)) = '\0'; - slapi_ch_free_string(&myhostname); - myhostname = copy; + hostname = slapi_ch_strdup(host_port); + *(hostname + (ptr - host_port)) = '\0'; } } - if (0 == myerrno) { struct addrinfo *result = NULL; - gaierr = getaddrinfo(myhostname, NULL, NULL, &result); + gaierr = getaddrinfo(hostname, NULL, NULL, &result); myerrno = errno; if (result) { freeaddrinfo(result); } } + slapi_log_error(SLAPI_LOG_FATAL, "slapi_ldap_bind", "Error: could not send bind request for id " "[%s] authentication mechanism [%s]: error %d (%s), system error %d (%s), " @@ -1119,8 +1117,9 @@ slapi_ldap_bind( PR_GetError(), slapd_pr_strerror(PR_GetError()), myerrno ? myerrno : gaierr, myerrno ? slapd_system_strerror(myerrno) : gai_strerror(gaierr), - myhostname ? myhostname : "unknown host"); - slapi_ch_free_string(&myhostname); + host_port ? host_port : "unknown host"); + slapi_ch_free_string(&hostname); + slapi_ch_free_string(&host_port); goto done; } -- 1.8.1.4