From 4d825079a9bb622211d3709ea2496f533b8e51d1 Mon Sep 17 00:00:00 2001 From: Noriko Hosoi Date: Fri, 5 Oct 2012 17:56:22 -0700 Subject: [PATCH 2/5] Bug 863576 - Dirsrv deadlock locking up IPA https://bugzilla.redhat.com/show_bug.cgi?id=863576 Bug Description: Abandon of a Simple Paged Results request causes the self deadlock. When abandoning a simple paged result request, the mutex for the connection object c_mutex is locked in do_abandon. But to free a pagedresult massage id, pagedresults_free_one_msgid called from do_abandon tries to acquire lock on c_mutex again. The NSPR lock function PR_Lock is not self re-entrant. Thus the server hangs there due to the self-deadlock. Fix Description: This patch is removing to call PR_Lock(c_mutex) in pagedresults_free_one_msgid and renamed it to pagedresults_free_ one_msgid_nolock. To maintain the consistency, "_nolock" is added to other pagedresults apis which do not call PR_Lock in it. Also, stricter locking on c_mutex is being added to pagedresults_ parse_control_value to protect the pagedresults related field in the connection object. (cherry picked from commit c19bb9dd1e95ee98a53a06f3d7eefb4dce5bc0ef) --- ldap/servers/slapd/abandon.c | 2 +- ldap/servers/slapd/connection.c | 4 +- ldap/servers/slapd/daemon.c | 2 +- ldap/servers/slapd/pagedresults.c | 130 +++++++++++++++++++------------------ ldap/servers/slapd/proto-slap.h | 8 +- 5 files changed, 74 insertions(+), 72 deletions(-) diff --git a/ldap/servers/slapd/abandon.c b/ldap/servers/slapd/abandon.c index 4f00da9..094ae95 100644 --- a/ldap/servers/slapd/abandon.c +++ b/ldap/servers/slapd/abandon.c @@ -153,7 +153,7 @@ do_abandon( Slapi_PBlock *pb ) } if ( op_is_pagedresults(o) ) { - if ( 0 == pagedresults_free_one_msgid(pb->pb_conn, id) ) { + if ( 0 == pagedresults_free_one_msgid_nolock(pb->pb_conn, id) ) { slapi_log_access( LDAP_DEBUG_STATS, "conn=%" NSPRIu64 " op=%d ABANDON targetop=Simple Paged Results\n", pb->pb_conn->c_connid, pb->pb_op->o_opid ); diff --git a/ldap/servers/slapd/connection.c b/ldap/servers/slapd/connection.c index 9e43104..a3b1df5 100644 --- a/ldap/servers/slapd/connection.c +++ b/ldap/servers/slapd/connection.c @@ -2094,7 +2094,7 @@ void connection_enter_leave_turbo(Connection *conn, int current_turbo_flag, int PR_Lock(conn->c_mutex); /* We can already be in turbo mode, or not */ current_mode = current_turbo_flag; - if (pagedresults_in_use(conn)) { + if (pagedresults_in_use_nolock(conn)) { /* PAGED_RESULTS does not need turbo mode */ new_mode = 0; } else if (conn->c_private->operation_rate == 0) { @@ -2780,7 +2780,7 @@ disconnect_server_nomutex( Connection *conn, PRUint64 opconnid, int opid, PRErro connection_abandon_operations( conn ); /* needed here to ensure simple paged results timeout properly and * don't impact subsequent ops */ - pagedresults_reset_timedout(conn); + pagedresults_reset_timedout_nolock(conn); if (! config_check_referral_mode()) { /* diff --git a/ldap/servers/slapd/daemon.c b/ldap/servers/slapd/daemon.c index 597e131..b611f5c 100644 --- a/ldap/servers/slapd/daemon.c +++ b/ldap/servers/slapd/daemon.c @@ -1693,7 +1693,7 @@ setup_pr_read_pds(Connection_Table *ct, PRFileDesc **n_tcps, PRFileDesc **s_tcps { int add_fd = 1; /* check timeout for PAGED RESULTS */ - if (pagedresults_is_timedout(c)) + if (pagedresults_is_timedout_nolock(c)) { /* Exceeded the timelimit; disconnect the client */ disconnect_server_nomutex(c, c->c_connid, -1, diff --git a/ldap/servers/slapd/pagedresults.c b/ldap/servers/slapd/pagedresults.c index ea7de14..d445c06 100644 --- a/ldap/servers/slapd/pagedresults.c +++ b/ldap/servers/slapd/pagedresults.c @@ -64,6 +64,7 @@ pagedresults_parse_control_value( Slapi_PBlock *pb, struct berval cookie = {0}; Connection *conn = pb->pb_conn; Operation *op = pb->pb_op; + BerElement *ber = NULL; LDAPDebug0Args(LDAP_DEBUG_TRACE, "--> pagedresults_parse_control_value\n"); if ( NULL == conn || NULL == op || NULL == pagesize || NULL == index ) { @@ -76,70 +77,71 @@ pagedresults_parse_control_value( Slapi_PBlock *pb, if ( psbvp->bv_len == 0 || psbvp->bv_val == NULL ) { - rc = LDAP_PROTOCOL_ERROR; + LDAPDebug0Args(LDAP_DEBUG_ANY, + "<-- pagedresults_parse_control_value: no control value\n"); + return LDAP_PROTOCOL_ERROR; } - else + ber = ber_init( psbvp ); + if ( ber == NULL ) { - BerElement *ber = ber_init( psbvp ); - if ( ber == NULL ) - { - rc = LDAP_OPERATIONS_ERROR; - } - else - { - if ( ber_scanf( ber, "{io}", pagesize, &cookie ) == LBER_ERROR ) - { - rc = LDAP_PROTOCOL_ERROR; + LDAPDebug0Args(LDAP_DEBUG_ANY, + "<-- pagedresults_parse_control_value: no control value\n"); + return LDAP_PROTOCOL_ERROR; + } + if ( ber_scanf( ber, "{io}", pagesize, &cookie ) == LBER_ERROR ) + { + LDAPDebug0Args(LDAP_DEBUG_ANY, + "<-- pagedresults_parse_control_value: corrupted control value\n"); + return LDAP_PROTOCOL_ERROR; + } + + PR_Lock(conn->c_mutex); + /* the ber encoding is no longer needed */ + ber_free(ber, 1); + if ( cookie.bv_len <= 0 ) { + int i; + int maxlen; + /* first time? */ + maxlen = conn->c_pagedresults.prl_maxlen; + if (conn->c_pagedresults.prl_count == maxlen) { + if (0 == maxlen) { /* first time */ + conn->c_pagedresults.prl_maxlen = 1; + conn->c_pagedresults.prl_list = + (PagedResults *)slapi_ch_calloc(1, + sizeof(PagedResults)); + } else { + /* new max length */ + conn->c_pagedresults.prl_maxlen *= 2; + conn->c_pagedresults.prl_list = + (PagedResults *)slapi_ch_realloc( + (char *)conn->c_pagedresults.prl_list, + sizeof(PagedResults) * + conn->c_pagedresults.prl_maxlen); + /* initialze newly allocated area */ + memset(conn->c_pagedresults.prl_list + maxlen, '\0', + sizeof(PagedResults) * maxlen); } - /* the ber encoding is no longer needed */ - ber_free(ber, 1); - if ( cookie.bv_len <= 0 ) { - int i; - int maxlen; - /* first time? */ - PR_Lock(conn->c_mutex); - maxlen = conn->c_pagedresults.prl_maxlen; - if (conn->c_pagedresults.prl_count == maxlen) { - if (0 == maxlen) { /* first time */ - conn->c_pagedresults.prl_maxlen = 1; - conn->c_pagedresults.prl_list = - (PagedResults *)slapi_ch_calloc(1, - sizeof(PagedResults)); - } else { - /* new max length */ - conn->c_pagedresults.prl_maxlen *= 2; - conn->c_pagedresults.prl_list = - (PagedResults *)slapi_ch_realloc( - (char *)conn->c_pagedresults.prl_list, - sizeof(PagedResults) * - conn->c_pagedresults.prl_maxlen); - /* initialze newly allocated area */ - memset(conn->c_pagedresults.prl_list + maxlen, '\0', - sizeof(PagedResults) * maxlen); - } - *index = maxlen; /* the first position in the new area */ - } else { - for (i = 0; i < conn->c_pagedresults.prl_maxlen; i++) { - if (!conn->c_pagedresults.prl_list[i].pr_current_be) { - *index = i; - break; - } - } + *index = maxlen; /* the first position in the new area */ + } else { + for (i = 0; i < conn->c_pagedresults.prl_maxlen; i++) { + if (!conn->c_pagedresults.prl_list[i].pr_current_be) { + *index = i; + break; } - conn->c_pagedresults.prl_count++; - PR_Unlock(conn->c_mutex); - } else { - /* Repeated paged results request. - * PagedResults is already allocated. */ - char *ptr = slapi_ch_malloc(cookie.bv_len + 1); - memcpy(ptr, cookie.bv_val, cookie.bv_len); - *(ptr+cookie.bv_len) = '\0'; - *index = strtol(ptr, NULL, 10); - slapi_ch_free_string(&ptr); } - slapi_ch_free((void **)&cookie.bv_val); } + conn->c_pagedresults.prl_count++; + } else { + /* Repeated paged results request. + * PagedResults is already allocated. */ + char *ptr = slapi_ch_malloc(cookie.bv_len + 1); + memcpy(ptr, cookie.bv_val, cookie.bv_len); + *(ptr+cookie.bv_len) = '\0'; + *index = strtol(ptr, NULL, 10); + slapi_ch_free_string(&ptr); } + slapi_ch_free((void **)&cookie.bv_val); + if ((*index > -1) && (*index < conn->c_pagedresults.prl_maxlen)) { /* Need to keep the latest msgid to prepare for the abandon. */ conn->c_pagedresults.prl_list[*index].pr_msgid = op->o_msgid; @@ -149,6 +151,7 @@ pagedresults_parse_control_value( Slapi_PBlock *pb, "pagedresults_parse_control_value: invalid cookie: %d\n", *index); } + PR_Unlock(conn->c_mutex); LDAPDebug1Arg(LDAP_DEBUG_TRACE, "<-- pagedresults_parse_control_value: idx %d\n", *index); @@ -261,7 +264,7 @@ pagedresults_free_one( Connection *conn, int index ) } int -pagedresults_free_one_msgid( Connection *conn, ber_int_t msgid ) +pagedresults_free_one_msgid_nolock( Connection *conn, ber_int_t msgid ) { int rc = -1; int i; @@ -269,9 +272,9 @@ pagedresults_free_one_msgid( Connection *conn, ber_int_t msgid ) LDAPDebug1Arg(LDAP_DEBUG_TRACE, "--> pagedresults_free_one: msgid=%d\n", msgid); if (conn && (msgid > -1)) { - PR_Lock(conn->c_mutex); if (conn->c_pagedresults.prl_count <= 0) { - LDAPDebug2Args(LDAP_DEBUG_TRACE, "pagedresults_free_one_msgid: " + LDAPDebug2Args(LDAP_DEBUG_TRACE, + "pagedresults_free_one_msgid_nolock: " "conn=%d paged requests list count is %d\n", conn->c_connid, conn->c_pagedresults.prl_count); } else { @@ -285,7 +288,6 @@ pagedresults_free_one_msgid( Connection *conn, ber_int_t msgid ) } } } - PR_Unlock(conn->c_mutex); } LDAPDebug1Arg(LDAP_DEBUG_TRACE, "<-- pagedresults_free_one: %d\n", rc); @@ -720,7 +722,7 @@ pagedresults_reset_processing(Connection *conn, int index) /* Are all the paged results requests timed out? */ int -pagedresults_is_timedout(Connection *conn) +pagedresults_is_timedout_nolock(Connection *conn) { int i; PagedResults *prp = NULL; @@ -753,7 +755,7 @@ pagedresults_is_timedout(Connection *conn) /* reset all timeout */ int -pagedresults_reset_timedout(Connection *conn) +pagedresults_reset_timedout_nolock(Connection *conn) { int i; PagedResults *prp = NULL; @@ -773,7 +775,7 @@ pagedresults_reset_timedout(Connection *conn) /* paged results requests are in progress. */ int -pagedresults_in_use(Connection *conn) +pagedresults_in_use_nolock(Connection *conn) { LDAPDebug0Args(LDAP_DEBUG_TRACE, "--> pagedresults_in_use\n"); if (NULL == conn) { diff --git a/ldap/servers/slapd/proto-slap.h b/ldap/servers/slapd/proto-slap.h index 1b62c13..7e438b7 100644 --- a/ldap/servers/slapd/proto-slap.h +++ b/ldap/servers/slapd/proto-slap.h @@ -1421,11 +1421,11 @@ int pagedresults_set_timelimit(Connection *conn, time_t timelimit, int index); int pagedresults_cleanup(Connection *conn, int needlock); int pagedresults_check_or_set_processing(Connection *conn, int index); int pagedresults_reset_processing(Connection *conn, int index); -int pagedresults_is_timedout(Connection *conn); -int pagedresults_reset_timedout(Connection *conn); -int pagedresults_in_use(Connection *conn); +int pagedresults_is_timedout_nolock(Connection *conn); +int pagedresults_reset_timedout_nolock(Connection *conn); +int pagedresults_in_use_nolock(Connection *conn); int pagedresults_free_one(Connection *conn, int index); -int pagedresults_free_one_msgid( Connection *conn, ber_int_t msgid ); +int pagedresults_free_one_msgid_nolock( Connection *conn, ber_int_t msgid ); int op_is_pagedresults(Operation *op); int pagedresults_cleanup_all(Connection *conn, int needlock); void op_set_pagedresults(Operation *op); -- 1.7.7.6