From 6b7f87a557170164518d7c3b8e408304f2a9c1f4 Mon Sep 17 00:00:00 2001 From: Thierry Bordaz Date: Fri, 17 May 2019 14:31:45 +0200 Subject: [PATCH] Ticket 50389 - ns-slapd craches while two threads are polling the same connection Bug Description: nspr IO is not multi-threaded safe. 389-ds should not be in a situation where several threads are polling a same connection at the same time. The scenario is a worker send back an operation result at the same time another worker wants to read an incoming request. Fix Description: The fix consist in synchonizing polling with c_pdumutex. The thread that sends data (flush_ber) hold c_pdumutex. The thread that reads the data does a non blocking read. It then enforce ioblocktimeout with iteration of poll. The reading thread must hold c_pdumutex during poll to synchronize with the reader thread. The reading thread must poll with a small timeout (CONN_TURBO_TIMEOUT_INTERVAL). In order to not block the thread that send back data, the fix reduces the delay to 0.1s. https://pagure.io/389-ds-base/issue/50389 Reviewed by: Mark Reynolds, Matus Honek, William Brown Platforms tested: F28 Flag Day: no Doc impact: no (cherry picked from commit 2886ba77f664e4734a7ddfe4146f229caca49ce4) --- ldap/servers/slapd/connection.c | 5 ++++- ldap/servers/slapd/daemon.c | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/ldap/servers/slapd/connection.c b/ldap/servers/slapd/connection.c index 188383b97..945602f20 100644 --- a/ldap/servers/slapd/connection.c +++ b/ldap/servers/slapd/connection.c @@ -932,7 +932,8 @@ connection_free_private_buffer(Connection *conn) #define CONN_DONE 3 #define CONN_TIMEDOUT 4 -#define CONN_TURBO_TIMEOUT_INTERVAL 1000 /* milliseconds */ +#define CONN_TURBO_TIMEOUT_INTERVAL 100 /* milliseconds */ +#define CONN_TURBO_TIMEOUT_MAXIMUM 5 /* attempts * interval IE 2000ms with 400 * 5 */ #define CONN_TURBO_CHECK_INTERVAL 5 /* seconds */ #define CONN_TURBO_PERCENTILE 50 /* proportion of threads allowed to be in turbo mode */ #define CONN_TURBO_HYSTERESIS 0 /* avoid flip flopping in and out of turbo mode */ @@ -1207,7 +1208,9 @@ connection_read_operation(Connection *conn, Operation *op, ber_tag_t *tag, int * pr_pd.fd = (PRFileDesc *)conn->c_prfd; pr_pd.in_flags = PR_POLL_READ; pr_pd.out_flags = 0; + PR_Lock(conn->c_pdumutex); ret = PR_Poll(&pr_pd, 1, timeout); + PR_Unlock(conn->c_pdumutex); waits_done++; /* Did we time out ? */ if (0 == ret) { diff --git a/ldap/servers/slapd/daemon.c b/ldap/servers/slapd/daemon.c index c77e1f15c..4841a8a5c 100644 --- a/ldap/servers/slapd/daemon.c +++ b/ldap/servers/slapd/daemon.c @@ -1943,6 +1943,8 @@ ns_handle_pr_read_ready(struct ns_job_t *job) * or something goes seriously wrong. Otherwise, return 0. * If -1 is returned, PR_GetError() explains why. * Revision: handle changed to void * to allow 64bit support + * + * Caller (flush_ber) must hold conn->c_pdumutex */ static int slapd_poll(void *handle, int output) -- 2.17.2