Blame SOURCES/0030-Ticket-50389-ns-slapd-craches-while-two-threads-are-.patch

26521d
From 6b7f87a557170164518d7c3b8e408304f2a9c1f4 Mon Sep 17 00:00:00 2001
26521d
From: Thierry Bordaz <tbordaz@redhat.com>
26521d
Date: Fri, 17 May 2019 14:31:45 +0200
26521d
Subject: [PATCH] Ticket 50389 - ns-slapd crashes while two threads are polling
26521d
 the same connection
26521d
26521d
Bug Description:
26521d
	nspr IO is not multi-threaded safe.
26521d
	389-ds should not be in a situation where several threads are polling
26521d
	the same connection at the same time.
26521d
	The scenario is that a worker sends back an operation result at the same time
26521d
	another worker wants to read an incoming request.
26521d
26521d
Fix Description:
26521d
	The fix consists in synchronizing polling with c_pdumutex.
26521d
26521d
	The thread that sends data (flush_ber) holds c_pdumutex.
26521d
26521d
	The thread that reads the data does a non blocking read. It then
26521d
	enforces ioblocktimeout with iterations of poll.
26521d
	The reading thread must hold c_pdumutex during poll to synchronize
26521d
	with the writer thread.
26521d
	The reading thread must poll with a small timeout
26521d
	(CONN_TURBO_TIMEOUT_INTERVAL). In order to not block
26521d
	the thread that sends back data, the fix reduces the delay to 0.1s.
26521d
26521d
https://pagure.io/389-ds-base/issue/50389
26521d
26521d
Reviewed by: Mark Reynolds, Matus Honek, William Brown
26521d
26521d
Platforms tested: F28
26521d
26521d
Flag Day: no
26521d
26521d
Doc impact: no
26521d
26521d
(cherry picked from commit 2886ba77f664e4734a7ddfe4146f229caca49ce4)
26521d
---
26521d
 ldap/servers/slapd/connection.c | 5 ++++-
26521d
 ldap/servers/slapd/daemon.c     | 2 ++
26521d
 2 files changed, 6 insertions(+), 1 deletion(-)
26521d
26521d
diff --git a/ldap/servers/slapd/connection.c b/ldap/servers/slapd/connection.c
26521d
index 188383b97..945602f20 100644
26521d
--- a/ldap/servers/slapd/connection.c
26521d
+++ b/ldap/servers/slapd/connection.c
26521d
@@ -932,7 +932,8 @@ connection_free_private_buffer(Connection *conn)
26521d
 #define CONN_DONE 3
26521d
 #define CONN_TIMEDOUT 4
26521d
 
26521d
-#define CONN_TURBO_TIMEOUT_INTERVAL 1000 /* milliseconds */
26521d
+#define CONN_TURBO_TIMEOUT_INTERVAL 100 /* milliseconds */
26521d
+#define CONN_TURBO_TIMEOUT_MAXIMUM 5 /* attempts * interval IE 500ms with 100 * 5 */
26521d
 #define CONN_TURBO_CHECK_INTERVAL 5      /* seconds */
26521d
 #define CONN_TURBO_PERCENTILE 50         /* proportion of threads allowed to be in turbo mode */
26521d
 #define CONN_TURBO_HYSTERESIS 0          /* avoid flip flopping in and out of turbo mode */
26521d
@@ -1207,7 +1208,9 @@ connection_read_operation(Connection *conn, Operation *op, ber_tag_t *tag, int *
26521d
                 pr_pd.fd = (PRFileDesc *)conn->c_prfd;
26521d
                 pr_pd.in_flags = PR_POLL_READ;
26521d
                 pr_pd.out_flags = 0;
26521d
+                PR_Lock(conn->c_pdumutex);
26521d
                 ret = PR_Poll(&pr_pd, 1, timeout);
26521d
+                PR_Unlock(conn->c_pdumutex);
26521d
                 waits_done++;
26521d
                 /* Did we time out ? */
26521d
                 if (0 == ret) {
26521d
diff --git a/ldap/servers/slapd/daemon.c b/ldap/servers/slapd/daemon.c
26521d
index c77e1f15c..4841a8a5c 100644
26521d
--- a/ldap/servers/slapd/daemon.c
26521d
+++ b/ldap/servers/slapd/daemon.c
26521d
@@ -1943,6 +1943,8 @@ ns_handle_pr_read_ready(struct ns_job_t *job)
26521d
  * or something goes seriously wrong.  Otherwise, return 0.
26521d
  * If -1 is returned, PR_GetError() explains why.
26521d
  * Revision: handle changed to void * to allow 64bit support
26521d
+ *
26521d
+ * Caller (flush_ber) must hold conn->c_pdumutex
26521d
  */
26521d
 static int
26521d
 slapd_poll(void *handle, int output)
26521d
-- 
26521d
2.17.2
26521d