Blob Blame Raw
From f993a9b5a1ac95728baae201543cad5993a28da1 Mon Sep 17 00:00:00 2001
From: Ludwig Krispenz <lkrispen@redhat.com>
Date: Mon, 1 Aug 2016 10:47:31 +0200
Subject: [PATCH 22/29] Ticket 48882 - server can hang in connection list
 processing

Bug Description: if a thread holding the connection monitor
		 is stuck in polling and the client doesn't
		 respond, the main thread can be blocked on
		 this connection when iterating the connection
		 table.

Fix Description: Implement a test and enter function for the connection
		 monitor, so the main thread will never wait for a
		 connection monitor already owned by another thread

https://fedorahosted.org/389/ticket/48882

Reviewed by: Noriko, Thanks

(cherry picked from commit 7110db91e75f392f1c83643d9aa88895992d9c01)
---
 ldap/servers/slapd/daemon.c | 69 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 68 insertions(+), 1 deletion(-)

diff --git a/ldap/servers/slapd/daemon.c b/ldap/servers/slapd/daemon.c
index 81a54cf..23c30c3 100644
--- a/ldap/servers/slapd/daemon.c
+++ b/ldap/servers/slapd/daemon.c
@@ -164,6 +164,67 @@ static void unfurl_banners(Connection_Table *ct,daemon_ports_t *ports, PRFileDes
 static int write_pid_file();
 static int init_shutdown_detect();
 
+/*
+ * NSPR has different implementations for PRMonitor, depending
+ * on the available threading model.
+ * The PR_TestAndEnterMonitor is not available for pthreads
+ * so this is an implementation based on the code in
+ * prmon.c adapted to resemble the implementation in ptsynch.c
+ *
+ * The function needs access to the elements of the PRMonitor struct.
+ * Therefore the pthread variant of PRMonitor is copied here.
+ */
+typedef struct MY_PRMonitor { /* c->c_mutex is cast to this type: keep the layout identical to NSPR's pthread PRMonitor */
+    const char* name;
+    pthread_mutex_t lock;       /* held while MY_TestAndEnterMonitor inspects/updates the fields below */
+    pthread_t owner;            /* thread recorded as the holder on first entry */
+    pthread_cond_t entryCV;
+    pthread_cond_t waitCV;
+    PRInt32 refCount;
+    PRUint32 entryCount;        /* 0 = not entered; incremented on every successful enter */
+    PRIntn notifyTimes;         /* asserted to be 0 when the monitor is first entered */
+} MY_PRMonitor;
+
+/* Enter mon without waiting: PR_TRUE if the caller entered (or re-entered)
+ * the monitor, PR_FALSE if another thread owns it or a mutex call failed.
+ */
+static PRBool MY_TestAndEnterMonitor(MY_PRMonitor *mon)
+{
+    pthread_t self = pthread_self();
+    int rv; /* pthread_mutex_* return an int error number, not a PRStatus */
+
+    PR_ASSERT(mon != NULL);
+    rv = pthread_mutex_lock(&mon->lock);
+    if (rv != 0) {
+	slapi_log_error(SLAPI_LOG_FATAL ,"TestAndEnterMonitor",
+                        "Failed to acquire monitor mutex, error (%d)\n", rv);
+	return PR_FALSE;
+    }
+    if (mon->entryCount != 0) {
+        if (pthread_equal(mon->owner, self))
+            goto done;
+        rv = pthread_mutex_unlock(&mon->lock);
+	if (rv != 0) {
+	    slapi_log_error(SLAPI_LOG_FATAL ,"TestAndEnterMonitor",
+                        "Failed to release monitor mutex, error (%d)\n", rv);
+	}
+        return PR_FALSE;
+    }
+    /* and now I have the monitor */
+    PR_ASSERT(mon->notifyTimes == 0);
+    PR_ASSERT((mon->owner) == 0);
+    mon->owner = self;
+
+done:
+    mon->entryCount += 1;
+    rv = pthread_mutex_unlock(&mon->lock);
+    if (rv != 0) {
+	slapi_log_error(SLAPI_LOG_FATAL ,"TestAndEnterMonitor",
+                        "Failed to release monitor mutex, error (%d)\n", rv);
+	return PR_FALSE;
+    }
+    return PR_TRUE;
+}
 /* Globals which are used to store the sockets between
  * calls to daemon_pre_setuid_init() and the daemon thread
  * creation. */
@@ -1552,7 +1613,13 @@ setup_pr_read_pds(Connection_Table *ct, PRFileDesc **n_tcps, PRFileDesc **s_tcps
 		}
 		else
 		{
-			PR_EnterMonitor(c->c_mutex);
+			/* we try to acquire the connection mutex, if it is already
+			 * acquired by another thread, don't wait
+			 */
+			if (PR_FALSE == MY_TestAndEnterMonitor((MY_PRMonitor *)c->c_mutex)) {
+				c = next;
+				continue;
+			}
 			if (c->c_flags & CONN_FLAG_CLOSING)
 			{
 				/* A worker thread has marked that this connection
-- 
2.4.11