Blob Blame Raw
From 240cfa58c62571b92640a385cfcce6d858cb00dc Mon Sep 17 00:00:00 2001
From: Thierry Bordaz <tbordaz@redhat.com>
Date: Wed, 30 May 2018 15:48:11 +0200
Subject: [PATCH] Ticket 48184 - clean up and delete connections at shutdown
 (3rd)

Bug description:
        During shutdown we would not close connections.
        In the past this may have just been an annoyance, but now with the way
        nunc-stans works, io events can still trigger on open xeisting connectinos
        during shutdown.

Fix Description:
        Because of NS dynamic it can happen that several jobs wants to work on the
        same connection. In such case (a job is already set in c_job) we delay the
        new job that will retry.
        In addition:
            - some call needed c_mutex
            - test uninitialized nunc-stans in case of shutdown while startup is not completed

	If it is not possible to schedule immediately a job it is sometime useless to wait:
		- if the connection is already freed, just cancel the scheduled job
		  and do not register a new one
		- If we are in middle of a shutdown we do not know if the
		  scheduled job is ns_handle_closure, so cancel the scheduled
		  job and schedule ns_handle_closure.

https://pagure.io/389-ds-base/issue/48184

Reviewed by:
            Original fix reviewed by Ludwig and Viktor
            Second   fix reviewed by Mark
	    Third    fix reviewed by Mark

Platforms tested: F26

Flag Day: no

Doc impact: no
---
 ldap/servers/slapd/connection.c | 10 +++--
 ldap/servers/slapd/conntable.c  |  2 +-
 ldap/servers/slapd/daemon.c     | 67 +++++++++++++++++++++++++--------
 ldap/servers/slapd/proto-slap.h |  2 +-
 4 files changed, 60 insertions(+), 21 deletions(-)

diff --git a/ldap/servers/slapd/connection.c b/ldap/servers/slapd/connection.c
index 76e83112b..c54e7c26c 100644
--- a/ldap/servers/slapd/connection.c
+++ b/ldap/servers/slapd/connection.c
@@ -741,14 +741,18 @@ connection_acquire_nolock(Connection *conn)
 
 /* returns non-0 if connection can be reused and 0 otherwise */
 int
-connection_is_free(Connection *conn)
+connection_is_free(Connection *conn, int use_lock)
 {
     int rc;
 
-    PR_EnterMonitor(conn->c_mutex);
+    if (use_lock) {
+        PR_EnterMonitor(conn->c_mutex);
+    }
     rc = conn->c_sd == SLAPD_INVALID_SOCKET && conn->c_refcnt == 0 &&
          !(conn->c_flags & CONN_FLAG_CLOSING);
-    PR_ExitMonitor(conn->c_mutex);
+    if (use_lock) {
+        PR_ExitMonitor(conn->c_mutex);
+    }
 
     return rc;
 }
diff --git a/ldap/servers/slapd/conntable.c b/ldap/servers/slapd/conntable.c
index f2f763dfa..114871d17 100644
--- a/ldap/servers/slapd/conntable.c
+++ b/ldap/servers/slapd/conntable.c
@@ -129,7 +129,7 @@ connection_table_get_connection(Connection_Table *ct, int sd)
             break;
         }
 
-        if (connection_is_free(&(ct->c[index]))) {
+        if (connection_is_free(&(ct->c[index]), 1 /*use lock */)) {
             break;
         }
     }
diff --git a/ldap/servers/slapd/daemon.c b/ldap/servers/slapd/daemon.c
index 50e67474e..35cfe7de0 100644
--- a/ldap/servers/slapd/daemon.c
+++ b/ldap/servers/slapd/daemon.c
@@ -1699,7 +1699,8 @@ ns_connection_post_io_or_closing_try(Connection *conn)
     }
 
     /*
-     * Cancel any existing ns jobs we have registered.
+     * A job was already scheduled.
+     * Let it be dispatched first
      */
     if (conn->c_job != NULL) {
         return 1;
@@ -1780,25 +1781,59 @@ ns_connection_post_io_or_closing_try(Connection *conn)
     }
     return 0;
 }
+
+/*
+ * Tries to schedule I/O for this connection
+ * If the connection is already busy with a scheduled I/O
+ * it can wait until scheduled I/O is dispatched
+ *
+ * caller must hold c_mutex
+ */
 void
 ns_connection_post_io_or_closing(Connection *conn)
 {
     while (ns_connection_post_io_or_closing_try(conn)) {
-	/* we should retry later */
-	
-	/* We are not suppose to work immediately on the connection that is taken by
-	 * another job
-	 * release the lock and give some time
-	 */
-	
-	if (CONN_NEEDS_CLOSING(conn) && conn->c_ns_close_jobs) {
-	    return;
-	} else {
-	    PR_ExitMonitor(conn->c_mutex);
-	    DS_Sleep(PR_MillisecondsToInterval(100));
-
-	    PR_EnterMonitor(conn->c_mutex);
-	}
+        /* Here a job is currently scheduled (c->job is set) and not yet dispatched
+         * Job can be either:
+         *  - ns_handle_closure
+         *  - ns_handle_pr_read_ready
+         */
+
+        if (connection_is_free(conn, 0)) {
+            PRStatus shutdown_status;
+
+            /* The connection being freed,
+             * It means that ns_handle_closure already completed and the connection
+             * is no longer on the active list.
+             * The scheduled job is useless and scheduling a new one as well
+             */
+            shutdown_status = ns_job_done(conn->c_job);
+            if (shutdown_status != PR_SUCCESS) {
+                slapi_log_err(SLAPI_LOG_CRIT, "ns_connection_post_io_or_closing", "Failed cancel a job on a freed connection %d !\n", conn->c_sd);
+            }
+            conn->c_job = NULL;
+            return;
+        }
+        if (g_get_shutdown() && CONN_NEEDS_CLOSING(conn)) {
+            PRStatus shutdown_status;
+
+            /* This is shutting down cancel any scheduled job to register ns_handle_closure
+             */
+            shutdown_status = ns_job_done(conn->c_job);
+            if (shutdown_status != PR_SUCCESS) {
+                slapi_log_err(SLAPI_LOG_CRIT, "ns_connection_post_io_or_closing", "Failed to cancel a job during shutdown %d !\n", conn->c_sd);
+            }
+            conn->c_job = NULL;
+            continue;
+        }
+
+        /* We are not suppose to work immediately on the connection that is taken by
+         * another job
+         * release the lock and give some time
+         */
+        PR_ExitMonitor(conn->c_mutex);
+        DS_Sleep(PR_MillisecondsToInterval(100));
+        PR_EnterMonitor(conn->c_mutex);
     }
 }
 
diff --git a/ldap/servers/slapd/proto-slap.h b/ldap/servers/slapd/proto-slap.h
index b13334ad1..f54bc6bc5 100644
--- a/ldap/servers/slapd/proto-slap.h
+++ b/ldap/servers/slapd/proto-slap.h
@@ -1431,7 +1431,7 @@ int connection_acquire_nolock(Connection *conn);
 int connection_acquire_nolock_ext(Connection *conn, int allow_when_closing);
 int connection_release_nolock(Connection *conn);
 int connection_release_nolock_ext(Connection *conn, int release_only);
-int connection_is_free(Connection *conn);
+int connection_is_free(Connection *conn, int user_lock);
 int connection_is_active_nolock(Connection *conn);
 #if defined(USE_OPENLDAP)
 ber_slen_t openldap_read_function(Sockbuf_IO_Desc *sbiod, void *buf, ber_len_t len);
-- 
2.17.0