Tree - rpms/389-ds-base - CentOS Git server

rpms / 389-ds-base

Blame SOURCES/0083-Ticket-48184-clean-up-and-delete-connections-at-shut.patch

Blob History Raw

		3b7e51	`From 5a5d3dffd0b36edb543fd31fa53d7128dd5161c2 Mon Sep 17 00:00:00 2001`
		3b7e51	`From: Thierry Bordaz <tbordaz@redhat.com>`
		3b7e51	`Date: Fri, 18 May 2018 10:13:46 +0200`
		3b7e51	`Subject: [PATCH] Ticket 48184 - clean up and delete connections at shutdown`
		3b7e51	`(2nd try)`
		3b7e51
		3b7e51	`Bug description:`
		3b7e51	`During shutdown we would not close connections.`
		3b7e51	`In the past this may have just been an annoyance, but now with the way`
		3b7e51	`nunc-stans works, io events can still trigger on open existing connections`
		3b7e51	`during shutdown.`
		3b7e51
		3b7e51	`Because of NS dynamics it can happen that several jobs want to work on the`
		3b7e51	`same connection. In such a case (a job is already set in c_job) we delay the`
		3b7e51	`new job, which will retry later.`
		3b7e51	`In addition:`
		3b7e51	`- some calls needed c_mutex`
		3b7e51	`- test uninitialized nunc-stans in case of shutdown while startup is not completed`
		3b7e51
		3b7e51	`Fix Description: Close connections during shutdown rather than`
		3b7e51	`leaving them alive.`
		3b7e51
		3b7e51	`https://pagure.io/389-ds-base/issue/48184`
		3b7e51
		3b7e51	`Reviewed by:`
		3b7e51	`Original was Ludwig and Viktor`
		3b7e51	`Second fix reviewed by Mark`
		3b7e51
		3b7e51	`Platforms tested: F26`
		3b7e51
		3b7e51	`Flag Day: no`
		3b7e51
		3b7e51	`Doc impact: no`
		3b7e51
		3b7e51	`(cherry picked from commit e562157ca3e97867d902996cc18fb04f90dc10a8)`
		3b7e51	`---`
		3b7e51	`ldap/servers/slapd/connection.c \| 2 +`
		3b7e51	`ldap/servers/slapd/conntable.c \| 13 ++++`
		3b7e51	`ldap/servers/slapd/daemon.c \| 131 ++++++++++++++++++++++++++++------------`
		3b7e51	`ldap/servers/slapd/fe.h \| 1 +`
		3b7e51	`ldap/servers/slapd/slap.h \| 1 +`
		3b7e51	`5 files changed, 108 insertions(+), 40 deletions(-)`
		3b7e51
		3b7e51	`diff --git a/ldap/servers/slapd/connection.c b/ldap/servers/slapd/connection.c`
		3b7e51	`index b5030f0cb..76e83112b 100644`
		3b7e51	`--- a/ldap/servers/slapd/connection.c`
		3b7e51	`+++ b/ldap/servers/slapd/connection.c`
		3b7e51	`@@ -1716,7 +1716,9 @@ connection_threadmain()`
		3b7e51	`if ((tag != LDAP_REQ_UNBIND) && !thread_turbo_flag && !replication_connection) {`
		3b7e51	`if (!more_data) {`
		3b7e51	`conn->c_flags &= ~CONN_FLAG_MAX_THREADS;`
		3b7e51	`+ PR_EnterMonitor(conn->c_mutex);`
		3b7e51	`connection_make_readable_nolock(conn);`
		3b7e51	`+ PR_ExitMonitor(conn->c_mutex);`
		3b7e51	`/* once the connection is readable, another thread may access conn,`
		3b7e51	`* so need locking from here on */`
		3b7e51	`signal_listner();`
		3b7e51	`diff --git a/ldap/servers/slapd/conntable.c b/ldap/servers/slapd/conntable.c`
		3b7e51	`index 7c57b47cd..f2f763dfa 100644`
		3b7e51	`--- a/ldap/servers/slapd/conntable.c`
		3b7e51	`+++ b/ldap/servers/slapd/conntable.c`
		3b7e51	`@@ -91,6 +91,19 @@ connection_table_abandon_all_operations(Connection_Table *ct)`
		3b7e51	`}`
		3b7e51	`}`
		3b7e51
		3b7e51	`+void`
		3b7e51	`+connection_table_disconnect_all(Connection_Table *ct)`
		3b7e51	`+{`
		3b7e51	`+ for (size_t i = 0; i < ct->size; i++) {`
		3b7e51	`+ if (ct->c[i].c_mutex) {`
		3b7e51	`+ Connection *c = &(ct->c[i]);`
		3b7e51	`+ PR_EnterMonitor(c->c_mutex);`
		3b7e51	`+ disconnect_server_nomutex(c, c->c_connid, -1, SLAPD_DISCONNECT_ABORT, ECANCELED);`
		3b7e51	`+ PR_ExitMonitor(c->c_mutex);`
		3b7e51	`+ }`
		3b7e51	`+ }`
		3b7e51	`+}`
		3b7e51	`+`
		3b7e51	`/* Given a file descriptor for a socket, this function will return`
		3b7e51	`* a slot in the connection table to use.`
		3b7e51	`*`
		3b7e51	`diff --git a/ldap/servers/slapd/daemon.c b/ldap/servers/slapd/daemon.c`
		3b7e51	`index fcc461a90..50e67474e 100644`
		3b7e51	`--- a/ldap/servers/slapd/daemon.c`
		3b7e51	`+++ b/ldap/servers/slapd/daemon.c`
		3b7e51	`@@ -1087,12 +1087,18 @@ slapd_daemon(daemon_ports_t ports, ns_thrpool_t tp)`
		3b7e51	`/* we have exited from ns_thrpool_wait. This means we are shutting down! */`
		3b7e51	`/* Please see https://firstyear.fedorapeople.org/nunc-stans/md_docs_job-safety.html */`
		3b7e51	`/* tldr is shutdown needs to run first to allow job_done on an ARMED job */`
		3b7e51	`- for (size_t i = 0; i < listeners; i++) {`
		3b7e51	`- PRStatus shutdown_status = ns_job_done(listener_idxs[i].ns_job);`
		3b7e51	`- if (shutdown_status != PR_SUCCESS) {`
		3b7e51	`- slapi_log_err(SLAPI_LOG_CRIT, "ns_set_shutdown", "Failed to shutdown listener idx %" PRIu64 " !\n", i);`
		3b7e51	`+ for (uint64_t i = 0; i < listeners; i++) {`
		3b7e51	`+ PRStatus shutdown_status;`
		3b7e51	`+`
		3b7e51	`+ if (listener_idxs[i].ns_job) {`
		3b7e51	`+ shutdown_status = ns_job_done(listener_idxs[i].ns_job);`
		3b7e51	`+ if (shutdown_status != PR_SUCCESS) {`
		3b7e51	`+ slapi_log_err(SLAPI_LOG_CRIT, "ns_set_shutdown", "Failed to shutdown listener idx %" PRIu64 " !\n", i);`
		3b7e51	`+ }`
		3b7e51	`+ PR_ASSERT(shutdown_status == PR_SUCCESS);`
		3b7e51	`+ } else {`
		3b7e51	`+ slapi_log_err(SLAPI_LOG_CRIT, "slapd_daemon", "Listeners uninitialized. Possibly the server was shutdown while starting\n");`
		3b7e51	`}`
		3b7e51	`- PR_ASSERT(shutdown_status == PR_SUCCESS);`
		3b7e51	`listener_idxs[i].ns_job = NULL;`
		3b7e51	`}`
		3b7e51	`} else {`
		3b7e51	`@@ -1176,6 +1182,32 @@ slapd_daemon(daemon_ports_t ports, ns_thrpool_t tp)`
		3b7e51	`housekeeping_stop(); /* Run this after op_thread_cleanup() logged sth */`
		3b7e51	`disk_monitoring_stop();`
		3b7e51
		3b7e51	`+ /*`
		3b7e51	`+ * Now that they are abandoned, we need to mark them as done.`
		3b7e51	`+ * In NS while it's safe to allow excess jobs to be cleaned`
		3b7e51	`+ * by the walk and ns_job_done of remaining queued events, the`
		3b7e51	`+ * issue is that if we allow something to live past this point`
		3b7e51	`+ * the CT is freed from underneath, and bad things happen (tm).`
		3b7e51	`+ *`
		3b7e51	`+ * NOTE: We do this after we stop psearch, because there could`
		3b7e51	`+ * be a race between flagging the psearch done, and users still`
		3b7e51	`+ * try to send on the connection. Similar with op_threads.`
		3b7e51	`+ */`
		3b7e51	`+ connection_table_disconnect_all(the_connection_table);`
		3b7e51	`+`
		3b7e51	`+ /*`
		3b7e51	`+ * WARNING: Normally we should close the tp in main`
		3b7e51	`+ * but because of issues in the current connection design`
		3b7e51	`+ * we need to close it here to guarantee events won't fire!`
		3b7e51	`+ *`
		3b7e51	`+ * All the connection close jobs "should" complete before`
		3b7e51	`+ * shutdown at least.`
		3b7e51	`+ */`
		3b7e51	`+ if (enable_nunc_stans) {`
		3b7e51	`+ ns_thrpool_shutdown(tp);`
		3b7e51	`+ ns_thrpool_wait(tp);`
		3b7e51	`+ }`
		3b7e51	`+`
		3b7e51	`threads = g_get_active_threadcnt();`
		3b7e51	`if (threads > 0) {`
		3b7e51	`slapi_log_err(SLAPI_LOG_INFO, "slapd_daemon",`
		3b7e51	`@@ -1628,25 +1660,18 @@ ns_handle_closure(struct ns_job_t *job)`
		3b7e51	`Connection *c = (Connection *)ns_job_get_data(job);`
		3b7e51	`int do_yield = 0;`
		3b7e51
		3b7e51	`-/* this function must be called from the event loop thread */`
		3b7e51	`-#ifdef DEBUG`
		3b7e51	`- PR_ASSERT(0 == NS_JOB_IS_THREAD(ns_job_get_type(job)));`
		3b7e51	`-#else`
		3b7e51	`- /* This doesn't actually confirm it's in the event loop thread, but it's a start */`
		3b7e51	`- if (NS_JOB_IS_THREAD(ns_job_get_type(job)) != 0) {`
		3b7e51	`- slapi_log_err(SLAPI_LOG_ERR, "ns_handle_closure", "Attempt to close outside of event loop thread %" PRIu64 " for fd=%d\n",`
		3b7e51	`- c->c_connid, c->c_sd);`
		3b7e51	`- return;`
		3b7e51	`- }`
		3b7e51	`-#endif`
		3b7e51	`-`
		3b7e51	`PR_EnterMonitor(c->c_mutex);`
		3b7e51	`+ /* Assert we really have the right job state. */`
		3b7e51	`+ PR_ASSERT(job == c->c_job);`
		3b7e51
		3b7e51	`connection_release_nolock_ext(c, 1); /* release ref acquired for event framework */`
		3b7e51	`PR_ASSERT(c->c_ns_close_jobs == 1); /* should be exactly 1 active close job - this one */`
		3b7e51	`c->c_ns_close_jobs--; /* this job is processing closure */`
		3b7e51	`+ /* Because handle closure will add a new job, we need to detach our current one. */`
		3b7e51	`+ c->c_job = NULL;`
		3b7e51	`do_yield = ns_handle_closure_nomutex(c);`
		3b7e51	`PR_ExitMonitor(c->c_mutex);`
		3b7e51	`+ /* Remove this task now. */`
		3b7e51	`ns_job_done(job);`
		3b7e51	`if (do_yield) {`
		3b7e51	`/* closure not done - another reference still outstanding */`
		3b7e51	`@@ -1659,14 +1684,25 @@ ns_handle_closure(struct ns_job_t *job)`
		3b7e51	`/**`
		3b7e51	`* Schedule more I/O for this connection, or make sure that it`
		3b7e51	`* is closed in the event loop.`
		3b7e51	`+ * caller must hold c_mutex`
		3b7e51	`+ * It returns`
		3b7e51	`+ * 0 on success`
		3b7e51	`+ * 1 on need to retry`
		3b7e51	`*/`
		3b7e51	`-void`
		3b7e51	`-ns_connection_post_io_or_closing(Connection *conn)`
		3b7e51	`+static int`
		3b7e51	`+ns_connection_post_io_or_closing_try(Connection *conn)`
		3b7e51	`{`
		3b7e51	`struct timeval tv;`
		3b7e51
		3b7e51	`if (!enable_nunc_stans) {`
		3b7e51	`- return;`
		3b7e51	`+ return 0;`
		3b7e51	`+ }`
		3b7e51	`+`
		3b7e51	`+ /*`
		3b7e51	`+ * Cancel any existing ns jobs we have registered.`
		3b7e51	`+ */`
		3b7e51	`+ if (conn->c_job != NULL) {`
		3b7e51	`+ return 1;`
		3b7e51	`}`
		3b7e51
		3b7e51	`if (CONN_NEEDS_CLOSING(conn)) {`
		3b7e51	`@@ -1676,15 +1712,12 @@ ns_connection_post_io_or_closing(Connection *conn)`
		3b7e51	`slapi_log_err(SLAPI_LOG_CONNS, "ns_connection_post_io_or_closing", "Already a close "`
		3b7e51	`"job in progress on conn %" PRIu64 " for fd=%d\n",`
		3b7e51	`conn->c_connid, conn->c_sd);`
		3b7e51	`- return;`
		3b7e51	`+ return 0;`
		3b7e51	`} else {`
		3b7e51	`- /* just make sure we schedule the event to be closed in a timely manner */`
		3b7e51	`- tv.tv_sec = 0;`
		3b7e51	`- tv.tv_usec = slapd_wakeup_timer * 1000;`
		3b7e51	`conn->c_ns_close_jobs++; /* now 1 active closure job */`
		3b7e51	`connection_acquire_nolock_ext(conn, 1 /* allow acquire even when closing */); /* event framework now has a reference */`
		3b7e51	`- ns_result_t job_result = ns_add_timeout_job(conn->c_tp, &tv, NS_JOB_TIMER,`
		3b7e51	`- ns_handle_closure, conn, NULL);`
		3b7e51	`+ /* Close the job asynchronously. Why? */`
		3b7e51	`+ ns_result_t job_result = ns_add_job(conn->c_tp, NS_JOB_TIMER, ns_handle_closure, conn, &(conn->c_job));`
		3b7e51	`if (job_result != NS_SUCCESS) {`
		3b7e51	`if (job_result == NS_SHUTDOWN) {`
		3b7e51	`slapi_log_err(SLAPI_LOG_INFO, "ns_connection_post_io_or_closing", "post closure job "`
		3b7e51	`@@ -1723,12 +1756,12 @@ ns_connection_post_io_or_closing(Connection *conn)`
		3b7e51	`* The error occurs when we get a connection in a closing state.`
		3b7e51	`* For now we return, but there is probably a better way to handle the error case.`
		3b7e51	`*/`
		3b7e51	`- return;`
		3b7e51	`+ return 0;`
		3b7e51	`}`
		3b7e51	`#endif`
		3b7e51	`ns_result_t job_result = ns_add_io_timeout_job(conn->c_tp, conn->c_prfd, &tv,`
		3b7e51	`NS_JOB_READ \| NS_JOB_PRESERVE_FD,`
		3b7e51	`- ns_handle_pr_read_ready, conn, NULL);`
		3b7e51	`+ ns_handle_pr_read_ready, conn, &(conn->c_job));`
		3b7e51	`if (job_result != NS_SUCCESS) {`
		3b7e51	`if (job_result == NS_SHUTDOWN) {`
		3b7e51	`slapi_log_err(SLAPI_LOG_INFO, "ns_connection_post_io_or_closing", "post I/O job for "`
		3b7e51	`@@ -1745,6 +1778,28 @@ ns_connection_post_io_or_closing(Connection *conn)`
		3b7e51	`conn->c_connid, conn->c_sd);`
		3b7e51	`}`
		3b7e51	`}`
		3b7e51	`+ return 0;`
		3b7e51	`+}`
		3b7e51	`+void`
		3b7e51	`+ns_connection_post_io_or_closing(Connection *conn)`
		3b7e51	`+{`
		3b7e51	`+ while (ns_connection_post_io_or_closing_try(conn)) {`
		3b7e51	`+ /* we should retry later */`
		3b7e51	`+`
		3b7e51	`+ /* We are not supposed to work immediately on the connection that is taken by`
		3b7e51	`+ * another job`
		3b7e51	`+ * release the lock and give some time`
		3b7e51	`+ */`
		3b7e51	`+`
		3b7e51	`+ if (CONN_NEEDS_CLOSING(conn) && conn->c_ns_close_jobs) {`
		3b7e51	`+ return;`
		3b7e51	`+ } else {`
		3b7e51	`+ PR_ExitMonitor(conn->c_mutex);`
		3b7e51	`+ DS_Sleep(PR_MillisecondsToInterval(100));`
		3b7e51	`+`
		3b7e51	`+ PR_EnterMonitor(conn->c_mutex);`
		3b7e51	`+ }`
		3b7e51	`+ }`
		3b7e51	`}`
		3b7e51
		3b7e51	`/* This function must be called without the thread flag, in the`
		3b7e51	`@@ -1757,19 +1812,12 @@ ns_handle_pr_read_ready(struct ns_job_t *job)`
		3b7e51	`int maxthreads = config_get_maxthreadsperconn();`
		3b7e51	`Connection *c = (Connection *)ns_job_get_data(job);`
		3b7e51
		3b7e51	`-/* this function must be called from the event loop thread */`
		3b7e51	`-#ifdef DEBUG`
		3b7e51	`- PR_ASSERT(0 == NS_JOB_IS_THREAD(ns_job_get_type(job)));`
		3b7e51	`-#else`
		3b7e51	`- /* This doesn't actually confirm it's in the event loop thread, but it's a start */`
		3b7e51	`- if (NS_JOB_IS_THREAD(ns_job_get_type(job)) != 0) {`
		3b7e51	`- slapi_log_err(SLAPI_LOG_ERR, "ns_handle_pr_read_ready", "Attempt to handle read ready outside of event loop thread %" PRIu64 " for fd=%d\n",`
		3b7e51	`- c->c_connid, c->c_sd);`
		3b7e51	`- return;`
		3b7e51	`- }`
		3b7e51	`-#endif`
		3b7e51	`-`
		3b7e51	`PR_EnterMonitor(c->c_mutex);`
		3b7e51	`+ /* Assert we really have the right job state. */`
		3b7e51	`+ PR_ASSERT(job == c->c_job);`
		3b7e51	`+`
		3b7e51	`+ /* On all code paths we remove the job, so set it null now */`
		3b7e51	`+ c->c_job = NULL;`
		3b7e51
		3b7e51	`slapi_log_err(SLAPI_LOG_CONNS, "ns_handle_pr_read_ready", "activity on conn %" PRIu64 " for fd=%d\n",`
		3b7e51	`c->c_connid, c->c_sd);`
		3b7e51	`@@ -1829,6 +1877,7 @@ ns_handle_pr_read_ready(struct ns_job_t *job)`
		3b7e51	`slapi_log_err(SLAPI_LOG_CONNS, "ns_handle_pr_read_ready", "queued conn %" PRIu64 " for fd=%d\n",`
		3b7e51	`c->c_connid, c->c_sd);`
		3b7e51	`}`
		3b7e51	`+ /* Since we call done on the job, we need to remove it here. */`
		3b7e51	`PR_ExitMonitor(c->c_mutex);`
		3b7e51	`ns_job_done(job);`
		3b7e51	`return;`
		3b7e51	`@@ -2451,7 +2500,9 @@ ns_handle_new_connection(struct ns_job_t *job)`
		3b7e51	`* that poll() was avoided, even at the expense of putting this new fd back`
		3b7e51	`* in nunc-stans to poll for read ready.`
		3b7e51	`*/`
		3b7e51	`+ PR_EnterMonitor(c->c_mutex);`
		3b7e51	`ns_connection_post_io_or_closing(c);`
		3b7e51	`+ PR_ExitMonitor(c->c_mutex);`
		3b7e51	`return;`
		3b7e51	`}`
		3b7e51
		3b7e51	`diff --git a/ldap/servers/slapd/fe.h b/ldap/servers/slapd/fe.h`
		3b7e51	`index 4d25a9fb8..f47bb6145 100644`
		3b7e51	`--- a/ldap/servers/slapd/fe.h`
		3b7e51	`+++ b/ldap/servers/slapd/fe.h`
		3b7e51	`@@ -100,6 +100,7 @@ extern Connection_Table *the_connection_table; /* JCM - Exported from globals.c`
		3b7e51	`Connection_Table *connection_table_new(int table_size);`
		3b7e51	`void connection_table_free(Connection_Table *ct);`
		3b7e51	`void connection_table_abandon_all_operations(Connection_Table *ct);`
		3b7e51	`+void connection_table_disconnect_all(Connection_Table *ct);`
		3b7e51	`Connection *connection_table_get_connection(Connection_Table *ct, int sd);`
		3b7e51	`int connection_table_move_connection_out_of_active_list(Connection_Table *ct, Connection *c);`
		3b7e51	`void connection_table_move_connection_on_to_active_list(Connection_Table *ct, Connection *c);`
		3b7e51	`diff --git a/ldap/servers/slapd/slap.h b/ldap/servers/slapd/slap.h`
		3b7e51	`index 03355f5fe..de4ac35c0 100644`
		3b7e51	`--- a/ldap/servers/slapd/slap.h`
		3b7e51	`+++ b/ldap/servers/slapd/slap.h`
		3b7e51	`@@ -1650,6 +1650,7 @@ typedef struct conn`
		3b7e51	`void *c_io_layer_cb_data; /* callback data */`
		3b7e51	`struct connection_table *c_ct; /* connection table that this connection belongs to */`
		3b7e51	`ns_thrpool_t *c_tp; /* thread pool for this connection */`
		3b7e51	`+ struct ns_job_t *c_job; /* If it exists, the current ns_job_t */`
		3b7e51	`int c_ns_close_jobs; /* number of current close jobs */`
		3b7e51	`char *c_ipaddr; /* ip address str - used by monitor */`
		3b7e51	`} Connection;`
		3b7e51	`--`
		3b7e51	`2.13.6`
		3b7e51

rpms / 389-ds-base

Source Code

Blame SOURCES/0083-Ticket-48184-clean-up-and-delete-connections-at-shut.patch