diff --git a/SOURCES/0091-Ticket-49765-Async-operations-can-hang-when-the-serv.patch b/SOURCES/0091-Ticket-49765-Async-operations-can-hang-when-the-serv.patch new file mode 100644 index 0000000..a5387aa --- /dev/null +++ b/SOURCES/0091-Ticket-49765-Async-operations-can-hang-when-the-serv.patch @@ -0,0 +1,278 @@ +From 70e6c68196d381d05d35414c138894b54939d236 Mon Sep 17 00:00:00 2001 +From: Thierry Bordaz +Date: Thu, 7 Jun 2018 16:19:34 +0200 +Subject: [PATCH] Ticket 49765 - Async operations can hang when the server is + running nunc-stans + +Bug Description: + The fix https://pagure.io/389-ds-base/issue/48184 allowed to schedule + several NS handlers where each handler waits for the dispatch of the + previous handler before being schedule. + + In case the current handler is never called (connection inactivity) + those that are waiting can wait indefinitely (until timeout or connection + closure). But those that are waiting delay the processing of the operation + when the scheduling is called by connection_threadmain. + + The some operations can appear hanging. + This scenario happens with async operations + +Fix Description: + Instead of waiting for the completion of the scheduled handler, + evaluates if the scheduled handler needs to be cleared (ns_job_done) + or the waiting handler to be canceled. + +https://pagure.io/389-ds-base/issue/49765 + +Reviewed by: Mark Reynolds (thanks Mark !) + +Platforms tested: F26 + +Flag Day: no + +Doc impact: no +--- + ldap/servers/slapd/daemon.c | 142 +++++++++++++++------------- + src/nunc-stans/include/nunc-stans.h | 3 + + src/nunc-stans/ns/ns_thrpool.c | 5 + + 3 files changed, 84 insertions(+), 66 deletions(-) + +diff --git a/ldap/servers/slapd/daemon.c b/ldap/servers/slapd/daemon.c +index 35cfe7de0..0a723c4a8 100644 +--- a/ldap/servers/slapd/daemon.c ++++ b/ldap/servers/slapd/daemon.c +@@ -152,12 +152,21 @@ accept_and_configure(int s __attribute__((unused)), PRFileDesc *pr_acceptfd, PRN + */ + static int handle_new_connection(Connection_Table *ct, int tcps, PRFileDesc *pr_acceptfd, int secure, int local, Connection **newconn); + static void ns_handle_new_connection(struct ns_job_t *job); ++static void ns_handle_closure(struct ns_job_t *job); + static void handle_pr_read_ready(Connection_Table *ct, PRIntn num_poll); + static int clear_signal(struct POLL_STRUCT *fds); + static void unfurl_banners(Connection_Table *ct, daemon_ports_t *ports, PRFileDesc **n_tcps, PRFileDesc **s_tcps, PRFileDesc **i_unix); + static int write_pid_file(void); + static int init_shutdown_detect(void); + ++#define NS_HANDLER_NEW_CONNECTION 0 ++#define NS_HANDLER_READ_CONNECTION 1 ++#define NS_HANDLER_CLOSE_CONNECTION 2 ++static ns_job_func_t ns_handlers[] = { ++ ns_handle_new_connection, ++ ns_handle_pr_read_ready, ++ ns_handle_closure ++}; + /* + * NSPR has different implementations for PRMonitor, depending + * on the availble threading model +@@ -1058,7 +1067,7 @@ slapd_daemon(daemon_ports_t *ports, ns_thrpool_t *tp) + for (size_t ii = 0; ii < listeners; ++ii) { + listener_idxs[ii].ct = the_connection_table; /* to pass to handle_new_connection */ + ns_result_t result = ns_add_io_job(tp, listener_idxs[ii].listenfd, NS_JOB_ACCEPT | NS_JOB_PERSIST | NS_JOB_PRESERVE_FD, +- ns_handle_new_connection, &listener_idxs[ii], &(listener_idxs[ii].ns_job)); ++ ns_handlers[NS_HANDLER_NEW_CONNECTION], &listener_idxs[ii], &(listener_idxs[ii].ns_job)); + if (result != NS_SUCCESS) { + slapi_log_err(SLAPI_LOG_CRIT, "slapd_daemon", "ns_add_io_job failed to create add acceptor %d\n", result); + } +@@ -1684,28 +1693,84 @@ ns_handle_closure(struct ns_job_t *job) + /** + * Schedule more I/O for this connection, or make sure that it + * is closed in the event loop. ++ * + * caller must hold c_mutex +- * It returns +- * 0 on success +- * 1 on need to retry + */ +-static int +-ns_connection_post_io_or_closing_try(Connection *conn) ++void ++ns_connection_post_io_or_closing(Connection *conn) + { + struct timeval tv; + + if (!enable_nunc_stans) { +- return 0; ++ return; + } + + /* + * A job was already scheduled. +- * Let it be dispatched first ++ * Check if it is the appropriate one + */ + if (conn->c_job != NULL) { +- return 1; ++ if (connection_is_free(conn, 0)) { ++ PRStatus shutdown_status; ++ ++ /* The connection being freed, ++ * It means that ns_handle_closure already completed and the connection ++ * is no longer on the active list. ++ * The scheduled job is useless and scheduling a new one as well ++ */ ++ shutdown_status = ns_job_done(conn->c_job); ++ if (shutdown_status != PR_SUCCESS) { ++ slapi_log_err(SLAPI_LOG_CRIT, "ns_connection_post_io_or_closing", "Failed cancel a job on a freed connection %d !\n", conn->c_sd); ++ } ++ conn->c_job = NULL; ++ return; ++ } ++ if (CONN_NEEDS_CLOSING(conn)) { ++ if (ns_job_is_func(conn->c_job, ns_handlers[NS_HANDLER_CLOSE_CONNECTION])) { ++ /* Due to the closing state we would schedule a ns_handle_closure ++ * but one is already registered. ++ * Just return; ++ */ ++ slapi_log_err(SLAPI_LOG_CONNS, "ns_connection_post_io_or_closing", "Already ns_handle_closure " ++ "job in progress on conn %" PRIu64 " for fd=%d\n", ++ conn->c_connid, conn->c_sd); ++ return; ++ } else { ++ /* Due to the closing state we would schedule a ns_handle_closure ++ * but a different handler is registered. Stop it and schedule (below) ns_handle_closure ++ */ ++ ns_job_done(conn->c_job); ++ conn->c_job = NULL; ++ } ++ } else { ++ /* Here the connection is still active => ignore the call and return */ ++ if (ns_job_is_func(conn->c_job, ns_handlers[NS_HANDLER_READ_CONNECTION])) { ++ /* Connection is still active and a read_ready is already scheduled ++ * Likely a consequence of async operations ++ * Just let the current read_ready do its job ++ */ ++ slapi_log_err(SLAPI_LOG_CONNS, "ns_connection_post_io_or_closing", "Already ns_handle_pr_read_ready " ++ "job in progress on conn %" PRIu64 " for fd=%d\n", ++ conn->c_connid, conn->c_sd); ++ } else { ++ /* Weird situation where the connection is not flagged closing but ns_handle_closure ++ * is scheduled. ++ * We should not try to read it anymore ++ */ ++ PR_ASSERT(ns_job_is_func(conn->c_job, ns_handlers[NS_HANDLER_CLOSE_CONNECTION])); ++ } ++ return; ++ } + } + ++ /* At this point conn->c_job is NULL ++ * Either it was null when the function was called ++ * Or we cleared it (+ns_job_done) if the wrong (according ++ * to the connection state) handler was scheduled ++ * ++ * Now we need to determine which handler to schedule ++ */ ++ + if (CONN_NEEDS_CLOSING(conn)) { + /* there should only ever be 0 or 1 active closure jobs */ + PR_ASSERT((conn->c_ns_close_jobs == 0) || (conn->c_ns_close_jobs == 1)); +@@ -1718,7 +1783,7 @@ ns_connection_post_io_or_closing_try(Connection *conn) + conn->c_ns_close_jobs++; /* now 1 active closure job */ + connection_acquire_nolock_ext(conn, 1 /* allow acquire even when closing */); /* event framework now has a reference */ + /* Close the job asynchronously. Why? */ +- ns_result_t job_result = ns_add_job(conn->c_tp, NS_JOB_TIMER, ns_handle_closure, conn, &(conn->c_job)); ++ ns_result_t job_result = ns_add_job(conn->c_tp, NS_JOB_TIMER, ns_handlers[NS_HANDLER_CLOSE_CONNECTION], conn, &(conn->c_job)); + if (job_result != NS_SUCCESS) { + if (job_result == NS_SHUTDOWN) { + slapi_log_err(SLAPI_LOG_INFO, "ns_connection_post_io_or_closing", "post closure job " +@@ -1762,7 +1827,7 @@ ns_connection_post_io_or_closing_try(Connection *conn) + #endif + ns_result_t job_result = ns_add_io_timeout_job(conn->c_tp, conn->c_prfd, &tv, + NS_JOB_READ | NS_JOB_PRESERVE_FD, +- ns_handle_pr_read_ready, conn, &(conn->c_job)); ++ ns_handlers[NS_HANDLER_READ_CONNECTION], conn, &(conn->c_job)); + if (job_result != NS_SUCCESS) { + if (job_result == NS_SHUTDOWN) { + slapi_log_err(SLAPI_LOG_INFO, "ns_connection_post_io_or_closing", "post I/O job for " +@@ -1782,61 +1847,6 @@ ns_connection_post_io_or_closing_try(Connection *conn) + return 0; + } + +-/* +- * Tries to schedule I/O for this connection +- * If the connection is already busy with a scheduled I/O +- * it can wait until scheduled I/O is dispatched +- * +- * caller must hold c_mutex +- */ +-void +-ns_connection_post_io_or_closing(Connection *conn) +-{ +- while (ns_connection_post_io_or_closing_try(conn)) { +- /* Here a job is currently scheduled (c->job is set) and not yet dispatched +- * Job can be either: +- * - ns_handle_closure +- * - ns_handle_pr_read_ready +- */ +- +- if (connection_is_free(conn, 0)) { +- PRStatus shutdown_status; +- +- /* The connection being freed, +- * It means that ns_handle_closure already completed and the connection +- * is no longer on the active list. +- * The scheduled job is useless and scheduling a new one as well +- */ +- shutdown_status = ns_job_done(conn->c_job); +- if (shutdown_status != PR_SUCCESS) { +- slapi_log_err(SLAPI_LOG_CRIT, "ns_connection_post_io_or_closing", "Failed cancel a job on a freed connection %d !\n", conn->c_sd); +- } +- conn->c_job = NULL; +- return; +- } +- if (g_get_shutdown() && CONN_NEEDS_CLOSING(conn)) { +- PRStatus shutdown_status; +- +- /* This is shutting down cancel any scheduled job to register ns_handle_closure +- */ +- shutdown_status = ns_job_done(conn->c_job); +- if (shutdown_status != PR_SUCCESS) { +- slapi_log_err(SLAPI_LOG_CRIT, "ns_connection_post_io_or_closing", "Failed to cancel a job during shutdown %d !\n", conn->c_sd); +- } +- conn->c_job = NULL; +- continue; +- } +- +- /* We are not suppose to work immediately on the connection that is taken by +- * another job +- * release the lock and give some time +- */ +- PR_ExitMonitor(conn->c_mutex); +- DS_Sleep(PR_MillisecondsToInterval(100)); +- PR_EnterMonitor(conn->c_mutex); +- } +-} +- + /* This function must be called without the thread flag, in the + * event loop. This function may free the connection. This can + * only be done in the event loop thread. +diff --git a/src/nunc-stans/include/nunc-stans.h b/src/nunc-stans/include/nunc-stans.h +index 192e38ec3..a0ddbdb42 100644 +--- a/src/nunc-stans/include/nunc-stans.h ++++ b/src/nunc-stans/include/nunc-stans.h +@@ -959,4 +959,7 @@ ns_result_t ns_thrpool_wait(struct ns_thrpool_t *tp); + */ + ns_result_t ns_job_rearm(struct ns_job_t *job); + ++int ++ns_job_is_func(struct ns_job_t *job, ns_job_func_t func); ++ + #endif /* NS_THRPOOL_H */ +diff --git a/src/nunc-stans/ns/ns_thrpool.c b/src/nunc-stans/ns/ns_thrpool.c +index d95b0c38b..774607c88 100644 +--- a/src/nunc-stans/ns/ns_thrpool.c ++++ b/src/nunc-stans/ns/ns_thrpool.c +@@ -1237,6 +1237,11 @@ ns_job_rearm(ns_job_t *job) + /* Unreachable code .... */ + return NS_INVALID_REQUEST; + } ++int ++ns_job_is_func(struct ns_job_t *job, ns_job_func_t func) ++{ ++ return(job && job->func == func); ++} + + static void + ns_thrpool_delete(ns_thrpool_t *tp) +-- +2.17.1 + diff --git a/SOURCES/0092-Ticket-49765-compiler-warning.patch b/SOURCES/0092-Ticket-49765-compiler-warning.patch new file mode 100644 index 0000000..b28d8d2 --- /dev/null +++ b/SOURCES/0092-Ticket-49765-compiler-warning.patch @@ -0,0 +1,43 @@ +From 2daf9be2b949b845ce18c355d862ac765a512ba7 Mon Sep 17 00:00:00 2001 +From: Thierry Bordaz +Date: Fri, 8 Jun 2018 14:34:28 +0200 +Subject: [PATCH] Ticket 49765 - compiler warning + +--- + ldap/servers/slapd/daemon.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/ldap/servers/slapd/daemon.c b/ldap/servers/slapd/daemon.c +index 0a723c4a8..01db503dc 100644 +--- a/ldap/servers/slapd/daemon.c ++++ b/ldap/servers/slapd/daemon.c +@@ -1778,7 +1778,7 @@ ns_connection_post_io_or_closing(Connection *conn) + slapi_log_err(SLAPI_LOG_CONNS, "ns_connection_post_io_or_closing", "Already a close " + "job in progress on conn %" PRIu64 " for fd=%d\n", + conn->c_connid, conn->c_sd); +- return 0; ++ return; + } else { + conn->c_ns_close_jobs++; /* now 1 active closure job */ + connection_acquire_nolock_ext(conn, 1 /* allow acquire even when closing */); /* event framework now has a reference */ +@@ -1822,7 +1822,7 @@ ns_connection_post_io_or_closing(Connection *conn) + * The error occurs when we get a connection in a closing state. + * For now we return, but there is probably a better way to handle the error case. + */ +- return 0; ++ return; + } + #endif + ns_result_t job_result = ns_add_io_timeout_job(conn->c_tp, conn->c_prfd, &tv, +@@ -1844,7 +1844,7 @@ ns_connection_post_io_or_closing(Connection *conn) + conn->c_connid, conn->c_sd); + } + } +- return 0; ++ return; + } + + /* This function must be called without the thread flag, in the +-- +2.17.1 + diff --git a/SPECS/389-ds-base.spec b/SPECS/389-ds-base.spec index 56f8b3c..5d9af60 100644 --- a/SPECS/389-ds-base.spec +++ b/SPECS/389-ds-base.spec @@ -39,7 +39,7 @@ Summary: 389 Directory Server (%{variant}) Name: 389-ds-base Version: 1.3.7.5 -Release: %{?relprefix}24%{?prerel}%{?dist} +Release: %{?relprefix}25%{?prerel}%{?dist} License: GPLv3+ URL: https://www.port389.org/ Group: System Environment/Daemons @@ -237,6 +237,8 @@ Patch87: 0087-Ticket-48184-clean-up-and-delete-connections-at-shut.patc Patch88: 0088-Ticket-49736-Hardening-of-active-connection-list.patch Patch89: 0089-Ticket-49652-DENY-aci-s-are-not-handled-properly.patch Patch90: 0090-Ticket-49576-ds-replcheck-fix-certificate-directory-.patch +Patch91: 0091-Ticket-49765-Async-operations-can-hang-when-the-serv.patch +Patch92: 0092-Ticket-49765-compiler-warning.patch %description 389 Directory Server is an LDAPv3 compliant server. The base package includes @@ -591,6 +593,10 @@ fi %{_sysconfdir}/%{pkgname}/dirsrvtests %changelog +* Tue Jul 3 2018 Mark Reynolds - 1.3.7.5-25 +- Bump version to 1.3.7.5-25 +- Resolves: Bug 1597530 - Async operations can hang when the server is running nunc-stans + * Wed Jun 13 2018 Mark Reynolds - 1.3.7.5-24 - Bump version to 1.3.7.5-24 - Resolves: Bug 1580257 - Fix certificate directory verification