From 041026835bce4d4ae2390daccade0e74c4fa3c1c Mon Sep 17 00:00:00 2001
From: Christine Caulfield
Date: Thu, 26 Jul 2018 08:06:45 +0100
Subject: [PATCH] Shutdown corosync after a fatal error

If pacemaker shuts down due to being fenced by a non-power (e.g. fabric)
fence agent, then it should also take down corosync so that full cluster
service on that node is lost, rather than just resource management.

If a handle to corosync cannot be opened, the shutdown request is skipped
(the handle would be uninitialized) and pacemaker exits as before.

https://bugzilla.redhat.com/show_bug.cgi?id=1448221

Signed-off-by: Christine Caulfield
---
NOTE(review): the header names on the context "#include" lines of the first
hunk were lost before this patch was captured; restore them from
mcp/pacemaker.c before applying. Indentation of context lines should also be
confirmed against the target file.

 mcp/pacemaker.c | 43 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 42 insertions(+), 1 deletion(-)

diff --git a/mcp/pacemaker.c b/mcp/pacemaker.c
index f57fc25..40a2de2 100644
--- a/mcp/pacemaker.c
+++ b/mcp/pacemaker.c
@@ -21,6 +21,9 @@
 #include
 #include
 #include
+#ifdef SUPPORT_COROSYNC
+#include <corosync/cfg.h>
+#endif
 
 #include
 #include
@@ -142,6 +145,44 @@ pcmk_process_exit(pcmk_child_t * child)
     }
 }
 
+/*!
+ * \internal
+ * \brief Exit pacemakerd, first asking Corosync to shut down if appropriate
+ *
+ * When built with Corosync support and \p exitcode is DAEMON_RESPAWN_STOP,
+ * request an immediate Corosync shutdown so the node leaves the cluster
+ * entirely rather than only losing resource management. Failures are logged
+ * as warnings and never prevent the exit.
+ *
+ * \param[in] exitcode  Exit status to pass to crm_exit()
+ */
+static void
+pcmk_exit_with_cluster(int exitcode)
+{
+#ifdef SUPPORT_COROSYNC
+    if (exitcode == DAEMON_RESPAWN_STOP) {
+        corosync_cfg_handle_t cfg_handle;
+        cs_error_t err;
+
+        crm_info("Asking Corosync to shut down");
+        err = corosync_cfg_initialize(&cfg_handle, NULL);
+        if (err != CS_OK) {
+            crm_warn("Unable to open handle to corosync to close it down. err=%d", err);
+        } else {
+            /* Only use the handle if initialization succeeded; otherwise
+             * it is uninitialized and must not be passed to the CFG API.
+             */
+            err = corosync_cfg_try_shutdown(cfg_handle, COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE);
+            if (err != CS_OK) {
+                crm_warn("Corosync shutdown failed. err=%d", err);
+            }
+            corosync_cfg_finalize(cfg_handle);
+        }
+    }
+#endif
+    crm_exit(exitcode);
+}
+
 static void
 pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)
 {
@@ -423,7 +464,7 @@ pcmk_shutdown_worker(gpointer user_data)
 
     if (fatal_error) {
         crm_notice("Attempting to inhibit respawning after fatal error");
-        crm_exit(DAEMON_RESPAWN_STOP);
+        pcmk_exit_with_cluster(DAEMON_RESPAWN_STOP);
     }
 
     return TRUE;
-- 
1.8.3.1