From 041026835bce4d4ae2390daccade0e74c4fa3c1c Mon Sep 17 00:00:00 2001
From: Christine Caulfield <ccaulfie@redhat.com>
Date: Thu, 26 Jul 2018 08:06:45 +0100
Subject: [PATCH] Shutdown corosync after a fatal error
If pacemaker shuts down due to being fenced by a non-power (e.g. fabric)
fence agent, then it should also take down corosync so that full cluster
service on that node is lost, rather than just resource management.
https://bugzilla.redhat.com/show_bug.cgi?id=1448221
Signed-off-by: Christine Caulfield <ccaulfie@redhat.com>
---
mcp/pacemaker.c | 27 ++++++++++++++++++++++++++-
1 file changed, 26 insertions(+), 1 deletion(-)
diff --git a/mcp/pacemaker.c b/mcp/pacemaker.c
index f57fc25..40a2de2 100644
--- a/mcp/pacemaker.c
+++ b/mcp/pacemaker.c
@@ -21,6 +21,9 @@
#include <crm/common/mainloop.h>
#include <crm/cluster/internal.h>
#include <crm/cluster.h>
+#ifdef SUPPORT_COROSYNC
+#include <corosync/cfg.h>
+#endif
#include <dirent.h>
#include <ctype.h>
@@ -142,6 +145,28 @@ pcmk_process_exit(pcmk_child_t * child)
}
}
+/*
+ * Exit via crm_exit(), first asking corosync to shut down after a fatal error.
+ *
+ * When built with SUPPORT_COROSYNC and exitcode is DAEMON_RESPAWN_STOP, a
+ * corosync CFG handle is opened and an immediate shutdown is requested
+ * before exiting. This is best-effort: failures are only logged as warnings
+ * and never prevent the final crm_exit(exitcode) call.
+ *
+ * exitcode: status passed unchanged to crm_exit().
+ */
+static void
+pcmk_exit_with_cluster(int exitcode)
+{
+#ifdef SUPPORT_COROSYNC
+    if (exitcode == DAEMON_RESPAWN_STOP) {
+        corosync_cfg_handle_t cfg_handle;
+        cs_error_t err;
+
+        crm_info("Asking Corosync to shut down");
+        err = corosync_cfg_initialize(&cfg_handle, NULL);
+        if (err != CS_OK) {
+            /* cfg_handle was never set up, so it must not be passed to
+             * corosync_cfg_try_shutdown() or corosync_cfg_finalize(). */
+            crm_warn("Unable to open handle to corosync to close it down. err=%d", err);
+        } else {
+            err = corosync_cfg_try_shutdown(cfg_handle, COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE);
+            if (err != CS_OK) {
+                crm_warn("Corosync shutdown failed. err=%d", err);
+            }
+            corosync_cfg_finalize(cfg_handle);
+        }
+    }
+#endif
+    crm_exit(exitcode);
+}
+
static void
pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)
{
@@ -423,7 +448,7 @@ pcmk_shutdown_worker(gpointer user_data)
if (fatal_error) {
crm_notice("Attempting to inhibit respawning after fatal error");
- crm_exit(DAEMON_RESPAWN_STOP);
+ pcmk_exit_with_cluster(DAEMON_RESPAWN_STOP);
}
return TRUE;
--
1.8.3.1