951ecb
From 041026835bce4d4ae2390daccade0e74c4fa3c1c Mon Sep 17 00:00:00 2001
951ecb
From: Christine Caulfield <ccaulfie@redhat.com>
951ecb
Date: Thu, 26 Jul 2018 08:06:45 +0100
951ecb
Subject: [PATCH] Shutdown corosync after a fatal error
951ecb
951ecb
If pacemaker shuts down due to being fenced by a non-power (e.g. fabric)
951ecb
fence agent then it should also take down corosync so that full cluster
951ecb
service on that node is lost, rather than just resource management.
951ecb
951ecb
https://bugzilla.redhat.com/show_bug.cgi?id=1448221
951ecb
951ecb
Signed-off-by: Christine Caulfield <ccaulfie@redhat.com>
951ecb
---
951ecb
 mcp/pacemaker.c | 38 +++++++++++++++++++++++++++++++++++++-
951ecb
 1 file changed, 37 insertions(+), 1 deletion(-)
951ecb
951ecb
diff --git a/mcp/pacemaker.c b/mcp/pacemaker.c
951ecb
index f57fc25..40a2de2 100644
951ecb
--- a/mcp/pacemaker.c
951ecb
+++ b/mcp/pacemaker.c
951ecb
@@ -21,6 +21,9 @@
951ecb
 #include <crm/common/mainloop.h>
951ecb
 #include <crm/cluster/internal.h>
951ecb
 #include <crm/cluster.h>
951ecb
+#ifdef SUPPORT_COROSYNC
951ecb
+#include <corosync/cfg.h>
951ecb
+#endif
951ecb
 
951ecb
 #include <dirent.h>
951ecb
 #include <ctype.h>
951ecb
@@ -142,6 +145,39 @@ pcmk_process_exit(pcmk_child_t * child)
951ecb
     }
951ecb
 }
951ecb
 
951ecb
+/*
951ecb
+ * Exit via crm_exit(), first asking Corosync to shut down when respawning
951ecb
+ * is being inhibited (DAEMON_RESPAWN_STOP), so that the node loses full
951ecb
+ * cluster service rather than just resource management.
951ecb
+ *
951ecb
+ * Corosync failures are logged as warnings and do not prevent the exit.
951ecb
+ * Without SUPPORT_COROSYNC this is equivalent to crm_exit(exitcode).
951ecb
+ */
951ecb
+static void
951ecb
+pcmk_exit_with_cluster(int exitcode)
951ecb
+{
951ecb
+#ifdef SUPPORT_COROSYNC
951ecb
+    if (exitcode == DAEMON_RESPAWN_STOP) {
951ecb
+        corosync_cfg_handle_t cfg_handle = 0;
951ecb
+        cs_error_t err;
951ecb
+
951ecb
+        crm_info("Asking Corosync to shut down");
951ecb
+        err = corosync_cfg_initialize(&cfg_handle, NULL);
951ecb
+        if (err != CS_OK) {
951ecb
+            /* No usable handle: skip the shutdown request and finalize */
951ecb
+            crm_warn("Unable to open handle to corosync to close it down. err=%d", err);
951ecb
+        } else {
951ecb
+            err = corosync_cfg_try_shutdown(cfg_handle, COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE);
951ecb
+            if (err != CS_OK) {
951ecb
+                crm_warn("Corosync shutdown failed. err=%d", err);
951ecb
+            }
951ecb
+            corosync_cfg_finalize(cfg_handle);
951ecb
+        }
951ecb
+    }
951ecb
+#endif
951ecb
+    crm_exit(exitcode);
951ecb
+}
951ecb
+
951ecb
 static void
951ecb
 pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)
951ecb
 {
951ecb
@@ -423,7 +459,7 @@ pcmk_shutdown_worker(gpointer user_data)
951ecb
 
951ecb
     if (fatal_error) {
951ecb
         crm_notice("Attempting to inhibit respawning after fatal error");
951ecb
-        crm_exit(DAEMON_RESPAWN_STOP);
951ecb
+        pcmk_exit_with_cluster(DAEMON_RESPAWN_STOP);
951ecb
     }
951ecb
 
951ecb
     return TRUE;
951ecb
-- 
951ecb
1.8.3.1
951ecb