|
|
951ecb |
From 041026835bce4d4ae2390daccade0e74c4fa3c1c Mon Sep 17 00:00:00 2001
|
|
|
951ecb |
From: Christine Caulfield <ccaulfie@redhat.com>
|
|
|
951ecb |
Date: Thu, 26 Jul 2018 08:06:45 +0100
|
|
|
951ecb |
Subject: [PATCH] Shutdown corosync after a fatal error
|
|
|
951ecb |
|
|
|
951ecb |
If pacemaker shuts down due to being fenced by a non-power (eg fabric)
|
|
|
951ecb |
fence agent then it should also take down corosync so that full cluster
|
|
|
951ecb |
service on that node is lost, rather than just resource management.
|
|
|
951ecb |
|
|
|
951ecb |
https://bugzilla.redhat.com/show_bug.cgi?id=1448221
|
|
|
951ecb |
|
|
|
951ecb |
Signed-off-by: Christine Caulfield <ccaulfie@redhat.com>
|
|
|
951ecb |
---
|
|
|
951ecb |
mcp/pacemaker.c | 27 ++++++++++++++++++++++++++-
|
|
|
951ecb |
1 file changed, 26 insertions(+), 1 deletion(-)
|
|
|
951ecb |
|
|
|
951ecb |
diff --git a/mcp/pacemaker.c b/mcp/pacemaker.c
|
|
|
951ecb |
index f57fc25..40a2de2 100644
|
|
|
951ecb |
--- a/mcp/pacemaker.c
|
|
|
951ecb |
+++ b/mcp/pacemaker.c
|
|
|
951ecb |
@@ -21,6 +21,9 @@
|
|
|
951ecb |
#include <crm/common/mainloop.h>
|
|
|
951ecb |
#include <crm/cluster/internal.h>
|
|
|
951ecb |
#include <crm/cluster.h>
|
|
|
951ecb |
+#ifdef SUPPORT_COROSYNC
|
|
|
951ecb |
+#include <corosync/cfg.h>
|
|
|
951ecb |
+#endif
|
|
|
951ecb |
|
|
|
951ecb |
#include <dirent.h>
|
|
|
951ecb |
#include <ctype.h>
|
|
|
951ecb |
@@ -142,6 +145,28 @@ pcmk_process_exit(pcmk_child_t * child)
|
|
|
951ecb |
}
|
|
|
951ecb |
}
|
|
|
951ecb |
|
|
|
951ecb |
+/* Exit pacemakerd with exitcode; for DAEMON_RESPAWN_STOP, first ask corosync to shut down (best effort) */
+static void pcmk_exit_with_cluster(int exitcode)
+{
+#ifdef SUPPORT_COROSYNC
+    if (exitcode == DAEMON_RESPAWN_STOP) {
+        corosync_cfg_handle_t cfg_handle;
+        cs_error_t err = corosync_cfg_initialize(&cfg_handle, NULL);
+        crm_info("Asking Corosync to shut down");
+        if (err != CS_OK) {
+            crm_warn("Unable to open handle to corosync to close it down. err=%d", err);
+        } else { /* cfg_handle is only valid (and only needs finalizing) after a successful initialize */
+            err = corosync_cfg_try_shutdown(cfg_handle, COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE);
+            if (err != CS_OK) {
+                crm_warn("Corosync shutdown failed. err=%d", err);
+            }
+            corosync_cfg_finalize(cfg_handle);
+        }
+    }
+#endif
+    crm_exit(exitcode);
+}
|
|
|
951ecb |
+
|
|
|
951ecb |
static void
|
|
|
951ecb |
pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)
|
|
|
951ecb |
{
|
|
|
951ecb |
@@ -423,7 +448,7 @@ pcmk_shutdown_worker(gpointer user_data)
|
|
|
951ecb |
|
|
|
951ecb |
if (fatal_error) {
|
|
|
951ecb |
crm_notice("Attempting to inhibit respawning after fatal error");
|
|
|
951ecb |
- crm_exit(DAEMON_RESPAWN_STOP);
|
|
|
951ecb |
+ pcmk_exit_with_cluster(DAEMON_RESPAWN_STOP);
|
|
|
951ecb |
}
|
|
|
951ecb |
|
|
|
951ecb |
return TRUE;
|
|
|
951ecb |
--
|
|
|
951ecb |
1.8.3.1
|
|
|
951ecb |
|