|
|
4c8e44 |
From edd133ade2bd9b003d3437280271a9c9dbab3ed6 Mon Sep 17 00:00:00 2001
|
|
|
4c8e44 |
From: Ken Gaillot <kgaillot@redhat.com>
|
|
|
4c8e44 |
Date: Thu, 23 May 2019 16:36:12 -0500
|
|
|
4c8e44 |
Subject: [PATCH] Refactor: controller: separate fencing-related functionality
|
|
|
4c8e44 |
into own source file
|
|
|
4c8e44 |
|
|
|
4c8e44 |
Before:
|
|
|
4c8e44 |
748 daemons/controld/controld_te_actions.c
|
|
|
4c8e44 |
942 daemons/controld/controld_te_callbacks.c
|
|
|
4c8e44 |
725 daemons/controld/controld_te_utils.c
|
|
|
4c8e44 |
84 daemons/controld/controld_transition.h
|
|
|
4c8e44 |
110 daemons/controld/controld_utils.h
|
|
|
4c8e44 |
|
|
|
4c8e44 |
After:
|
|
|
4c8e44 |
838 daemons/controld/controld_fencing.c
|
|
|
4c8e44 |
37 daemons/controld/controld_fencing.h
|
|
|
4c8e44 |
631 daemons/controld/controld_te_actions.c
|
|
|
4c8e44 |
701 daemons/controld/controld_te_callbacks.c
|
|
|
4c8e44 |
298 daemons/controld/controld_te_utils.c
|
|
|
4c8e44 |
65 daemons/controld/controld_transition.h
|
|
|
4c8e44 |
106 daemons/controld/controld_utils.h
|
|
|
4c8e44 |
---
|
|
|
4c8e44 |
daemons/controld/Makefile.am | 5 +-
|
|
|
4c8e44 |
daemons/controld/controld_callbacks.c | 3 +-
|
|
|
4c8e44 |
daemons/controld/controld_control.c | 2 +-
|
|
|
4c8e44 |
daemons/controld/controld_election.c | 3 +-
|
|
|
4c8e44 |
daemons/controld/controld_fencing.c | 838 +++++++++++++++++++++++++++++++
|
|
|
4c8e44 |
daemons/controld/controld_fencing.h | 37 ++
|
|
|
4c8e44 |
daemons/controld/controld_fsa.c | 1 +
|
|
|
4c8e44 |
daemons/controld/controld_messages.c | 1 +
|
|
|
4c8e44 |
daemons/controld/controld_te_actions.c | 121 +----
|
|
|
4c8e44 |
daemons/controld/controld_te_callbacks.c | 243 +--------
|
|
|
4c8e44 |
daemons/controld/controld_te_utils.c | 429 +---------------
|
|
|
4c8e44 |
daemons/controld/controld_transition.c | 1 -
|
|
|
4c8e44 |
daemons/controld/controld_transition.h | 21 +-
|
|
|
4c8e44 |
daemons/controld/controld_utils.h | 4 -
|
|
|
4c8e44 |
14 files changed, 891 insertions(+), 818 deletions(-)
|
|
|
4c8e44 |
create mode 100644 daemons/controld/controld_fencing.c
|
|
|
4c8e44 |
create mode 100644 daemons/controld/controld_fencing.h
|
|
|
4c8e44 |
|
|
|
4c8e44 |
diff --git a/daemons/controld/Makefile.am b/daemons/controld/Makefile.am
|
|
|
4c8e44 |
index 17c3342..858e1bb 100644
|
|
|
4c8e44 |
--- a/daemons/controld/Makefile.am
|
|
|
4c8e44 |
+++ b/daemons/controld/Makefile.am
|
|
|
4c8e44 |
@@ -1,5 +1,7 @@
|
|
|
4c8e44 |
#
|
|
|
4c8e44 |
-# Copyright 2004-2018 Andrew Beekhof <andrew@beekhof.net>
|
|
|
4c8e44 |
+# Copyright 2018-2019 the Pacemaker project contributors
|
|
|
4c8e44 |
+#
|
|
|
4c8e44 |
+# The version control history for this file may have further details.
|
|
|
4c8e44 |
#
|
|
|
4c8e44 |
# This source code is licensed under the GNU General Public License version 2
|
|
|
4c8e44 |
# or later (GPLv2+) WITHOUT ANY WARRANTY.
|
|
|
4c8e44 |
@@ -46,6 +48,7 @@ pacemaker_controld_SOURCES = pacemaker-controld.c \
|
|
|
4c8e44 |
controld_election.c \
|
|
|
4c8e44 |
controld_execd.c \
|
|
|
4c8e44 |
controld_execd_state.c \
|
|
|
4c8e44 |
+ controld_fencing.c \
|
|
|
4c8e44 |
controld_fsa.c \
|
|
|
4c8e44 |
controld_join_client.c \
|
|
|
4c8e44 |
controld_join_dc.c \
|
|
|
4c8e44 |
diff --git a/daemons/controld/controld_callbacks.c b/daemons/controld/controld_callbacks.c
|
|
|
4c8e44 |
index a188263..06ffb9d 100644
|
|
|
4c8e44 |
--- a/daemons/controld/controld_callbacks.c
|
|
|
4c8e44 |
+++ b/daemons/controld/controld_callbacks.c
|
|
|
4c8e44 |
@@ -1,5 +1,5 @@
|
|
|
4c8e44 |
/*
|
|
|
4c8e44 |
- * Copyright 2004-2018 Andrew Beekhof <andrew@beekhof.net>
|
|
|
4c8e44 |
+ * Copyright 2004-2019 the Pacemaker project contributors
|
|
|
4c8e44 |
*
|
|
|
4c8e44 |
* This source code is licensed under the GNU General Public License version 2
|
|
|
4c8e44 |
* or later (GPLv2+) WITHOUT ANY WARRANTY.
|
|
|
4c8e44 |
@@ -22,6 +22,7 @@
|
|
|
4c8e44 |
#include <controld_messages.h>
|
|
|
4c8e44 |
#include <controld_callbacks.h>
|
|
|
4c8e44 |
#include <controld_lrm.h>
|
|
|
4c8e44 |
+#include <controld_fencing.h>
|
|
|
4c8e44 |
#include <controld_transition.h>
|
|
|
4c8e44 |
#include <controld_membership.h>
|
|
|
4c8e44 |
|
|
|
4c8e44 |
diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c
|
|
|
4c8e44 |
index 6d9f335..7f918c0 100644
|
|
|
4c8e44 |
--- a/daemons/controld/controld_control.c
|
|
|
4c8e44 |
+++ b/daemons/controld/controld_control.c
|
|
|
4c8e44 |
@@ -25,6 +25,7 @@
|
|
|
4c8e44 |
#include <controld_messages.h>
|
|
|
4c8e44 |
#include <controld_callbacks.h>
|
|
|
4c8e44 |
#include <controld_lrm.h>
|
|
|
4c8e44 |
+#include <controld_fencing.h>
|
|
|
4c8e44 |
#include <controld_alerts.h>
|
|
|
4c8e44 |
#include <controld_metadata.h>
|
|
|
4c8e44 |
#include <controld_transition.h>
|
|
|
4c8e44 |
@@ -147,7 +148,6 @@ extern char *max_generation_from;
|
|
|
4c8e44 |
extern xmlNode *max_generation_xml;
|
|
|
4c8e44 |
extern GHashTable *resource_history;
|
|
|
4c8e44 |
extern GHashTable *voted;
|
|
|
4c8e44 |
-extern char *te_client_id;
|
|
|
4c8e44 |
|
|
|
4c8e44 |
void
|
|
|
4c8e44 |
crmd_fast_exit(crm_exit_t exit_code)
|
|
|
4c8e44 |
diff --git a/daemons/controld/controld_election.c b/daemons/controld/controld_election.c
|
|
|
4c8e44 |
index 5d6858c..9e49c7b 100644
|
|
|
4c8e44 |
--- a/daemons/controld/controld_election.c
|
|
|
4c8e44 |
+++ b/daemons/controld/controld_election.c
|
|
|
4c8e44 |
@@ -1,5 +1,5 @@
|
|
|
4c8e44 |
/*
|
|
|
4c8e44 |
- * Copyright 2004-2019 Andrew Beekhof <andrew@beekhof.net>
|
|
|
4c8e44 |
+ * Copyright 2004-2019 the Pacemaker project contributors
|
|
|
4c8e44 |
*
|
|
|
4c8e44 |
* This source code is licensed under the GNU General Public License version 2
|
|
|
4c8e44 |
* or later (GPLv2+) WITHOUT ANY WARRANTY.
|
|
|
4c8e44 |
@@ -18,6 +18,7 @@
|
|
|
4c8e44 |
#include <crm/crm.h>
|
|
|
4c8e44 |
#include <pacemaker-controld.h>
|
|
|
4c8e44 |
#include <controld_fsa.h>
|
|
|
4c8e44 |
+#include <controld_fencing.h>
|
|
|
4c8e44 |
#include <controld_messages.h>
|
|
|
4c8e44 |
#include <controld_callbacks.h>
|
|
|
4c8e44 |
#include <controld_transition.h>
|
|
|
4c8e44 |
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
|
|
|
4c8e44 |
new file mode 100644
|
|
|
4c8e44 |
index 0000000..cde57b5
|
|
|
4c8e44 |
--- /dev/null
|
|
|
4c8e44 |
+++ b/daemons/controld/controld_fencing.c
|
|
|
4c8e44 |
@@ -0,0 +1,838 @@
|
|
|
4c8e44 |
+/*
|
|
|
4c8e44 |
+ * Copyright 2004-2019 the Pacemaker project contributors
|
|
|
4c8e44 |
+ *
|
|
|
4c8e44 |
+ * This source code is licensed under the GNU General Public License version 2
|
|
|
4c8e44 |
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
|
|
|
4c8e44 |
+ */
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+#include <crm_internal.h>
|
|
|
4c8e44 |
+#include <crm/crm.h>
|
|
|
4c8e44 |
+#include <crm/msg_xml.h>
|
|
|
4c8e44 |
+#include <crm/common/xml.h>
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+#include <controld_transition.h>
|
|
|
4c8e44 |
+#include <controld_fsa.h>
|
|
|
4c8e44 |
+#include <controld_lrm.h>
|
|
|
4c8e44 |
+#include <controld_fencing.h>
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+#ifdef HAVE_SYS_REBOOT_H
|
|
|
4c8e44 |
+# include <unistd.h>
|
|
|
4c8e44 |
+# include <sys/reboot.h>
|
|
|
4c8e44 |
+#endif
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+/*
|
|
|
4c8e44 |
+ * stonith failure counting
|
|
|
4c8e44 |
+ *
|
|
|
4c8e44 |
+ * We don't want to get stuck in a permanent fencing loop. Keep track of the
|
|
|
4c8e44 |
+ * number of fencing failures for each target node, and the failure count at
|
|
|
4c8e44 |
+ * which we stop restarting the transition.
|
|
|
4c8e44 |
+ */
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+struct st_fail_rec {
|
|
|
4c8e44 |
+ int count;
|
|
|
4c8e44 |
+};
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+static unsigned long int stonith_max_attempts = 10;
|
|
|
4c8e44 |
+static GHashTable *stonith_failures = NULL;
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+void
|
|
|
4c8e44 |
+update_stonith_max_attempts(const char *value)
|
|
|
4c8e44 |
+{
|
|
|
4c8e44 |
+ if (safe_str_eq(value, CRM_INFINITY_S)) {
|
|
|
4c8e44 |
+ stonith_max_attempts = CRM_SCORE_INFINITY;
|
|
|
4c8e44 |
+ } else {
|
|
|
4c8e44 |
+ stonith_max_attempts = crm_int_helper(value, NULL);
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+}
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+static gboolean
|
|
|
4c8e44 |
+too_many_st_failures(const char *target)
|
|
|
4c8e44 |
+{
|
|
|
4c8e44 |
+ GHashTableIter iter;
|
|
|
4c8e44 |
+ const char *key = NULL;
|
|
|
4c8e44 |
+ struct st_fail_rec *value = NULL;
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ if (stonith_failures == NULL) {
|
|
|
4c8e44 |
+ return FALSE;
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ if (target == NULL) {
|
|
|
4c8e44 |
+ g_hash_table_iter_init(&iter, stonith_failures);
|
|
|
4c8e44 |
+ while (g_hash_table_iter_next(&iter, (gpointer *) &key,
|
|
|
4c8e44 |
+ (gpointer *) &value)) {
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ if (value->count >= stonith_max_attempts) {
|
|
|
4c8e44 |
+ target = (const char*)key;
|
|
|
4c8e44 |
+ goto too_many;
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+ } else {
|
|
|
4c8e44 |
+ value = g_hash_table_lookup(stonith_failures, target);
|
|
|
4c8e44 |
+ if ((value != NULL) && (value->count >= stonith_max_attempts)) {
|
|
|
4c8e44 |
+ goto too_many;
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+ return FALSE;
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+too_many:
|
|
|
4c8e44 |
+ crm_warn("Too many failures (%d) to fence %s, giving up",
|
|
|
4c8e44 |
+ value->count, target);
|
|
|
4c8e44 |
+ return TRUE;
|
|
|
4c8e44 |
+}
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+/*!
|
|
|
4c8e44 |
+ * \internal
|
|
|
4c8e44 |
+ * \brief Reset a stonith fail count
|
|
|
4c8e44 |
+ *
|
|
|
4c8e44 |
+ * \param[in] target Name of node to reset, or NULL for all
|
|
|
4c8e44 |
+ */
|
|
|
4c8e44 |
+void
|
|
|
4c8e44 |
+st_fail_count_reset(const char *target)
|
|
|
4c8e44 |
+{
|
|
|
4c8e44 |
+ if (stonith_failures == NULL) {
|
|
|
4c8e44 |
+ return;
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ if (target) {
|
|
|
4c8e44 |
+ struct st_fail_rec *rec = NULL;
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ rec = g_hash_table_lookup(stonith_failures, target);
|
|
|
4c8e44 |
+ if (rec) {
|
|
|
4c8e44 |
+ rec->count = 0;
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+ } else {
|
|
|
4c8e44 |
+ GHashTableIter iter;
|
|
|
4c8e44 |
+ const char *key = NULL;
|
|
|
4c8e44 |
+ struct st_fail_rec *rec = NULL;
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ g_hash_table_iter_init(&iter, stonith_failures);
|
|
|
4c8e44 |
+ while (g_hash_table_iter_next(&iter, (gpointer *) &key,
|
|
|
4c8e44 |
+ (gpointer *) &rec)) {
|
|
|
4c8e44 |
+ rec->count = 0;
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+}
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+static void
|
|
|
4c8e44 |
+st_fail_count_increment(const char *target)
|
|
|
4c8e44 |
+{
|
|
|
4c8e44 |
+ struct st_fail_rec *rec = NULL;
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ if (stonith_failures == NULL) {
|
|
|
4c8e44 |
+ stonith_failures = crm_str_table_new();
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ rec = g_hash_table_lookup(stonith_failures, target);
|
|
|
4c8e44 |
+ if (rec) {
|
|
|
4c8e44 |
+ rec->count++;
|
|
|
4c8e44 |
+ } else {
|
|
|
4c8e44 |
+ rec = malloc(sizeof(struct st_fail_rec));
|
|
|
4c8e44 |
+ if(rec == NULL) {
|
|
|
4c8e44 |
+ return;
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ rec->count = 1;
|
|
|
4c8e44 |
+ g_hash_table_insert(stonith_failures, strdup(target), rec);
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+}
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+/* end stonith fail count functions */
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+static void
|
|
|
4c8e44 |
+cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output,
|
|
|
4c8e44 |
+ void *user_data)
|
|
|
4c8e44 |
+{
|
|
|
4c8e44 |
+ if (rc < pcmk_ok) {
|
|
|
4c8e44 |
+ crm_err("Fencing update %d for %s: failed - %s (%d)",
|
|
|
4c8e44 |
+ call_id, (char *)user_data, pcmk_strerror(rc), rc);
|
|
|
4c8e44 |
+ crm_log_xml_warn(msg, "Failed update");
|
|
|
4c8e44 |
+ abort_transition(INFINITY, tg_shutdown, "CIB update failed", NULL);
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ } else {
|
|
|
4c8e44 |
+ crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data);
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+}
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+static void
|
|
|
4c8e44 |
+send_stonith_update(crm_action_t *action, const char *target, const char *uuid)
|
|
|
4c8e44 |
+{
|
|
|
4c8e44 |
+ int rc = pcmk_ok;
|
|
|
4c8e44 |
+ crm_node_t *peer = NULL;
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ /* We (usually) rely on the membership layer to do node_update_cluster,
|
|
|
4c8e44 |
+ * and the peer status callback to do node_update_peer, because the node
|
|
|
4c8e44 |
+ * might have already rejoined before we get the stonith result here.
|
|
|
4c8e44 |
+ */
|
|
|
4c8e44 |
+ int flags = node_update_join | node_update_expected;
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ /* zero out the node-status & remove all LRM status info */
|
|
|
4c8e44 |
+ xmlNode *node_state = NULL;
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ CRM_CHECK(target != NULL, return);
|
|
|
4c8e44 |
+ CRM_CHECK(uuid != NULL, return);
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ /* Make sure the membership and join caches are accurate */
|
|
|
4c8e44 |
+ peer = crm_get_peer_full(0, target, CRM_GET_PEER_ANY);
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ CRM_CHECK(peer != NULL, return);
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ if (peer->state == NULL) {
|
|
|
4c8e44 |
+ /* Usually, we rely on the membership layer to update the cluster state
|
|
|
4c8e44 |
+ * in the CIB. However, if the node has never been seen, do it here, so
|
|
|
4c8e44 |
+ * the node is not considered unclean.
|
|
|
4c8e44 |
+ */
|
|
|
4c8e44 |
+ flags |= node_update_cluster;
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ if (peer->uuid == NULL) {
|
|
|
4c8e44 |
+ crm_info("Recording uuid '%s' for node '%s'", uuid, target);
|
|
|
4c8e44 |
+ peer->uuid = strdup(uuid);
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ crmd_peer_down(peer, TRUE);
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ /* Generate a node state update for the CIB */
|
|
|
4c8e44 |
+ node_state = create_node_state_update(peer, flags, NULL, __FUNCTION__);
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ /* we have to mark whether or not remote nodes have already been fenced */
|
|
|
4c8e44 |
+ if (peer->flags & crm_remote_node) {
|
|
|
4c8e44 |
+ time_t now = time(NULL);
|
|
|
4c8e44 |
+ char *now_s = crm_itoa(now);
|
|
|
4c8e44 |
+ crm_xml_add(node_state, XML_NODE_IS_FENCED, now_s);
|
|
|
4c8e44 |
+ free(now_s);
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ /* Force our known ID */
|
|
|
4c8e44 |
+ crm_xml_add(node_state, XML_ATTR_UUID, uuid);
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, node_state,
|
|
|
4c8e44 |
+ cib_quorum_override | cib_scope_local | cib_can_create);
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ /* Delay processing the trigger until the update completes */
|
|
|
4c8e44 |
+ crm_debug("Sending fencing update %d for %s", rc, target);
|
|
|
4c8e44 |
+ fsa_register_cib_callback(rc, FALSE, strdup(target), cib_fencing_updated);
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ /* Make sure it sticks */
|
|
|
4c8e44 |
+ /* fsa_cib_conn->cmds->bump_epoch(fsa_cib_conn, cib_quorum_override|cib_scope_local); */
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ erase_status_tag(peer->uname, XML_CIB_TAG_LRM, cib_scope_local);
|
|
|
4c8e44 |
+ erase_status_tag(peer->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ free_xml(node_state);
|
|
|
4c8e44 |
+ return;
|
|
|
4c8e44 |
+}
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+/*!
|
|
|
4c8e44 |
+ * \internal
|
|
|
4c8e44 |
+ * \brief Abort transition due to stonith failure
|
|
|
4c8e44 |
+ *
|
|
|
4c8e44 |
+ * \param[in] abort_action Whether to restart or stop transition
|
|
|
4c8e44 |
+ * \param[in] target Don't restart if this (NULL for any) has too many failures
|
|
|
4c8e44 |
+ * \param[in] reason Log this stonith action XML as abort reason (or NULL)
|
|
|
4c8e44 |
+ */
|
|
|
4c8e44 |
+static void
|
|
|
4c8e44 |
+abort_for_stonith_failure(enum transition_action abort_action,
|
|
|
4c8e44 |
+ const char *target, xmlNode *reason)
|
|
|
4c8e44 |
+{
|
|
|
4c8e44 |
+ /* If stonith repeatedly fails, we eventually give up on starting a new
|
|
|
4c8e44 |
+ * transition for that reason.
|
|
|
4c8e44 |
+ */
|
|
|
4c8e44 |
+ if ((abort_action != tg_stop) && too_many_st_failures(target)) {
|
|
|
4c8e44 |
+ abort_action = tg_stop;
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+ abort_transition(INFINITY, abort_action, "Stonith failed", reason);
|
|
|
4c8e44 |
+}
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+/*
|
|
|
4c8e44 |
+ * stonith cleanup list
|
|
|
4c8e44 |
+ *
|
|
|
4c8e44 |
+ * If the DC is shot, proper notifications might not go out.
|
|
|
4c8e44 |
+ * The stonith cleanup list allows the cluster to (re-)send
|
|
|
4c8e44 |
+ * notifications once a new DC is elected.
|
|
|
4c8e44 |
+ */
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+static GListPtr stonith_cleanup_list = NULL;
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+/*!
|
|
|
4c8e44 |
+ * \internal
|
|
|
4c8e44 |
+ * \brief Add a node to the stonith cleanup list
|
|
|
4c8e44 |
+ *
|
|
|
4c8e44 |
+ * \param[in] target Name of node to add
|
|
|
4c8e44 |
+ */
|
|
|
4c8e44 |
+void
|
|
|
4c8e44 |
+add_stonith_cleanup(const char *target) {
|
|
|
4c8e44 |
+ stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(target));
|
|
|
4c8e44 |
+}
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+/*!
|
|
|
4c8e44 |
+ * \internal
|
|
|
4c8e44 |
+ * \brief Remove a node from the stonith cleanup list
|
|
|
4c8e44 |
+ *
|
|
|
4c8e44 |
+ * \param[in] target Name of node to remove
|
|
|
4c8e44 |
+ */
|
|
|
4c8e44 |
+void
|
|
|
4c8e44 |
+remove_stonith_cleanup(const char *target)
|
|
|
4c8e44 |
+{
|
|
|
4c8e44 |
+ GListPtr iter = stonith_cleanup_list;
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ while (iter != NULL) {
|
|
|
4c8e44 |
+ GListPtr tmp = iter;
|
|
|
4c8e44 |
+ char *iter_name = tmp->data;
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ iter = iter->next;
|
|
|
4c8e44 |
+ if (safe_str_eq(target, iter_name)) {
|
|
|
4c8e44 |
+ crm_trace("Removing %s from the cleanup list", iter_name);
|
|
|
4c8e44 |
+ stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, tmp);
|
|
|
4c8e44 |
+ free(iter_name);
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+}
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+/*!
|
|
|
4c8e44 |
+ * \internal
|
|
|
4c8e44 |
+ * \brief Purge all entries from the stonith cleanup list
|
|
|
4c8e44 |
+ */
|
|
|
4c8e44 |
+void
|
|
|
4c8e44 |
+purge_stonith_cleanup()
|
|
|
4c8e44 |
+{
|
|
|
4c8e44 |
+ if (stonith_cleanup_list) {
|
|
|
4c8e44 |
+ GListPtr iter = NULL;
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
|
|
|
4c8e44 |
+ char *target = iter->data;
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ crm_info("Purging %s from stonith cleanup list", target);
|
|
|
4c8e44 |
+ free(target);
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+ g_list_free(stonith_cleanup_list);
|
|
|
4c8e44 |
+ stonith_cleanup_list = NULL;
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+}
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+/*!
|
|
|
4c8e44 |
+ * \internal
|
|
|
4c8e44 |
+ * \brief Send stonith updates for all entries in cleanup list, then purge it
|
|
|
4c8e44 |
+ */
|
|
|
4c8e44 |
+void
|
|
|
4c8e44 |
+execute_stonith_cleanup()
|
|
|
4c8e44 |
+{
|
|
|
4c8e44 |
+ GListPtr iter;
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
|
|
|
4c8e44 |
+ char *target = iter->data;
|
|
|
4c8e44 |
+ crm_node_t *target_node = crm_get_peer(0, target);
|
|
|
4c8e44 |
+ const char *uuid = crm_peer_uuid(target_node);
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ crm_notice("Marking %s, target of a previous stonith action, as clean", target);
|
|
|
4c8e44 |
+ send_stonith_update(NULL, target, uuid);
|
|
|
4c8e44 |
+ free(target);
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+ g_list_free(stonith_cleanup_list);
|
|
|
4c8e44 |
+ stonith_cleanup_list = NULL;
|
|
|
4c8e44 |
+}
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+/* end stonith cleanup list functions */
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+/* stonith API client
|
|
|
4c8e44 |
+ *
|
|
|
4c8e44 |
+ * Functions that need to interact directly with the fencer via its API
|
|
|
4c8e44 |
+ */
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+stonith_t *stonith_api = NULL;
|
|
|
4c8e44 |
+crm_trigger_t *stonith_reconnect = NULL;
|
|
|
4c8e44 |
+char *te_client_id = NULL;
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+static gboolean
|
|
|
4c8e44 |
+fail_incompletable_stonith(crm_graph_t *graph)
|
|
|
4c8e44 |
+{
|
|
|
4c8e44 |
+ GListPtr lpc = NULL;
|
|
|
4c8e44 |
+ const char *task = NULL;
|
|
|
4c8e44 |
+ xmlNode *last_action = NULL;
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ if (graph == NULL) {
|
|
|
4c8e44 |
+ return FALSE;
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
|
|
|
4c8e44 |
+ GListPtr lpc2 = NULL;
|
|
|
4c8e44 |
+ synapse_t *synapse = (synapse_t *) lpc->data;
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ if (synapse->confirmed) {
|
|
|
4c8e44 |
+ continue;
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) {
|
|
|
4c8e44 |
+ crm_action_t *action = (crm_action_t *) lpc2->data;
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ if (action->type != action_type_crm || action->confirmed) {
|
|
|
4c8e44 |
+ continue;
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
|
|
|
4c8e44 |
+ if (task && safe_str_eq(task, CRM_OP_FENCE)) {
|
|
|
4c8e44 |
+ action->failed = TRUE;
|
|
|
4c8e44 |
+ last_action = action->xml;
|
|
|
4c8e44 |
+ update_graph(graph, action);
|
|
|
4c8e44 |
+ crm_notice("Failing action %d (%s): fencer terminated",
|
|
|
4c8e44 |
+ action->id, ID(action->xml));
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ if (last_action != NULL) {
|
|
|
4c8e44 |
+ crm_warn("Fencer failure resulted in unrunnable actions");
|
|
|
4c8e44 |
+ abort_for_stonith_failure(tg_restart, NULL, last_action);
|
|
|
4c8e44 |
+ return TRUE;
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ return FALSE;
|
|
|
4c8e44 |
+}
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+static void
|
|
|
4c8e44 |
+tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e)
|
|
|
4c8e44 |
+{
|
|
|
4c8e44 |
+ if (is_set(fsa_input_register, R_ST_REQUIRED)) {
|
|
|
4c8e44 |
+ crm_crit("Fencing daemon connection failed");
|
|
|
4c8e44 |
+ mainloop_set_trigger(stonith_reconnect);
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ } else {
|
|
|
4c8e44 |
+ crm_info("Fencing daemon disconnected");
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ if (stonith_api) {
|
|
|
4c8e44 |
+ stonith_api->state = stonith_disconnected;
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ if (AM_I_DC) {
|
|
|
4c8e44 |
+ fail_incompletable_stonith(transition_graph);
|
|
|
4c8e44 |
+ trigger_graph();
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+}
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+static void
|
|
|
4c8e44 |
+tengine_stonith_notify(stonith_t *st, stonith_event_t *st_event)
|
|
|
4c8e44 |
+{
|
|
|
4c8e44 |
+ if (te_client_id == NULL) {
|
|
|
4c8e44 |
+ te_client_id = crm_strdup_printf("%s.%lu", crm_system_name,
|
|
|
4c8e44 |
+ (unsigned long) getpid());
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ if (st_event == NULL) {
|
|
|
4c8e44 |
+ crm_err("Notify data not found");
|
|
|
4c8e44 |
+ return;
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ crmd_alert_fencing_op(st_event);
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ if ((st_event->result == pcmk_ok) && safe_str_eq("on", st_event->action)) {
|
|
|
4c8e44 |
+ crm_notice("%s was successfully unfenced by %s (at the request of %s)",
|
|
|
4c8e44 |
+ st_event->target,
|
|
|
4c8e44 |
+ st_event->executioner? st_event->executioner : "<anyone>",
|
|
|
4c8e44 |
+ st_event->origin);
|
|
|
4c8e44 |
+ /* TODO: Hook up st_event->device */
|
|
|
4c8e44 |
+ return;
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ } else if (safe_str_eq("on", st_event->action)) {
|
|
|
4c8e44 |
+ crm_err("Unfencing of %s by %s failed: %s (%d)",
|
|
|
4c8e44 |
+ st_event->target,
|
|
|
4c8e44 |
+ st_event->executioner? st_event->executioner : "<anyone>",
|
|
|
4c8e44 |
+ pcmk_strerror(st_event->result), st_event->result);
|
|
|
4c8e44 |
+ return;
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ } else if ((st_event->result == pcmk_ok)
|
|
|
4c8e44 |
+ && crm_str_eq(st_event->target, fsa_our_uname, TRUE)) {
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ crm_crit("We were allegedly just fenced by %s for %s!",
|
|
|
4c8e44 |
+ st_event->executioner? st_event->executioner : "<anyone>",
|
|
|
4c8e44 |
+ st_event->origin); /* Dumps blackbox if enabled */
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ qb_log_fini(); /* Try to get the above log message to disk - somehow */
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ /* Get out ASAP and do not come back up.
|
|
|
4c8e44 |
+ *
|
|
|
4c8e44 |
+ * Triggering a reboot is not the worst idea either, since
|
|
|
4c8e44 |
+ * the rest of the cluster thinks we're safely down
|
|
|
4c8e44 |
+ */
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+#ifdef RB_HALT_SYSTEM
|
|
|
4c8e44 |
+ reboot(RB_HALT_SYSTEM);
|
|
|
4c8e44 |
+#endif
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ /*
|
|
|
4c8e44 |
+ * If reboot() fails or is not supported, coming back up will
|
|
|
4c8e44 |
+ * probably lead to a situation where the other nodes set our
|
|
|
4c8e44 |
+ * status to 'lost' because of the fencing callback and will
|
|
|
4c8e44 |
+ * discard subsequent election votes with:
|
|
|
4c8e44 |
+ *
|
|
|
4c8e44 |
+ * Election 87 (current: 5171, owner: 103): Processed vote from east-03 (Peer is not part of our cluster)
|
|
|
4c8e44 |
+ *
|
|
|
4c8e44 |
+ * So just stay dead, something is seriously messed up anyway.
|
|
|
4c8e44 |
+ *
|
|
|
4c8e44 |
+ */
|
|
|
4c8e44 |
+ exit(CRM_EX_FATAL); // None of our wrappers since we already called qb_log_fini()
|
|
|
4c8e44 |
+ return;
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ /* Update the count of stonith failures for this target, in case we become
|
|
|
4c8e44 |
+ * DC later. The current DC has already updated its fail count in
|
|
|
4c8e44 |
+ * tengine_stonith_callback().
|
|
|
4c8e44 |
+ */
|
|
|
4c8e44 |
+ if (!AM_I_DC && safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) {
|
|
|
4c8e44 |
+ if (st_event->result == pcmk_ok) {
|
|
|
4c8e44 |
+ st_fail_count_reset(st_event->target);
|
|
|
4c8e44 |
+ } else {
|
|
|
4c8e44 |
+ st_fail_count_increment(st_event->target);
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s: %s "
|
|
|
4c8e44 |
+ CRM_XS " initiator=%s ref=%s",
|
|
|
4c8e44 |
+ st_event->target, st_event->result == pcmk_ok ? "" : " not",
|
|
|
4c8e44 |
+ st_event->action,
|
|
|
4c8e44 |
+ st_event->executioner ? st_event->executioner : "<anyone>",
|
|
|
4c8e44 |
+ (st_event->client_origin? st_event->client_origin : "<unknown>"),
|
|
|
4c8e44 |
+ pcmk_strerror(st_event->result),
|
|
|
4c8e44 |
+ st_event->origin, st_event->id);
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ if (st_event->result == pcmk_ok) {
|
|
|
4c8e44 |
+ crm_node_t *peer = crm_find_known_peer_full(0, st_event->target, CRM_GET_PEER_ANY);
|
|
|
4c8e44 |
+ const char *uuid = NULL;
|
|
|
4c8e44 |
+ gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname);
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ if (peer == NULL) {
|
|
|
4c8e44 |
+ return;
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ uuid = crm_peer_uuid(peer);
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc);
|
|
|
4c8e44 |
+ if(AM_I_DC) {
|
|
|
4c8e44 |
+ /* The DC always sends updates */
|
|
|
4c8e44 |
+ send_stonith_update(NULL, st_event->target, uuid);
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ /* @TODO Ideally, at this point, we'd check whether the fenced node
|
|
|
4c8e44 |
+ * hosted any guest nodes, and call remote_node_down() for them.
|
|
|
4c8e44 |
+ * Unfortunately, the controller doesn't have a simple, reliable way
|
|
|
4c8e44 |
+ * to map hosts to guests. It might be possible to track this in the
|
|
|
4c8e44 |
+ * peer cache via crm_remote_peer_cache_refresh(). For now, we rely
|
|
|
4c8e44 |
+ * on the PE creating fence pseudo-events for the guests.
|
|
|
4c8e44 |
+ */
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ if (st_event->client_origin
|
|
|
4c8e44 |
+ && safe_str_neq(st_event->client_origin, te_client_id)) {
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ /* Abort the current transition graph if it wasn't us
|
|
|
4c8e44 |
+ * that invoked stonith to fence someone
|
|
|
4c8e44 |
+ */
|
|
|
4c8e44 |
+ crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target);
|
|
|
4c8e44 |
+ abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL);
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ /* Assume it was our leader if we don't currently have one */
|
|
|
4c8e44 |
+ } else if (((fsa_our_dc == NULL) || safe_str_eq(fsa_our_dc, st_event->target))
|
|
|
4c8e44 |
+ && is_not_set(peer->flags, crm_remote_node)) {
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ crm_notice("Target %s our leader %s (recorded: %s)",
|
|
|
4c8e44 |
+ fsa_our_dc ? "was" : "may have been", st_event->target,
|
|
|
4c8e44 |
+ fsa_our_dc ? fsa_our_dc : "<unset>");
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ /* Given the CIB resyncing that occurs around elections,
|
|
|
4c8e44 |
+ * have one node update the CIB now and, if the new DC is different,
|
|
|
4c8e44 |
+ * have them do so too after the election
|
|
|
4c8e44 |
+ */
|
|
|
4c8e44 |
+ if (we_are_executioner) {
|
|
|
4c8e44 |
+ send_stonith_update(NULL, st_event->target, uuid);
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+ add_stonith_cleanup(st_event->target);
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ /* If the target is a remote node, and we host its connection,
|
|
|
4c8e44 |
+ * immediately fail all monitors so it can be recovered quickly.
|
|
|
4c8e44 |
+ * The connection won't necessarily drop when a remote node is fenced,
|
|
|
4c8e44 |
+ * so the failure might not otherwise be detected until the next poke.
|
|
|
4c8e44 |
+ */
|
|
|
4c8e44 |
+ if (is_set(peer->flags, crm_remote_node)) {
|
|
|
4c8e44 |
+ remote_ra_fail(st_event->target);
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ crmd_peer_down(peer, TRUE);
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+}
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+/*!
|
|
|
4c8e44 |
+ * \brief Connect to fencer
|
|
|
4c8e44 |
+ *
|
|
|
4c8e44 |
+ * \param[in] user_data If NULL, retry failures now, otherwise retry in main loop
|
|
|
4c8e44 |
+ *
|
|
|
4c8e44 |
+ * \return TRUE
|
|
|
4c8e44 |
+ * \note If user_data is NULL, this will wait 2s between attempts, for up to
|
|
|
4c8e44 |
+ * 30 attempts, meaning the controller could be blocked as long as 58s.
|
|
|
4c8e44 |
+ */
|
|
|
4c8e44 |
+gboolean
|
|
|
4c8e44 |
+te_connect_stonith(gpointer user_data)
|
|
|
4c8e44 |
+{
|
|
|
4c8e44 |
+ int rc = pcmk_ok;
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ if (stonith_api == NULL) {
|
|
|
4c8e44 |
+ stonith_api = stonith_api_new();
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ if (stonith_api->state != stonith_disconnected) {
|
|
|
4c8e44 |
+ crm_trace("Already connected to fencer, no need to retry");
|
|
|
4c8e44 |
+ return TRUE;
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ if (user_data == NULL) {
|
|
|
4c8e44 |
+ // Blocking (retry failures now, for up to 30 attempts)
|
|
|
4c8e44 |
+ rc = stonith_api_connect_retry(stonith_api, crm_system_name, 30);
|
|
|
4c8e44 |
+ if (rc != pcmk_ok) {
|
|
|
4c8e44 |
+ crm_err("Could not connect to fencer in 30 attempts: %s "
|
|
|
4c8e44 |
+ CRM_XS " rc=%d", pcmk_strerror(rc), rc);
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+ } else {
|
|
|
4c8e44 |
+ // Non-blocking (retry failures later in main loop)
|
|
|
4c8e44 |
+ rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
|
|
|
4c8e44 |
+ if (rc != pcmk_ok) {
|
|
|
4c8e44 |
+ if (is_set(fsa_input_register, R_ST_REQUIRED)) {
|
|
|
4c8e44 |
+ crm_err("Fencer connection failed (will retry): %s "
|
|
|
4c8e44 |
+ CRM_XS " rc=%d", pcmk_strerror(rc), rc);
|
|
|
4c8e44 |
+ mainloop_set_trigger(stonith_reconnect);
|
|
|
4c8e44 |
+ } else {
|
|
|
4c8e44 |
+ crm_info("Fencer connection failed (ignoring because no longer required): %s "
|
|
|
4c8e44 |
+ CRM_XS " rc=%d", pcmk_strerror(rc), rc);
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+ return TRUE;
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ if (rc == pcmk_ok) {
|
|
|
4c8e44 |
+ stonith_api->cmds->register_notification(stonith_api,
|
|
|
4c8e44 |
+ T_STONITH_NOTIFY_DISCONNECT,
|
|
|
4c8e44 |
+ tengine_stonith_connection_destroy);
|
|
|
4c8e44 |
+ stonith_api->cmds->register_notification(stonith_api,
|
|
|
4c8e44 |
+ T_STONITH_NOTIFY_FENCE,
|
|
|
4c8e44 |
+ tengine_stonith_notify);
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+ return TRUE;
|
|
|
4c8e44 |
+}
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+static gboolean
|
|
|
4c8e44 |
+do_stonith_history_sync(gpointer user_data)
|
|
|
4c8e44 |
+{
|
|
|
4c8e44 |
+ if (stonith_api && (stonith_api->state != stonith_disconnected)) {
|
|
|
4c8e44 |
+ stonith_history_t *history = NULL;
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ stonith_api->cmds->history(stonith_api,
|
|
|
4c8e44 |
+ st_opt_sync_call | st_opt_broadcast,
|
|
|
4c8e44 |
+ NULL, &history, 5);
|
|
|
4c8e44 |
+ stonith_history_free(history);
|
|
|
4c8e44 |
+ return TRUE;
|
|
|
4c8e44 |
+ } else {
|
|
|
4c8e44 |
+ crm_info("Skip triggering stonith history-sync as stonith is disconnected");
|
|
|
4c8e44 |
+ return FALSE;
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+}
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+static void
|
|
|
4c8e44 |
+tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data)
|
|
|
4c8e44 |
+{
|
|
|
4c8e44 |
+ char *uuid = NULL;
|
|
|
4c8e44 |
+ int stonith_id = -1;
|
|
|
4c8e44 |
+ int transition_id = -1;
|
|
|
4c8e44 |
+ crm_action_t *action = NULL;
|
|
|
4c8e44 |
+ int call_id = data->call_id;
|
|
|
4c8e44 |
+ int rc = data->rc;
|
|
|
4c8e44 |
+ char *userdata = data->userdata;
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ CRM_CHECK(userdata != NULL, return);
|
|
|
4c8e44 |
+ crm_notice("Stonith operation %d/%s: %s (%d)", call_id, (char *)userdata,
|
|
|
4c8e44 |
+ pcmk_strerror(rc), rc);
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ if (AM_I_DC == FALSE) {
|
|
|
4c8e44 |
+ return;
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ /* crm_info("call=%d, optype=%d, node_name=%s, result=%d, node_list=%s, action=%s", */
|
|
|
4c8e44 |
+ /* op->call_id, op->optype, op->node_name, op->op_result, */
|
|
|
4c8e44 |
+ /* (char *)op->node_list, op->private_data); */
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ /* filter out old STONITH actions */
|
|
|
4c8e44 |
+ CRM_CHECK(decode_transition_key(userdata, &uuid, &transition_id, &stonith_id, NULL),
|
|
|
4c8e44 |
+ goto bail);
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ if (transition_graph->complete || stonith_id < 0 || safe_str_neq(uuid, te_uuid)
|
|
|
4c8e44 |
+ || transition_graph->id != transition_id) {
|
|
|
4c8e44 |
+ crm_info("Ignoring STONITH action initiated outside of the current transition");
|
|
|
4c8e44 |
+ goto bail;
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ action = get_action(stonith_id, FALSE);
|
|
|
4c8e44 |
+ if (action == NULL) {
|
|
|
4c8e44 |
+ crm_err("Stonith action not matched");
|
|
|
4c8e44 |
+ goto bail;
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ stop_te_timer(action->timer);
|
|
|
4c8e44 |
+ if (rc == pcmk_ok) {
|
|
|
4c8e44 |
+ const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
|
|
|
4c8e44 |
+ const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
|
|
|
4c8e44 |
+ const char *op = crm_meta_value(action->params, "stonith_action");
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ crm_info("Stonith operation %d for %s passed", call_id, target);
|
|
|
4c8e44 |
+ if (action->confirmed == FALSE) {
|
|
|
4c8e44 |
+ te_action_confirmed(action);
|
|
|
4c8e44 |
+ if (safe_str_eq("on", op)) {
|
|
|
4c8e44 |
+ const char *value = NULL;
|
|
|
4c8e44 |
+ char *now = crm_itoa(time(NULL));
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ update_attrd(target, CRM_ATTR_UNFENCED, now, NULL, FALSE);
|
|
|
4c8e44 |
+ free(now);
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_ALL);
|
|
|
4c8e44 |
+ update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL, FALSE);
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_SECURE);
|
|
|
4c8e44 |
+ update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL, FALSE);
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ } else if (action->sent_update == FALSE) {
|
|
|
4c8e44 |
+ send_stonith_update(action, target, uuid);
|
|
|
4c8e44 |
+ action->sent_update = TRUE;
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+ st_fail_count_reset(target);
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ } else {
|
|
|
4c8e44 |
+ const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
|
|
|
4c8e44 |
+ enum transition_action abort_action = tg_restart;
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ action->failed = TRUE;
|
|
|
4c8e44 |
+ crm_notice("Stonith operation %d for %s failed (%s): aborting transition.",
|
|
|
4c8e44 |
+ call_id, target, pcmk_strerror(rc));
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ /* If no fence devices were available, there's no use in immediately
|
|
|
4c8e44 |
+ * checking again, so don't start a new transition in that case.
|
|
|
4c8e44 |
+ */
|
|
|
4c8e44 |
+ if (rc == -ENODEV) {
|
|
|
4c8e44 |
+ crm_warn("No devices found in cluster to fence %s, giving up",
|
|
|
4c8e44 |
+ target);
|
|
|
4c8e44 |
+ abort_action = tg_stop;
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ /* Increment the fail count now, so abort_for_stonith_failure() can
|
|
|
4c8e44 |
+ * check it. Non-DC nodes will increment it in tengine_stonith_notify().
|
|
|
4c8e44 |
+ */
|
|
|
4c8e44 |
+ st_fail_count_increment(target);
|
|
|
4c8e44 |
+ abort_for_stonith_failure(abort_action, target, NULL);
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ update_graph(transition_graph, action);
|
|
|
4c8e44 |
+ trigger_graph();
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ bail:
|
|
|
4c8e44 |
+ free(userdata);
|
|
|
4c8e44 |
+ free(uuid);
|
|
|
4c8e44 |
+ return;
|
|
|
4c8e44 |
+}
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+gboolean
|
|
|
4c8e44 |
+te_fence_node(crm_graph_t *graph, crm_action_t *action)
|
|
|
4c8e44 |
+{
|
|
|
4c8e44 |
+ int rc = 0;
|
|
|
4c8e44 |
+ const char *id = NULL;
|
|
|
4c8e44 |
+ const char *uuid = NULL;
|
|
|
4c8e44 |
+ const char *target = NULL;
|
|
|
4c8e44 |
+ const char *type = NULL;
|
|
|
4c8e44 |
+ gboolean invalid_action = FALSE;
|
|
|
4c8e44 |
+ enum stonith_call_options options = st_opt_none;
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ id = ID(action->xml);
|
|
|
4c8e44 |
+ target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
|
|
|
4c8e44 |
+ uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
|
|
|
4c8e44 |
+ type = crm_meta_value(action->params, "stonith_action");
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ CRM_CHECK(id != NULL, invalid_action = TRUE);
|
|
|
4c8e44 |
+ CRM_CHECK(uuid != NULL, invalid_action = TRUE);
|
|
|
4c8e44 |
+ CRM_CHECK(type != NULL, invalid_action = TRUE);
|
|
|
4c8e44 |
+ CRM_CHECK(target != NULL, invalid_action = TRUE);
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ if (invalid_action) {
|
|
|
4c8e44 |
+ crm_log_xml_warn(action->xml, "BadAction");
|
|
|
4c8e44 |
+ return FALSE;
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ crm_notice("Requesting fencing (%s) of node %s "
|
|
|
4c8e44 |
+ CRM_XS " action=%s timeout=%d",
|
|
|
4c8e44 |
+ type, target, id, transition_graph->stonith_timeout);
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ /* Passing NULL means block, retrying until connected or attempts run out */
|
|
|
4c8e44 |
+ te_connect_stonith(NULL);
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ if (crmd_join_phase_count(crm_join_confirmed) == 1) {
|
|
|
4c8e44 |
+ options |= st_opt_allow_suicide;
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ rc = stonith_api->cmds->fence(stonith_api, options, target, type,
|
|
|
4c8e44 |
+ transition_graph->stonith_timeout / 1000, 0);
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ stonith_api->cmds->register_callback(stonith_api, rc, transition_graph->stonith_timeout / 1000,
|
|
|
4c8e44 |
+ st_opt_timeout_updates,
|
|
|
4c8e44 |
+ generate_transition_key(transition_graph->id, action->id,
|
|
|
4c8e44 |
+ 0, te_uuid),
|
|
|
4c8e44 |
+ "tengine_stonith_callback", tengine_stonith_callback);
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ return TRUE;
|
|
|
4c8e44 |
+}
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+/* end stonith API client functions */
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+/*
|
|
|
4c8e44 |
+ * stonith history synchronization
|
|
|
4c8e44 |
+ *
|
|
|
4c8e44 |
+ * Each node's fencer keeps track of a cluster-wide fencing history. When a node
|
|
|
4c8e44 |
+ * joins or leaves, we need to synchronize the history across all nodes.
|
|
|
4c8e44 |
+ */
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+static crm_trigger_t *stonith_history_sync_trigger = NULL;
|
|
|
4c8e44 |
+static mainloop_timer_t *stonith_history_sync_timer = NULL;
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+static gboolean
|
|
|
4c8e44 |
+stonith_history_sync_set_trigger(gpointer user_data)
|
|
|
4c8e44 |
+{
|
|
|
4c8e44 |
+ mainloop_set_trigger(stonith_history_sync_trigger);
|
|
|
4c8e44 |
+ return FALSE;
|
|
|
4c8e44 |
+}
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+void
|
|
|
4c8e44 |
+te_trigger_stonith_history_sync(void)
|
|
|
4c8e44 |
+{
|
|
|
4c8e44 |
+ /* trigger a sync in 5s to give more nodes the
|
|
|
4c8e44 |
+ * chance to show up so that we don't create
|
|
|
4c8e44 |
+ * unnecessary stonith-history-sync traffic
|
|
|
4c8e44 |
+ */
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ /* as we are finally checking the stonith-connection
|
|
|
4c8e44 |
+ * in do_stonith_history_sync we should be fine
|
|
|
4c8e44 |
+ * leaving stonith_history_sync_timer & stonith_history_sync_trigger
|
|
|
4c8e44 |
+ * around
|
|
|
4c8e44 |
+ */
|
|
|
4c8e44 |
+ if (stonith_history_sync_trigger == NULL) {
|
|
|
4c8e44 |
+ stonith_history_sync_trigger =
|
|
|
4c8e44 |
+ mainloop_add_trigger(G_PRIORITY_LOW,
|
|
|
4c8e44 |
+ do_stonith_history_sync, NULL);
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ if(stonith_history_sync_timer == NULL) {
|
|
|
4c8e44 |
+ stonith_history_sync_timer =
|
|
|
4c8e44 |
+ mainloop_timer_add("history_sync", 5000,
|
|
|
4c8e44 |
+ FALSE, stonith_history_sync_set_trigger,
|
|
|
4c8e44 |
+ NULL);
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+ crm_info("Fence history will be synchronized cluster-wide within 5 seconds");
|
|
|
4c8e44 |
+ mainloop_timer_start(stonith_history_sync_timer);
|
|
|
4c8e44 |
+}
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+/* end stonith history synchronization functions */
|
|
|
4c8e44 |
diff --git a/daemons/controld/controld_fencing.h b/daemons/controld/controld_fencing.h
|
|
|
4c8e44 |
new file mode 100644
|
|
|
4c8e44 |
index 0000000..b80a6c9
|
|
|
4c8e44 |
--- /dev/null
|
|
|
4c8e44 |
+++ b/daemons/controld/controld_fencing.h
|
|
|
4c8e44 |
@@ -0,0 +1,37 @@
|
|
|
4c8e44 |
+/*
|
|
|
4c8e44 |
+ * Copyright 2004-2019 the Pacemaker project contributors
|
|
|
4c8e44 |
+ *
|
|
|
4c8e44 |
+ * The version control history for this file may have further details.
|
|
|
4c8e44 |
+ *
|
|
|
4c8e44 |
+ * This source code is licensed under the GNU Lesser General Public License
|
|
|
4c8e44 |
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
|
|
|
4c8e44 |
+ */
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+#ifndef CONTROLD_FENCING__H
|
|
|
4c8e44 |
+# define CONTROLD_FENCING__H
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+#include <stdbool.h> // bool
|
|
|
4c8e44 |
+#include <crm/transition.h> // crm_graph_t, crm_action_t
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+extern crm_trigger_t *stonith_reconnect;
|
|
|
4c8e44 |
+extern char *te_client_id;
|
|
|
4c8e44 |
+extern stonith_t *stonith_api;
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+// stonith fail counts
|
|
|
4c8e44 |
+void st_fail_count_reset(const char * target);
|
|
|
4c8e44 |
+void update_stonith_max_attempts(const char* value);
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+// stonith API client
|
|
|
4c8e44 |
+gboolean te_connect_stonith(gpointer user_data);
|
|
|
4c8e44 |
+gboolean te_fence_node(crm_graph_t *graph, crm_action_t *action);
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+// stonith cleanup list
|
|
|
4c8e44 |
+void add_stonith_cleanup(const char *target);
|
|
|
4c8e44 |
+void remove_stonith_cleanup(const char *target);
|
|
|
4c8e44 |
+void purge_stonith_cleanup(void);
|
|
|
4c8e44 |
+void execute_stonith_cleanup(void);
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+// stonith history synchronization
|
|
|
4c8e44 |
+void te_trigger_stonith_history_sync(void);
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+#endif
|
|
|
4c8e44 |
diff --git a/daemons/controld/controld_fsa.c b/daemons/controld/controld_fsa.c
|
|
|
4c8e44 |
index 9eca530..dc1937f 100644
|
|
|
4c8e44 |
--- a/daemons/controld/controld_fsa.c
|
|
|
4c8e44 |
+++ b/daemons/controld/controld_fsa.c
|
|
|
4c8e44 |
@@ -26,6 +26,7 @@
|
|
|
4c8e44 |
#include <pacemaker-controld.h>
|
|
|
4c8e44 |
#include <controld_messages.h>
|
|
|
4c8e44 |
#include <controld_fsa.h>
|
|
|
4c8e44 |
+#include <controld_fencing.h>
|
|
|
4c8e44 |
#include <controld_transition.h>
|
|
|
4c8e44 |
#include <controld_matrix.h>
|
|
|
4c8e44 |
|
|
|
4c8e44 |
diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c
|
|
|
4c8e44 |
index 2ebc203..8f37cbf 100644
|
|
|
4c8e44 |
--- a/daemons/controld/controld_messages.c
|
|
|
4c8e44 |
+++ b/daemons/controld/controld_messages.c
|
|
|
4c8e44 |
@@ -25,6 +25,7 @@
|
|
|
4c8e44 |
#include <pacemaker-controld.h>
|
|
|
4c8e44 |
#include <controld_messages.h>
|
|
|
4c8e44 |
#include <controld_lrm.h>
|
|
|
4c8e44 |
+#include <controld_fencing.h>
|
|
|
4c8e44 |
#include <controld_transition.h>
|
|
|
4c8e44 |
#include <controld_throttle.h>
|
|
|
4c8e44 |
|
|
|
4c8e44 |
diff --git a/daemons/controld/controld_te_actions.c b/daemons/controld/controld_te_actions.c
|
|
|
4c8e44 |
index c95c6c7..2f61556 100644
|
|
|
4c8e44 |
--- a/daemons/controld/controld_te_actions.c
|
|
|
4c8e44 |
+++ b/daemons/controld/controld_te_actions.c
|
|
|
4c8e44 |
@@ -1,5 +1,5 @@
|
|
|
4c8e44 |
/*
|
|
|
4c8e44 |
- * Copyright 2004-2018 Andrew Beekhof <andrew@beekhof.net>
|
|
|
4c8e44 |
+ * Copyright 2004-2019 the Pacemaker project contributors
|
|
|
4c8e44 |
*
|
|
|
4c8e44 |
* This source code is licensed under the GNU General Public License version 2
|
|
|
4c8e44 |
* or later (GPLv2+) WITHOUT ANY WARRANTY.
|
|
|
4c8e44 |
@@ -17,6 +17,7 @@
|
|
|
4c8e44 |
|
|
|
4c8e44 |
#include <controld_fsa.h>
|
|
|
4c8e44 |
#include <controld_lrm.h>
|
|
|
4c8e44 |
+#include <controld_fencing.h>
|
|
|
4c8e44 |
#include <controld_messages.h>
|
|
|
4c8e44 |
#include <crm/cluster.h>
|
|
|
4c8e44 |
#include <controld_throttle.h>
|
|
|
4c8e44 |
@@ -76,124 +77,6 @@ te_pseudo_action(crm_graph_t * graph, crm_action_t * pseudo)
|
|
|
4c8e44 |
return TRUE;
|
|
|
4c8e44 |
}
|
|
|
4c8e44 |
|
|
|
4c8e44 |
-void
|
|
|
4c8e44 |
-send_stonith_update(crm_action_t * action, const char *target, const char *uuid)
|
|
|
4c8e44 |
-{
|
|
|
4c8e44 |
- int rc = pcmk_ok;
|
|
|
4c8e44 |
- crm_node_t *peer = NULL;
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- /* We (usually) rely on the membership layer to do node_update_cluster,
|
|
|
4c8e44 |
- * and the peer status callback to do node_update_peer, because the node
|
|
|
4c8e44 |
- * might have already rejoined before we get the stonith result here.
|
|
|
4c8e44 |
- */
|
|
|
4c8e44 |
- int flags = node_update_join | node_update_expected;
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- /* zero out the node-status & remove all LRM status info */
|
|
|
4c8e44 |
- xmlNode *node_state = NULL;
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- CRM_CHECK(target != NULL, return);
|
|
|
4c8e44 |
- CRM_CHECK(uuid != NULL, return);
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- /* Make sure the membership and join caches are accurate */
|
|
|
4c8e44 |
- peer = crm_get_peer_full(0, target, CRM_GET_PEER_ANY);
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- CRM_CHECK(peer != NULL, return);
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- if (peer->state == NULL) {
|
|
|
4c8e44 |
- /* Usually, we rely on the membership layer to update the cluster state
|
|
|
4c8e44 |
- * in the CIB. However, if the node has never been seen, do it here, so
|
|
|
4c8e44 |
- * the node is not considered unclean.
|
|
|
4c8e44 |
- */
|
|
|
4c8e44 |
- flags |= node_update_cluster;
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- if (peer->uuid == NULL) {
|
|
|
4c8e44 |
- crm_info("Recording uuid '%s' for node '%s'", uuid, target);
|
|
|
4c8e44 |
- peer->uuid = strdup(uuid);
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- crmd_peer_down(peer, TRUE);
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- /* Generate a node state update for the CIB */
|
|
|
4c8e44 |
- node_state = create_node_state_update(peer, flags, NULL, __FUNCTION__);
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- /* we have to mark whether or not remote nodes have already been fenced */
|
|
|
4c8e44 |
- if (peer->flags & crm_remote_node) {
|
|
|
4c8e44 |
- time_t now = time(NULL);
|
|
|
4c8e44 |
- char *now_s = crm_itoa(now);
|
|
|
4c8e44 |
- crm_xml_add(node_state, XML_NODE_IS_FENCED, now_s);
|
|
|
4c8e44 |
- free(now_s);
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- /* Force our known ID */
|
|
|
4c8e44 |
- crm_xml_add(node_state, XML_ATTR_UUID, uuid);
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, node_state,
|
|
|
4c8e44 |
- cib_quorum_override | cib_scope_local | cib_can_create);
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- /* Delay processing the trigger until the update completes */
|
|
|
4c8e44 |
- crm_debug("Sending fencing update %d for %s", rc, target);
|
|
|
4c8e44 |
- fsa_register_cib_callback(rc, FALSE, strdup(target), cib_fencing_updated);
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- /* Make sure it sticks */
|
|
|
4c8e44 |
- /* fsa_cib_conn->cmds->bump_epoch(fsa_cib_conn, cib_quorum_override|cib_scope_local); */
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- erase_status_tag(peer->uname, XML_CIB_TAG_LRM, cib_scope_local);
|
|
|
4c8e44 |
- erase_status_tag(peer->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- free_xml(node_state);
|
|
|
4c8e44 |
- return;
|
|
|
4c8e44 |
-}
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
-static gboolean
|
|
|
4c8e44 |
-te_fence_node(crm_graph_t * graph, crm_action_t * action)
|
|
|
4c8e44 |
-{
|
|
|
4c8e44 |
- int rc = 0;
|
|
|
4c8e44 |
- const char *id = NULL;
|
|
|
4c8e44 |
- const char *uuid = NULL;
|
|
|
4c8e44 |
- const char *target = NULL;
|
|
|
4c8e44 |
- const char *type = NULL;
|
|
|
4c8e44 |
- gboolean invalid_action = FALSE;
|
|
|
4c8e44 |
- enum stonith_call_options options = st_opt_none;
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- id = ID(action->xml);
|
|
|
4c8e44 |
- target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
|
|
|
4c8e44 |
- uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
|
|
|
4c8e44 |
- type = crm_meta_value(action->params, "stonith_action");
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- CRM_CHECK(id != NULL, invalid_action = TRUE);
|
|
|
4c8e44 |
- CRM_CHECK(uuid != NULL, invalid_action = TRUE);
|
|
|
4c8e44 |
- CRM_CHECK(type != NULL, invalid_action = TRUE);
|
|
|
4c8e44 |
- CRM_CHECK(target != NULL, invalid_action = TRUE);
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- if (invalid_action) {
|
|
|
4c8e44 |
- crm_log_xml_warn(action->xml, "BadAction");
|
|
|
4c8e44 |
- return FALSE;
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- crm_notice("Requesting fencing (%s) of node %s "
|
|
|
4c8e44 |
- CRM_XS " action=%s timeout=%d",
|
|
|
4c8e44 |
- type, target, id, transition_graph->stonith_timeout);
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- /* Passing NULL means block until we can connect... */
|
|
|
4c8e44 |
- te_connect_stonith(NULL);
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- if (crmd_join_phase_count(crm_join_confirmed) == 1) {
|
|
|
4c8e44 |
- options |= st_opt_allow_suicide;
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- rc = stonith_api->cmds->fence(stonith_api, options, target, type,
|
|
|
4c8e44 |
- transition_graph->stonith_timeout / 1000, 0);
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- stonith_api->cmds->register_callback(stonith_api, rc, transition_graph->stonith_timeout / 1000,
|
|
|
4c8e44 |
- st_opt_timeout_updates,
|
|
|
4c8e44 |
- generate_transition_key(transition_graph->id, action->id,
|
|
|
4c8e44 |
- 0, te_uuid),
|
|
|
4c8e44 |
- "tengine_stonith_callback", tengine_stonith_callback);
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- return TRUE;
|
|
|
4c8e44 |
-}
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
static int
|
|
|
4c8e44 |
get_target_rc(crm_action_t * action)
|
|
|
4c8e44 |
{
|
|
|
4c8e44 |
diff --git a/daemons/controld/controld_te_callbacks.c b/daemons/controld/controld_te_callbacks.c
|
|
|
4c8e44 |
index 22b5f4b..1ab703f 100644
|
|
|
4c8e44 |
--- a/daemons/controld/controld_te_callbacks.c
|
|
|
4c8e44 |
+++ b/daemons/controld/controld_te_callbacks.c
|
|
|
4c8e44 |
@@ -17,6 +17,7 @@
|
|
|
4c8e44 |
|
|
|
4c8e44 |
#include <controld_transition.h>
|
|
|
4c8e44 |
#include <controld_fsa.h>
|
|
|
4c8e44 |
+#include <controld_fencing.h>
|
|
|
4c8e44 |
|
|
|
4c8e44 |
#include <crm/cluster.h> /* For ONLINESTATUS etc */
|
|
|
4c8e44 |
|
|
|
4c8e44 |
@@ -27,21 +28,9 @@ gboolean shuttingdown = FALSE;
|
|
|
4c8e44 |
crm_graph_t *transition_graph;
|
|
|
4c8e44 |
crm_trigger_t *transition_trigger = NULL;
|
|
|
4c8e44 |
|
|
|
4c8e44 |
-static unsigned long int stonith_max_attempts = 10;
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
/* #define RSC_OP_TEMPLATE "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_CIB_TAG_STATE"[@uname='%s']"//"XML_LRM_TAG_RSC_OP"[@id='%s]" */
|
|
|
4c8e44 |
#define RSC_OP_TEMPLATE "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_LRM_TAG_RSC_OP"[@id='%s']"
|
|
|
4c8e44 |
|
|
|
4c8e44 |
-void
|
|
|
4c8e44 |
-update_stonith_max_attempts(const char* value)
|
|
|
4c8e44 |
-{
|
|
|
4c8e44 |
- if (safe_str_eq(value, CRM_INFINITY_S)) {
|
|
|
4c8e44 |
- stonith_max_attempts = CRM_SCORE_INFINITY;
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
- else {
|
|
|
4c8e44 |
- stonith_max_attempts = crm_int_helper(value, NULL);
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-}
|
|
|
4c8e44 |
static void
|
|
|
4c8e44 |
te_update_diff_v1(const char *event, xmlNode *diff)
|
|
|
4c8e44 |
{
|
|
|
4c8e44 |
@@ -646,236 +635,6 @@ process_te_message(xmlNode * msg, xmlNode * xml_data)
|
|
|
4c8e44 |
return TRUE;
|
|
|
4c8e44 |
}
|
|
|
4c8e44 |
|
|
|
4c8e44 |
-GHashTable *stonith_failures = NULL;
|
|
|
4c8e44 |
-struct st_fail_rec {
|
|
|
4c8e44 |
- int count;
|
|
|
4c8e44 |
-};
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
-static gboolean
|
|
|
4c8e44 |
-too_many_st_failures(const char *target)
|
|
|
4c8e44 |
-{
|
|
|
4c8e44 |
- GHashTableIter iter;
|
|
|
4c8e44 |
- const char *key = NULL;
|
|
|
4c8e44 |
- struct st_fail_rec *value = NULL;
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- if (stonith_failures == NULL) {
|
|
|
4c8e44 |
- return FALSE;
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- if (target == NULL) {
|
|
|
4c8e44 |
- g_hash_table_iter_init(&iter, stonith_failures);
|
|
|
4c8e44 |
- while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
|
|
|
4c8e44 |
- if (value->count >= stonith_max_attempts) {
|
|
|
4c8e44 |
- target = (const char*)key;
|
|
|
4c8e44 |
- goto too_many;
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
- } else {
|
|
|
4c8e44 |
- value = g_hash_table_lookup(stonith_failures, target);
|
|
|
4c8e44 |
- if ((value != NULL) && (value->count >= stonith_max_attempts)) {
|
|
|
4c8e44 |
- goto too_many;
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
- return FALSE;
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
-too_many:
|
|
|
4c8e44 |
- crm_warn("Too many failures (%d) to fence %s, giving up",
|
|
|
4c8e44 |
- value->count, target);
|
|
|
4c8e44 |
- return TRUE;
|
|
|
4c8e44 |
-}
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
-/*!
|
|
|
4c8e44 |
- * \internal
|
|
|
4c8e44 |
- * \brief Reset a stonith fail count
|
|
|
4c8e44 |
- *
|
|
|
4c8e44 |
- * \param[in] target Name of node to reset, or NULL for all
|
|
|
4c8e44 |
- */
|
|
|
4c8e44 |
-void
|
|
|
4c8e44 |
-st_fail_count_reset(const char *target)
|
|
|
4c8e44 |
-{
|
|
|
4c8e44 |
- if (stonith_failures == NULL) {
|
|
|
4c8e44 |
- return;
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- if (target) {
|
|
|
4c8e44 |
- struct st_fail_rec *rec = NULL;
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- rec = g_hash_table_lookup(stonith_failures, target);
|
|
|
4c8e44 |
- if (rec) {
|
|
|
4c8e44 |
- rec->count = 0;
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
- } else {
|
|
|
4c8e44 |
- GHashTableIter iter;
|
|
|
4c8e44 |
- const char *key = NULL;
|
|
|
4c8e44 |
- struct st_fail_rec *rec = NULL;
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- g_hash_table_iter_init(&iter, stonith_failures);
|
|
|
4c8e44 |
- while (g_hash_table_iter_next(&iter, (gpointer *) &key,
|
|
|
4c8e44 |
- (gpointer *) &rec)) {
|
|
|
4c8e44 |
- rec->count = 0;
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-}
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
-void
|
|
|
4c8e44 |
-st_fail_count_increment(const char *target)
|
|
|
4c8e44 |
-{
|
|
|
4c8e44 |
- struct st_fail_rec *rec = NULL;
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- if (stonith_failures == NULL) {
|
|
|
4c8e44 |
- stonith_failures = crm_str_table_new();
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- rec = g_hash_table_lookup(stonith_failures, target);
|
|
|
4c8e44 |
- if (rec) {
|
|
|
4c8e44 |
- rec->count++;
|
|
|
4c8e44 |
- } else {
|
|
|
4c8e44 |
- rec = malloc(sizeof(struct st_fail_rec));
|
|
|
4c8e44 |
- if(rec == NULL) {
|
|
|
4c8e44 |
- return;
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- rec->count = 1;
|
|
|
4c8e44 |
- g_hash_table_insert(stonith_failures, strdup(target), rec);
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-}
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
-/*!
|
|
|
4c8e44 |
- * \internal
|
|
|
4c8e44 |
- * \brief Abort transition due to stonith failure
|
|
|
4c8e44 |
- *
|
|
|
4c8e44 |
- * \param[in] abort_action Whether to restart or stop transition
|
|
|
4c8e44 |
- * \param[in] target Don't restart if this (NULL for any) has too many failures
|
|
|
4c8e44 |
- * \param[in] reason Log this stonith action XML as abort reason (or NULL)
|
|
|
4c8e44 |
- */
|
|
|
4c8e44 |
-void
|
|
|
4c8e44 |
-abort_for_stonith_failure(enum transition_action abort_action,
|
|
|
4c8e44 |
- const char *target, xmlNode *reason)
|
|
|
4c8e44 |
-{
|
|
|
4c8e44 |
- /* If stonith repeatedly fails, we eventually give up on starting a new
|
|
|
4c8e44 |
- * transition for that reason.
|
|
|
4c8e44 |
- */
|
|
|
4c8e44 |
- if ((abort_action != tg_stop) && too_many_st_failures(target)) {
|
|
|
4c8e44 |
- abort_action = tg_stop;
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
- abort_transition(INFINITY, abort_action, "Stonith failed", reason);
|
|
|
4c8e44 |
-}
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
-void
|
|
|
4c8e44 |
-tengine_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data)
|
|
|
4c8e44 |
-{
|
|
|
4c8e44 |
- char *uuid = NULL;
|
|
|
4c8e44 |
- int stonith_id = -1;
|
|
|
4c8e44 |
- int transition_id = -1;
|
|
|
4c8e44 |
- crm_action_t *action = NULL;
|
|
|
4c8e44 |
- int call_id = data->call_id;
|
|
|
4c8e44 |
- int rc = data->rc;
|
|
|
4c8e44 |
- char *userdata = data->userdata;
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- CRM_CHECK(userdata != NULL, return);
|
|
|
4c8e44 |
- crm_notice("Stonith operation %d/%s: %s (%d)", call_id, (char *)userdata,
|
|
|
4c8e44 |
- pcmk_strerror(rc), rc);
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- if (AM_I_DC == FALSE) {
|
|
|
4c8e44 |
- return;
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- /* crm_info("call=%d, optype=%d, node_name=%s, result=%d, node_list=%s, action=%s", */
|
|
|
4c8e44 |
- /* op->call_id, op->optype, op->node_name, op->op_result, */
|
|
|
4c8e44 |
- /* (char *)op->node_list, op->private_data); */
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- /* filter out old STONITH actions */
|
|
|
4c8e44 |
- CRM_CHECK(decode_transition_key(userdata, &uuid, &transition_id, &stonith_id, NULL),
|
|
|
4c8e44 |
- goto bail);
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- if (transition_graph->complete || stonith_id < 0 || safe_str_neq(uuid, te_uuid)
|
|
|
4c8e44 |
- || transition_graph->id != transition_id) {
|
|
|
4c8e44 |
- crm_info("Ignoring STONITH action initiated outside of the current transition");
|
|
|
4c8e44 |
- goto bail;
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- action = get_action(stonith_id, FALSE);
|
|
|
4c8e44 |
- if (action == NULL) {
|
|
|
4c8e44 |
- crm_err("Stonith action not matched");
|
|
|
4c8e44 |
- goto bail;
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- stop_te_timer(action->timer);
|
|
|
4c8e44 |
- if (rc == pcmk_ok) {
|
|
|
4c8e44 |
- const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
|
|
|
4c8e44 |
- const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
|
|
|
4c8e44 |
- const char *op = crm_meta_value(action->params, "stonith_action");
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- crm_info("Stonith operation %d for %s passed", call_id, target);
|
|
|
4c8e44 |
- if (action->confirmed == FALSE) {
|
|
|
4c8e44 |
- te_action_confirmed(action);
|
|
|
4c8e44 |
- if (safe_str_eq("on", op)) {
|
|
|
4c8e44 |
- const char *value = NULL;
|
|
|
4c8e44 |
- char *now = crm_itoa(time(NULL));
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- update_attrd(target, CRM_ATTR_UNFENCED, now, NULL, FALSE);
|
|
|
4c8e44 |
- free(now);
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_ALL);
|
|
|
4c8e44 |
- update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL, FALSE);
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_SECURE);
|
|
|
4c8e44 |
- update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL, FALSE);
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- } else if (action->sent_update == FALSE) {
|
|
|
4c8e44 |
- send_stonith_update(action, target, uuid);
|
|
|
4c8e44 |
- action->sent_update = TRUE;
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
- st_fail_count_reset(target);
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- } else {
|
|
|
4c8e44 |
- const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
|
|
|
4c8e44 |
- enum transition_action abort_action = tg_restart;
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- action->failed = TRUE;
|
|
|
4c8e44 |
- crm_notice("Stonith operation %d for %s failed (%s): aborting transition.",
|
|
|
4c8e44 |
- call_id, target, pcmk_strerror(rc));
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- /* If no fence devices were available, there's no use in immediately
|
|
|
4c8e44 |
- * checking again, so don't start a new transition in that case.
|
|
|
4c8e44 |
- */
|
|
|
4c8e44 |
- if (rc == -ENODEV) {
|
|
|
4c8e44 |
- crm_warn("No devices found in cluster to fence %s, giving up",
|
|
|
4c8e44 |
- target);
|
|
|
4c8e44 |
- abort_action = tg_stop;
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- /* Increment the fail count now, so abort_for_stonith_failure() can
|
|
|
4c8e44 |
- * check it. Non-DC nodes will increment it in tengine_stonith_notify().
|
|
|
4c8e44 |
- */
|
|
|
4c8e44 |
- st_fail_count_increment(target);
|
|
|
4c8e44 |
- abort_for_stonith_failure(abort_action, target, NULL);
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- update_graph(transition_graph, action);
|
|
|
4c8e44 |
- trigger_graph();
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- bail:
|
|
|
4c8e44 |
- free(userdata);
|
|
|
4c8e44 |
- free(uuid);
|
|
|
4c8e44 |
- return;
|
|
|
4c8e44 |
-}
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
-void
|
|
|
4c8e44 |
-cib_fencing_updated(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
|
|
|
4c8e44 |
-{
|
|
|
4c8e44 |
- if (rc < pcmk_ok) {
|
|
|
4c8e44 |
- crm_err("Fencing update %d for %s: failed - %s (%d)",
|
|
|
4c8e44 |
- call_id, (char *)user_data, pcmk_strerror(rc), rc);
|
|
|
4c8e44 |
- crm_log_xml_warn(msg, "Failed update");
|
|
|
4c8e44 |
- abort_transition(INFINITY, tg_shutdown, "CIB update failed", NULL);
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- } else {
|
|
|
4c8e44 |
- crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data);
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-}
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
void
|
|
|
4c8e44 |
cib_action_updated(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
|
|
|
4c8e44 |
{
|
|
|
4c8e44 |
diff --git a/daemons/controld/controld_te_utils.c b/daemons/controld/controld_te_utils.c
|
|
|
4c8e44 |
index 22f83ad..1496244 100644
|
|
|
4c8e44 |
--- a/daemons/controld/controld_te_utils.c
|
|
|
4c8e44 |
+++ b/daemons/controld/controld_te_utils.c
|
|
|
4c8e44 |
@@ -6,441 +6,14 @@
|
|
|
4c8e44 |
*/
|
|
|
4c8e44 |
|
|
|
4c8e44 |
#include <crm_internal.h>
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
-#include <sys/param.h>
|
|
|
4c8e44 |
#include <crm/crm.h>
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
#include <crm/msg_xml.h>
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
#include <crm/common/xml.h>
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
#include <controld_transition.h>
|
|
|
4c8e44 |
#include <controld_fsa.h>
|
|
|
4c8e44 |
-#include <controld_lrm.h>
|
|
|
4c8e44 |
#include <controld_messages.h>
|
|
|
4c8e44 |
#include <controld_throttle.h>
|
|
|
4c8e44 |
-#include <crm/fencing/internal.h>
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
-crm_trigger_t *stonith_reconnect = NULL;
|
|
|
4c8e44 |
-static crm_trigger_t *stonith_history_sync_trigger = NULL;
|
|
|
4c8e44 |
-static mainloop_timer_t *stonith_history_sync_timer = NULL;
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
-/*
|
|
|
4c8e44 |
- * stonith cleanup list
|
|
|
4c8e44 |
- *
|
|
|
4c8e44 |
- * If the DC is shot, proper notifications might not go out.
|
|
|
4c8e44 |
- * The stonith cleanup list allows the cluster to (re-)send
|
|
|
4c8e44 |
- * notifications once a new DC is elected.
|
|
|
4c8e44 |
- */
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
-static GListPtr stonith_cleanup_list = NULL;
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
-/*!
|
|
|
4c8e44 |
- * \internal
|
|
|
4c8e44 |
- * \brief Add a node to the stonith cleanup list
|
|
|
4c8e44 |
- *
|
|
|
4c8e44 |
- * \param[in] target Name of node to add
|
|
|
4c8e44 |
- */
|
|
|
4c8e44 |
-void
|
|
|
4c8e44 |
-add_stonith_cleanup(const char *target) {
|
|
|
4c8e44 |
- stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(target));
|
|
|
4c8e44 |
-}
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
-/*!
|
|
|
4c8e44 |
- * \internal
|
|
|
4c8e44 |
- * \brief Remove a node from the stonith cleanup list
|
|
|
4c8e44 |
- *
|
|
|
4c8e44 |
- * \param[in] Name of node to remove
|
|
|
4c8e44 |
- */
|
|
|
4c8e44 |
-void
|
|
|
4c8e44 |
-remove_stonith_cleanup(const char *target)
|
|
|
4c8e44 |
-{
|
|
|
4c8e44 |
- GListPtr iter = stonith_cleanup_list;
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- while (iter != NULL) {
|
|
|
4c8e44 |
- GListPtr tmp = iter;
|
|
|
4c8e44 |
- char *iter_name = tmp->data;
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- iter = iter->next;
|
|
|
4c8e44 |
- if (safe_str_eq(target, iter_name)) {
|
|
|
4c8e44 |
- crm_trace("Removing %s from the cleanup list", iter_name);
|
|
|
4c8e44 |
- stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, tmp);
|
|
|
4c8e44 |
- free(iter_name);
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-}
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
-/*!
|
|
|
4c8e44 |
- * \internal
|
|
|
4c8e44 |
- * \brief Purge all entries from the stonith cleanup list
|
|
|
4c8e44 |
- */
|
|
|
4c8e44 |
-void
|
|
|
4c8e44 |
-purge_stonith_cleanup()
|
|
|
4c8e44 |
-{
|
|
|
4c8e44 |
- if (stonith_cleanup_list) {
|
|
|
4c8e44 |
- GListPtr iter = NULL;
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
|
|
|
4c8e44 |
- char *target = iter->data;
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- crm_info("Purging %s from stonith cleanup list", target);
|
|
|
4c8e44 |
- free(target);
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
- g_list_free(stonith_cleanup_list);
|
|
|
4c8e44 |
- stonith_cleanup_list = NULL;
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-}
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
-/*!
|
|
|
4c8e44 |
- * \internal
|
|
|
4c8e44 |
- * \brief Send stonith updates for all entries in cleanup list, then purge it
|
|
|
4c8e44 |
- */
|
|
|
4c8e44 |
-void
|
|
|
4c8e44 |
-execute_stonith_cleanup()
|
|
|
4c8e44 |
-{
|
|
|
4c8e44 |
- GListPtr iter;
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
|
|
|
4c8e44 |
- char *target = iter->data;
|
|
|
4c8e44 |
- crm_node_t *target_node = crm_get_peer(0, target);
|
|
|
4c8e44 |
- const char *uuid = crm_peer_uuid(target_node);
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- crm_notice("Marking %s, target of a previous stonith action, as clean", target);
|
|
|
4c8e44 |
- send_stonith_update(NULL, target, uuid);
|
|
|
4c8e44 |
- free(target);
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
- g_list_free(stonith_cleanup_list);
|
|
|
4c8e44 |
- stonith_cleanup_list = NULL;
|
|
|
4c8e44 |
-}
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
-/* end stonith cleanup list functions */
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
-static gboolean
|
|
|
4c8e44 |
-fail_incompletable_stonith(crm_graph_t * graph)
|
|
|
4c8e44 |
-{
|
|
|
4c8e44 |
- GListPtr lpc = NULL;
|
|
|
4c8e44 |
- const char *task = NULL;
|
|
|
4c8e44 |
- xmlNode *last_action = NULL;
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- if (graph == NULL) {
|
|
|
4c8e44 |
- return FALSE;
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
|
|
|
4c8e44 |
- GListPtr lpc2 = NULL;
|
|
|
4c8e44 |
- synapse_t *synapse = (synapse_t *) lpc->data;
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- if (synapse->confirmed) {
|
|
|
4c8e44 |
- continue;
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) {
|
|
|
4c8e44 |
- crm_action_t *action = (crm_action_t *) lpc2->data;
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- if (action->type != action_type_crm || action->confirmed) {
|
|
|
4c8e44 |
- continue;
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
|
|
|
4c8e44 |
- if (task && safe_str_eq(task, CRM_OP_FENCE)) {
|
|
|
4c8e44 |
- action->failed = TRUE;
|
|
|
4c8e44 |
- last_action = action->xml;
|
|
|
4c8e44 |
- update_graph(graph, action);
|
|
|
4c8e44 |
- crm_notice("Failing action %d (%s): fencer terminated",
|
|
|
4c8e44 |
- action->id, ID(action->xml));
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- if (last_action != NULL) {
|
|
|
4c8e44 |
- crm_warn("Fencer failure resulted in unrunnable actions");
|
|
|
4c8e44 |
- abort_for_stonith_failure(tg_restart, NULL, last_action);
|
|
|
4c8e44 |
- return TRUE;
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- return FALSE;
|
|
|
4c8e44 |
-}
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
-static void
|
|
|
4c8e44 |
-tengine_stonith_connection_destroy(stonith_t * st, stonith_event_t * e)
|
|
|
4c8e44 |
-{
|
|
|
4c8e44 |
- if (is_set(fsa_input_register, R_ST_REQUIRED)) {
|
|
|
4c8e44 |
- crm_crit("Fencing daemon connection failed");
|
|
|
4c8e44 |
- mainloop_set_trigger(stonith_reconnect);
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- } else {
|
|
|
4c8e44 |
- crm_info("Fencing daemon disconnected");
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- /* cbchan will be garbage at this point, arrange for it to be reset */
|
|
|
4c8e44 |
- if(stonith_api) {
|
|
|
4c8e44 |
- stonith_api->state = stonith_disconnected;
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- if (AM_I_DC) {
|
|
|
4c8e44 |
- fail_incompletable_stonith(transition_graph);
|
|
|
4c8e44 |
- trigger_graph();
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-}
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
-char *te_client_id = NULL;
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
-#ifdef HAVE_SYS_REBOOT_H
|
|
|
4c8e44 |
-# include <unistd.h>
|
|
|
4c8e44 |
-# include <sys/reboot.h>
|
|
|
4c8e44 |
-#endif
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
-static void
|
|
|
4c8e44 |
-tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event)
|
|
|
4c8e44 |
-{
|
|
|
4c8e44 |
- if(te_client_id == NULL) {
|
|
|
4c8e44 |
- te_client_id = crm_strdup_printf("%s.%lu", crm_system_name,
|
|
|
4c8e44 |
- (unsigned long) getpid());
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- if (st_event == NULL) {
|
|
|
4c8e44 |
- crm_err("Notify data not found");
|
|
|
4c8e44 |
- return;
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- crmd_alert_fencing_op(st_event);
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- if (st_event->result == pcmk_ok && safe_str_eq("on", st_event->action)) {
|
|
|
4c8e44 |
- crm_notice("%s was successfully unfenced by %s (at the request of %s)",
|
|
|
4c8e44 |
- st_event->target, st_event->executioner ? st_event->executioner : "<anyone>", st_event->origin);
|
|
|
4c8e44 |
- /* TODO: Hook up st_event->device */
|
|
|
4c8e44 |
- return;
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- } else if (safe_str_eq("on", st_event->action)) {
|
|
|
4c8e44 |
- crm_err("Unfencing of %s by %s failed: %s (%d)",
|
|
|
4c8e44 |
- st_event->target, st_event->executioner ? st_event->executioner : "<anyone>",
|
|
|
4c8e44 |
- pcmk_strerror(st_event->result), st_event->result);
|
|
|
4c8e44 |
- return;
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- } else if (st_event->result == pcmk_ok && crm_str_eq(st_event->target, fsa_our_uname, TRUE)) {
|
|
|
4c8e44 |
- crm_crit("We were allegedly just fenced by %s for %s!",
|
|
|
4c8e44 |
- st_event->executioner ? st_event->executioner : "<anyone>", st_event->origin); /* Dumps blackbox if enabled */
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- qb_log_fini(); /* Try to get the above log message to disk - somehow */
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- /* Get out ASAP and do not come back up.
|
|
|
4c8e44 |
- *
|
|
|
4c8e44 |
- * Triggering a reboot is also not the worst idea either since
|
|
|
4c8e44 |
- * the rest of the cluster thinks we're safely down
|
|
|
4c8e44 |
- */
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
-#ifdef RB_HALT_SYSTEM
|
|
|
4c8e44 |
- reboot(RB_HALT_SYSTEM);
|
|
|
4c8e44 |
-#endif
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- /*
|
|
|
4c8e44 |
- * If reboot() fails or is not supported, coming back up will
|
|
|
4c8e44 |
- * probably lead to a situation where the other nodes set our
|
|
|
4c8e44 |
- * status to 'lost' because of the fencing callback and will
|
|
|
4c8e44 |
- * discard subsequent election votes with:
|
|
|
4c8e44 |
- *
|
|
|
4c8e44 |
- * Election 87 (current: 5171, owner: 103): Processed vote from east-03 (Peer is not part of our cluster)
|
|
|
4c8e44 |
- *
|
|
|
4c8e44 |
- * So just stay dead, something is seriously messed up anyway.
|
|
|
4c8e44 |
- *
|
|
|
4c8e44 |
- */
|
|
|
4c8e44 |
- exit(CRM_EX_FATAL); // None of our wrappers since we already called qb_log_fini()
|
|
|
4c8e44 |
- return;
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- /* Update the count of stonith failures for this target, in case we become
|
|
|
4c8e44 |
- * DC later. The current DC has already updated its fail count in
|
|
|
4c8e44 |
- * tengine_stonith_callback().
|
|
|
4c8e44 |
- */
|
|
|
4c8e44 |
- if (!AM_I_DC && safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) {
|
|
|
4c8e44 |
- if (st_event->result == pcmk_ok) {
|
|
|
4c8e44 |
- st_fail_count_reset(st_event->target);
|
|
|
4c8e44 |
- } else {
|
|
|
4c8e44 |
- st_fail_count_increment(st_event->target);
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s: %s "
|
|
|
4c8e44 |
- CRM_XS " initiator=%s ref=%s",
|
|
|
4c8e44 |
- st_event->target, st_event->result == pcmk_ok ? "" : " not",
|
|
|
4c8e44 |
- st_event->action,
|
|
|
4c8e44 |
- st_event->executioner ? st_event->executioner : "<anyone>",
|
|
|
4c8e44 |
- (st_event->client_origin? st_event->client_origin : "<unknown>"),
|
|
|
4c8e44 |
- pcmk_strerror(st_event->result),
|
|
|
4c8e44 |
- st_event->origin, st_event->id);
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- if (st_event->result == pcmk_ok) {
|
|
|
4c8e44 |
- crm_node_t *peer = crm_find_known_peer_full(0, st_event->target, CRM_GET_PEER_ANY);
|
|
|
4c8e44 |
- const char *uuid = NULL;
|
|
|
4c8e44 |
- gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname);
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- if (peer == NULL) {
|
|
|
4c8e44 |
- return;
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- uuid = crm_peer_uuid(peer);
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc);
|
|
|
4c8e44 |
- if(AM_I_DC) {
|
|
|
4c8e44 |
- /* The DC always sends updates */
|
|
|
4c8e44 |
- send_stonith_update(NULL, st_event->target, uuid);
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- /* @TODO Ideally, at this point, we'd check whether the fenced node
|
|
|
4c8e44 |
- * hosted any guest nodes, and call remote_node_down() for them.
|
|
|
4c8e44 |
- * Unfortunately, the controller doesn't have a simple, reliable way
|
|
|
4c8e44 |
- * to map hosts to guests. It might be possible to track this in the
|
|
|
4c8e44 |
- * peer cache via crm_remote_peer_cache_refresh(). For now, we rely
|
|
|
4c8e44 |
- * on the PE creating fence pseudo-events for the guests.
|
|
|
4c8e44 |
- */
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- if (st_event->client_origin && safe_str_neq(st_event->client_origin, te_client_id)) {
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- /* Abort the current transition graph if it wasn't us
|
|
|
4c8e44 |
- * that invoked stonith to fence someone
|
|
|
4c8e44 |
- */
|
|
|
4c8e44 |
- crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target);
|
|
|
4c8e44 |
- abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL);
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- /* Assume it was our leader if we don't currently have one */
|
|
|
4c8e44 |
- } else if (((fsa_our_dc == NULL) || safe_str_eq(fsa_our_dc, st_event->target))
|
|
|
4c8e44 |
- && !is_set(peer->flags, crm_remote_node)) {
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- crm_notice("Target %s our leader %s (recorded: %s)",
|
|
|
4c8e44 |
- fsa_our_dc ? "was" : "may have been", st_event->target,
|
|
|
4c8e44 |
- fsa_our_dc ? fsa_our_dc : "<unset>");
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- /* Given the CIB resyncing that occurs around elections,
|
|
|
4c8e44 |
- * have one node update the CIB now and, if the new DC is different,
|
|
|
4c8e44 |
- * have them do so too after the election
|
|
|
4c8e44 |
- */
|
|
|
4c8e44 |
- if (we_are_executioner) {
|
|
|
4c8e44 |
- send_stonith_update(NULL, st_event->target, uuid);
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
- add_stonith_cleanup(st_event->target);
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- /* If the target is a remote node, and we host its connection,
|
|
|
4c8e44 |
- * immediately fail all monitors so it can be recovered quickly.
|
|
|
4c8e44 |
- * The connection won't necessarily drop when a remote node is fenced,
|
|
|
4c8e44 |
- * so the failure might not otherwise be detected until the next poke.
|
|
|
4c8e44 |
- */
|
|
|
4c8e44 |
- if (is_set(peer->flags, crm_remote_node)) {
|
|
|
4c8e44 |
- remote_ra_fail(st_event->target);
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- crmd_peer_down(peer, TRUE);
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-}
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
-static gboolean
|
|
|
4c8e44 |
-do_stonith_history_sync(gpointer user_data)
|
|
|
4c8e44 |
-{
|
|
|
4c8e44 |
- if (stonith_api && (stonith_api->state != stonith_disconnected)) {
|
|
|
4c8e44 |
- stonith_history_t *history = NULL;
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- stonith_api->cmds->history(stonith_api,
|
|
|
4c8e44 |
- st_opt_sync_call | st_opt_broadcast,
|
|
|
4c8e44 |
- NULL, &history, 5);
|
|
|
4c8e44 |
- stonith_history_free(history);
|
|
|
4c8e44 |
- return TRUE;
|
|
|
4c8e44 |
- } else {
|
|
|
4c8e44 |
- crm_info("Skip triggering stonith history-sync as stonith is disconnected");
|
|
|
4c8e44 |
- return FALSE;
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-}
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
-static gboolean
|
|
|
4c8e44 |
-stonith_history_sync_set_trigger(gpointer user_data)
|
|
|
4c8e44 |
-{
|
|
|
4c8e44 |
- mainloop_set_trigger(stonith_history_sync_trigger);
|
|
|
4c8e44 |
- return FALSE;
|
|
|
4c8e44 |
-}
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
-void
|
|
|
4c8e44 |
-te_trigger_stonith_history_sync(void)
|
|
|
4c8e44 |
-{
|
|
|
4c8e44 |
- /* trigger a sync in 5s to give more nodes the
|
|
|
4c8e44 |
- * chance to show up so that we don't create
|
|
|
4c8e44 |
- * unnecessary stonith-history-sync traffic
|
|
|
4c8e44 |
- */
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- /* as we are finally checking the stonith-connection
|
|
|
4c8e44 |
- * in do_stonith_history_sync we should be fine
|
|
|
4c8e44 |
- * leaving stonith_history_sync_time & stonith_history_sync_trigger
|
|
|
4c8e44 |
- * around
|
|
|
4c8e44 |
- */
|
|
|
4c8e44 |
- if (stonith_history_sync_trigger == NULL) {
|
|
|
4c8e44 |
- stonith_history_sync_trigger =
|
|
|
4c8e44 |
- mainloop_add_trigger(G_PRIORITY_LOW,
|
|
|
4c8e44 |
- do_stonith_history_sync, NULL);
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- if(stonith_history_sync_timer == NULL) {
|
|
|
4c8e44 |
- stonith_history_sync_timer =
|
|
|
4c8e44 |
- mainloop_timer_add("history_sync", 5000,
|
|
|
4c8e44 |
- FALSE, stonith_history_sync_set_trigger,
|
|
|
4c8e44 |
- NULL);
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
- crm_info("Fence history will be synchronized cluster-wide within 5 seconds");
|
|
|
4c8e44 |
- mainloop_timer_start(stonith_history_sync_timer);
|
|
|
4c8e44 |
-}
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
-/*!
|
|
|
4c8e44 |
- * \brief Connect to fencer
|
|
|
4c8e44 |
- *
|
|
|
4c8e44 |
- * \param[in] user_data If NULL, retry failures now, otherwise retry in main loop
|
|
|
4c8e44 |
- *
|
|
|
4c8e44 |
- * \return TRUE
|
|
|
4c8e44 |
- * \note If user_data is NULL, this will wait 2s between attempts, for up to
|
|
|
4c8e44 |
- * 30 attempts, meaning the controller could be blocked as long as 58s.
|
|
|
4c8e44 |
- */
|
|
|
4c8e44 |
-gboolean
|
|
|
4c8e44 |
-te_connect_stonith(gpointer user_data)
|
|
|
4c8e44 |
-{
|
|
|
4c8e44 |
- int rc = pcmk_ok;
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- if (stonith_api == NULL) {
|
|
|
4c8e44 |
- stonith_api = stonith_api_new();
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- if (stonith_api->state != stonith_disconnected) {
|
|
|
4c8e44 |
- crm_trace("Already connected to fencer, no need to retry");
|
|
|
4c8e44 |
- return TRUE;
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- if (user_data == NULL) {
|
|
|
4c8e44 |
- // Blocking (retry failures now until successful)
|
|
|
4c8e44 |
- rc = stonith_api_connect_retry(stonith_api, crm_system_name, 30);
|
|
|
4c8e44 |
- if (rc != pcmk_ok) {
|
|
|
4c8e44 |
- crm_err("Could not connect to fencer in 30 attempts: %s "
|
|
|
4c8e44 |
- CRM_XS " rc=%d", pcmk_strerror(rc), rc);
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
- } else {
|
|
|
4c8e44 |
- // Non-blocking (retry failures later in main loop)
|
|
|
4c8e44 |
- rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
|
|
|
4c8e44 |
- if (rc != pcmk_ok) {
|
|
|
4c8e44 |
- if (is_set(fsa_input_register, R_ST_REQUIRED)) {
|
|
|
4c8e44 |
- crm_err("Fencer connection failed (will retry): %s "
|
|
|
4c8e44 |
- CRM_XS " rc=%d", pcmk_strerror(rc), rc);
|
|
|
4c8e44 |
- mainloop_set_trigger(stonith_reconnect);
|
|
|
4c8e44 |
- } else {
|
|
|
4c8e44 |
- crm_info("Fencer connection failed (ignoring because no longer required): %s "
|
|
|
4c8e44 |
- CRM_XS " rc=%d", pcmk_strerror(rc), rc);
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
- return TRUE;
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- if (rc == pcmk_ok) {
|
|
|
4c8e44 |
- stonith_api->cmds->register_notification(stonith_api,
|
|
|
4c8e44 |
- T_STONITH_NOTIFY_DISCONNECT,
|
|
|
4c8e44 |
- tengine_stonith_connection_destroy);
|
|
|
4c8e44 |
- stonith_api->cmds->register_notification(stonith_api,
|
|
|
4c8e44 |
- T_STONITH_NOTIFY_FENCE,
|
|
|
4c8e44 |
- tengine_stonith_notify);
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
- return TRUE;
|
|
|
4c8e44 |
-}
|
|
|
4c8e44 |
|
|
|
4c8e44 |
gboolean
|
|
|
4c8e44 |
stop_te_timer(crm_action_timer_t * timer)
|
|
|
4c8e44 |
diff --git a/daemons/controld/controld_transition.c b/daemons/controld/controld_transition.c
|
|
|
4c8e44 |
index 5f164ab..b942ab4 100644
|
|
|
4c8e44 |
--- a/daemons/controld/controld_transition.c
|
|
|
4c8e44 |
+++ b/daemons/controld/controld_transition.c
|
|
|
4c8e44 |
@@ -18,7 +18,6 @@
|
|
|
4c8e44 |
|
|
|
4c8e44 |
|
|
|
4c8e44 |
extern crm_graph_functions_t te_graph_fns;
|
|
|
4c8e44 |
-stonith_t *stonith_api = NULL;
|
|
|
4c8e44 |
|
|
|
4c8e44 |
static void
|
|
|
4c8e44 |
global_cib_callback(const xmlNode * msg, int callid, int rc, xmlNode * output)
|
|
|
4c8e44 |
diff --git a/daemons/controld/controld_transition.h b/daemons/controld/controld_transition.h
|
|
|
4c8e44 |
index a162f99..f31ac2d 100644
|
|
|
4c8e44 |
--- a/daemons/controld/controld_transition.h
|
|
|
4c8e44 |
+++ b/daemons/controld/controld_transition.h
|
|
|
4c8e44 |
@@ -1,5 +1,5 @@
|
|
|
4c8e44 |
/*
|
|
|
4c8e44 |
- * Copyright 2004-2018 Andrew Beekhof <andrew@beekhof.net>
|
|
|
4c8e44 |
+ * Copyright 2004-2019 the Pacemaker project contributors
|
|
|
4c8e44 |
*
|
|
|
4c8e44 |
* This source code is licensed under the GNU Lesser General Public License
|
|
|
4c8e44 |
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
|
|
|
4c8e44 |
@@ -12,15 +12,6 @@
|
|
|
4c8e44 |
# include <crm/common/mainloop.h>
|
|
|
4c8e44 |
# include <crm/stonith-ng.h>
|
|
|
4c8e44 |
# include <crm/services.h>
|
|
|
4c8e44 |
-extern stonith_t *stonith_api;
|
|
|
4c8e44 |
-extern void send_stonith_update(crm_action_t * stonith_action, const char *target,
|
|
|
4c8e44 |
- const char *uuid);
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
-/* stonith cleanup list */
|
|
|
4c8e44 |
-void add_stonith_cleanup(const char *target);
|
|
|
4c8e44 |
-void remove_stonith_cleanup(const char *target);
|
|
|
4c8e44 |
-void purge_stonith_cleanup(void);
|
|
|
4c8e44 |
-void execute_stonith_cleanup(void);
|
|
|
4c8e44 |
|
|
|
4c8e44 |
/* tengine */
|
|
|
4c8e44 |
extern crm_action_t *match_down_event(const char *target);
|
|
|
4c8e44 |
@@ -46,16 +37,11 @@ extern char *te_uuid;
|
|
|
4c8e44 |
|
|
|
4c8e44 |
extern void notify_crmd(crm_graph_t * graph);
|
|
|
4c8e44 |
|
|
|
4c8e44 |
-void cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output,
|
|
|
4c8e44 |
- void *user_data);
|
|
|
4c8e44 |
void cib_action_updated(xmlNode *msg, int call_id, int rc, xmlNode *output,
|
|
|
4c8e44 |
void *user_data);
|
|
|
4c8e44 |
gboolean action_timer_callback(gpointer data);
|
|
|
4c8e44 |
gboolean te_graph_trigger(gpointer user_data);
|
|
|
4c8e44 |
void te_update_diff(const char *event, xmlNode *msg);
|
|
|
4c8e44 |
-void tengine_stonith_callback(stonith_t *stonith,
|
|
|
4c8e44 |
- stonith_callback_data_t *data);
|
|
|
4c8e44 |
-void update_stonith_max_attempts(const char* value);
|
|
|
4c8e44 |
|
|
|
4c8e44 |
extern void trigger_graph_processing(const char *fn, int line);
|
|
|
4c8e44 |
void abort_after_delay(int abort_priority, enum transition_action abort_action,
|
|
|
4c8e44 |
@@ -68,12 +54,7 @@ extern void abort_transition_graph(int abort_priority, enum transition_action ab
|
|
|
4c8e44 |
# define abort_transition(pri, action, text, reason) \
|
|
|
4c8e44 |
abort_transition_graph(pri, action, text, reason,__FUNCTION__,__LINE__);
|
|
|
4c8e44 |
|
|
|
4c8e44 |
-extern gboolean te_connect_stonith(gpointer user_data);
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
-extern void te_trigger_stonith_history_sync(void);
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
extern crm_trigger_t *transition_trigger;
|
|
|
4c8e44 |
-extern crm_trigger_t *stonith_reconnect;
|
|
|
4c8e44 |
|
|
|
4c8e44 |
extern char *failed_stop_offset;
|
|
|
4c8e44 |
extern char *failed_start_offset;
|
|
|
4c8e44 |
diff --git a/daemons/controld/controld_utils.h b/daemons/controld/controld_utils.h
|
|
|
4c8e44 |
index 68992f5..8b80e3c 100644
|
|
|
4c8e44 |
--- a/daemons/controld/controld_utils.h
|
|
|
4c8e44 |
+++ b/daemons/controld/controld_utils.h
|
|
|
4c8e44 |
@@ -85,10 +85,6 @@ int crmd_join_phase_count(enum crm_join_phase phase);
|
|
|
4c8e44 |
void crmd_join_phase_log(int level);
|
|
|
4c8e44 |
|
|
|
4c8e44 |
const char *get_timer_desc(fsa_timer_t * timer);
|
|
|
4c8e44 |
-void st_fail_count_reset(const char * target);
|
|
|
4c8e44 |
-void st_fail_count_increment(const char *target);
|
|
|
4c8e44 |
-void abort_for_stonith_failure(enum transition_action abort_action,
|
|
|
4c8e44 |
- const char *target, xmlNode *reason);
|
|
|
4c8e44 |
void crmd_peer_down(crm_node_t *peer, bool full);
|
|
|
4c8e44 |
unsigned int cib_op_timeout(void);
|
|
|
4c8e44 |
|
|
|
4c8e44 |
--
|
|
|
4c8e44 |
1.8.3.1
|
|
|
4c8e44 |
|
|
|
4c8e44 |
From 3002e485651e1ad18da6d44e7672dbe4f0380d3b Mon Sep 17 00:00:00 2001
|
|
|
4c8e44 |
From: Ken Gaillot <kgaillot@redhat.com>
|
|
|
4c8e44 |
Date: Thu, 23 May 2019 18:18:06 -0500
|
|
|
4c8e44 |
Subject: [PATCH] Refactor: controller: isolate stonith API handling
|
|
|
4c8e44 |
|
|
|
4c8e44 |
can now make more variables and functions static
|
|
|
4c8e44 |
---
|
|
|
4c8e44 |
daemons/controld/controld_control.c | 28 +++------------------
|
|
|
4c8e44 |
daemons/controld/controld_fencing.c | 49 ++++++++++++++++++++++++++++++++++---
|
|
|
4c8e44 |
daemons/controld/controld_fencing.h | 7 ++----
|
|
|
4c8e44 |
3 files changed, 50 insertions(+), 34 deletions(-)
|
|
|
4c8e44 |
|
|
|
4c8e44 |
diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c
|
|
|
4c8e44 |
index 7f918c0..e99d605 100644
|
|
|
4c8e44 |
--- a/daemons/controld/controld_control.c
|
|
|
4c8e44 |
+++ b/daemons/controld/controld_control.c
|
|
|
4c8e44 |
@@ -113,14 +113,7 @@ do_shutdown(long long action,
|
|
|
4c8e44 |
{
|
|
|
4c8e44 |
/* just in case */
|
|
|
4c8e44 |
set_bit(fsa_input_register, R_SHUTDOWN);
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- if (stonith_api) {
|
|
|
4c8e44 |
- /* Prevent it from coming up again */
|
|
|
4c8e44 |
- clear_bit(fsa_input_register, R_ST_REQUIRED);
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- crm_info("Disconnecting from fencer");
|
|
|
4c8e44 |
- stonith_api->cmds->disconnect(stonith_api);
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
+ controld_disconnect_fencer(FALSE);
|
|
|
4c8e44 |
}
|
|
|
4c8e44 |
|
|
|
4c8e44 |
/* A_SHUTDOWN_REQ */
|
|
|
4c8e44 |
@@ -201,12 +194,7 @@ crmd_exit(crm_exit_t exit_code)
|
|
|
4c8e44 |
|
|
|
4c8e44 |
controld_close_attrd_ipc();
|
|
|
4c8e44 |
pe_subsystem_free();
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- if(stonith_api) {
|
|
|
4c8e44 |
- crm_trace("Disconnecting fencing API");
|
|
|
4c8e44 |
- clear_bit(fsa_input_register, R_ST_REQUIRED);
|
|
|
4c8e44 |
- stonith_api->cmds->free(stonith_api); stonith_api = NULL;
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
+ controld_disconnect_fencer(TRUE);
|
|
|
4c8e44 |
|
|
|
4c8e44 |
if ((exit_code == CRM_EX_OK) && (crmd_mainloop == NULL)) {
|
|
|
4c8e44 |
crm_debug("No mainloop detected");
|
|
|
4c8e44 |
@@ -258,7 +246,6 @@ crmd_exit(crm_exit_t exit_code)
|
|
|
4c8e44 |
mainloop_destroy_trigger(fsa_source); fsa_source = NULL;
|
|
|
4c8e44 |
|
|
|
4c8e44 |
mainloop_destroy_trigger(config_read); config_read = NULL;
|
|
|
4c8e44 |
- mainloop_destroy_trigger(stonith_reconnect); stonith_reconnect = NULL;
|
|
|
4c8e44 |
mainloop_destroy_trigger(transition_trigger); transition_trigger = NULL;
|
|
|
4c8e44 |
|
|
|
4c8e44 |
crm_client_cleanup();
|
|
|
4c8e44 |
@@ -288,7 +275,6 @@ crmd_exit(crm_exit_t exit_code)
|
|
|
4c8e44 |
free(fsa_cluster_name); fsa_cluster_name = NULL;
|
|
|
4c8e44 |
|
|
|
4c8e44 |
free(te_uuid); te_uuid = NULL;
|
|
|
4c8e44 |
- free(te_client_id); te_client_id = NULL;
|
|
|
4c8e44 |
free(fsa_pe_ref); fsa_pe_ref = NULL;
|
|
|
4c8e44 |
free(failed_stop_offset); failed_stop_offset = NULL;
|
|
|
4c8e44 |
free(failed_start_offset); failed_start_offset = NULL;
|
|
|
4c8e44 |
@@ -627,15 +613,7 @@ do_started(long long action,
|
|
|
4c8e44 |
crm_err("Failed to create IPC server: shutting down and inhibiting respawn");
|
|
|
4c8e44 |
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
|
|
|
4c8e44 |
}
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
- // Try connecting to fencer (retrying later in mainloop if failed)
|
|
|
4c8e44 |
- if (stonith_reconnect == NULL) {
|
|
|
4c8e44 |
- stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW,
|
|
|
4c8e44 |
- te_connect_stonith,
|
|
|
4c8e44 |
- GINT_TO_POINTER(TRUE));
|
|
|
4c8e44 |
- }
|
|
|
4c8e44 |
- set_bit(fsa_input_register, R_ST_REQUIRED);
|
|
|
4c8e44 |
- mainloop_set_trigger(stonith_reconnect);
|
|
|
4c8e44 |
+ controld_trigger_fencer_connect();
|
|
|
4c8e44 |
|
|
|
4c8e44 |
crm_notice("Pacemaker controller successfully started and accepting connections");
|
|
|
4c8e44 |
clear_bit(fsa_input_register, R_STARTING);
|
|
|
4c8e44 |
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
|
|
|
4c8e44 |
index cde57b5..92336e9 100644
|
|
|
4c8e44 |
--- a/daemons/controld/controld_fencing.c
|
|
|
4c8e44 |
+++ b/daemons/controld/controld_fencing.c
|
|
|
4c8e44 |
@@ -341,9 +341,9 @@ execute_stonith_cleanup()
|
|
|
4c8e44 |
* Functions that need to interact directly with the fencer via its API
|
|
|
4c8e44 |
*/
|
|
|
4c8e44 |
|
|
|
4c8e44 |
-stonith_t *stonith_api = NULL;
|
|
|
4c8e44 |
-crm_trigger_t *stonith_reconnect = NULL;
|
|
|
4c8e44 |
-char *te_client_id = NULL;
|
|
|
4c8e44 |
+static stonith_t *stonith_api = NULL;
|
|
|
4c8e44 |
+static crm_trigger_t *stonith_reconnect = NULL;
|
|
|
4c8e44 |
+static char *te_client_id = NULL;
|
|
|
4c8e44 |
|
|
|
4c8e44 |
static gboolean
|
|
|
4c8e44 |
fail_incompletable_stonith(crm_graph_t *graph)
|
|
|
4c8e44 |
@@ -571,7 +571,7 @@ tengine_stonith_notify(stonith_t *st, stonith_event_t *st_event)
|
|
|
4c8e44 |
* \note If user_data is NULL, this will wait 2s between attempts, for up to
|
|
|
4c8e44 |
* 30 attempts, meaning the controller could be blocked as long as 58s.
|
|
|
4c8e44 |
*/
|
|
|
4c8e44 |
-gboolean
|
|
|
4c8e44 |
+static gboolean
|
|
|
4c8e44 |
te_connect_stonith(gpointer user_data)
|
|
|
4c8e44 |
{
|
|
|
4c8e44 |
int rc = pcmk_ok;
|
|
|
4c8e44 |
@@ -619,6 +619,47 @@ te_connect_stonith(gpointer user_data)
|
|
|
4c8e44 |
return TRUE;
|
|
|
4c8e44 |
}
|
|
|
4c8e44 |
|
|
|
4c8e44 |
+/*!
|
|
|
4c8e44 |
+ \internal
|
|
|
4c8e44 |
+ \brief Schedule fencer connection attempt in main loop
|
|
|
4c8e44 |
+*/
|
|
|
4c8e44 |
+void
|
|
|
4c8e44 |
+controld_trigger_fencer_connect()
|
|
|
4c8e44 |
+{
|
|
|
4c8e44 |
+ if (stonith_reconnect == NULL) {
|
|
|
4c8e44 |
+ stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW,
|
|
|
4c8e44 |
+ te_connect_stonith,
|
|
|
4c8e44 |
+ GINT_TO_POINTER(TRUE));
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+ set_bit(fsa_input_register, R_ST_REQUIRED);
|
|
|
4c8e44 |
+ mainloop_set_trigger(stonith_reconnect);
|
|
|
4c8e44 |
+}
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+void
|
|
|
4c8e44 |
+controld_disconnect_fencer(bool destroy)
|
|
|
4c8e44 |
+{
|
|
|
4c8e44 |
+ if (stonith_api) {
|
|
|
4c8e44 |
+ // Prevent fencer connection from coming up again
|
|
|
4c8e44 |
+ clear_bit(fsa_input_register, R_ST_REQUIRED);
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
+ stonith_api->cmds->disconnect(stonith_api);
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+ if (destroy) {
|
|
|
4c8e44 |
+ if (stonith_api) {
|
|
|
4c8e44 |
+ stonith_api->cmds->free(stonith_api);
|
|
|
4c8e44 |
+ stonith_api = NULL;
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+ if (stonith_reconnect) {
|
|
|
4c8e44 |
+ mainloop_destroy_trigger(stonith_reconnect);
|
|
|
4c8e44 |
+ stonith_reconnect = NULL;
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+ if (te_client_id) {
|
|
|
4c8e44 |
+ free(te_client_id);
|
|
|
4c8e44 |
+ te_client_id = NULL;
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+ }
|
|
|
4c8e44 |
+}
|
|
|
4c8e44 |
+
|
|
|
4c8e44 |
static gboolean
|
|
|
4c8e44 |
do_stonith_history_sync(gpointer user_data)
|
|
|
4c8e44 |
{
|
|
|
4c8e44 |
diff --git a/daemons/controld/controld_fencing.h b/daemons/controld/controld_fencing.h
|
|
|
4c8e44 |
index b80a6c9..3ef537f 100644
|
|
|
4c8e44 |
--- a/daemons/controld/controld_fencing.h
|
|
|
4c8e44 |
+++ b/daemons/controld/controld_fencing.h
|
|
|
4c8e44 |
@@ -13,16 +13,13 @@
|
|
|
4c8e44 |
#include <stdbool.h> // bool
|
|
|
4c8e44 |
#include <crm/transition.h> // crm_graph_t, crm_action_t
|
|
|
4c8e44 |
|
|
|
4c8e44 |
-extern crm_trigger_t *stonith_reconnect;
|
|
|
4c8e44 |
-extern char *te_client_id;
|
|
|
4c8e44 |
-extern stonith_t *stonith_api;
|
|
|
4c8e44 |
-
|
|
|
4c8e44 |
// stonith fail counts
|
|
|
4c8e44 |
void st_fail_count_reset(const char * target);
|
|
|
4c8e44 |
void update_stonith_max_attempts(const char* value);
|
|
|
4c8e44 |
|
|
|
4c8e44 |
// stonith API client
|
|
|
4c8e44 |
-gboolean te_connect_stonith(gpointer user_data);
|
|
|
4c8e44 |
+void controld_trigger_fencer_connect(void);
|
|
|
4c8e44 |
+void controld_disconnect_fencer(bool destroy);
|
|
|
4c8e44 |
gboolean te_fence_node(crm_graph_t *graph, crm_action_t *action);
|
|
|
4c8e44 |
|
|
|
4c8e44 |
// stonith cleanup list
|
|
|
4c8e44 |
--
|
|
|
4c8e44 |
1.8.3.1
|
|
|
4c8e44 |
|