|
|
60de42 |
From b005b4f2809020304862000326b22cded7b14377 Mon Sep 17 00:00:00 2001
|
|
|
60de42 |
From: Ken Gaillot <kgaillot@redhat.com>
|
|
|
60de42 |
Date: Thu, 6 Apr 2017 15:51:47 -0500
|
|
|
60de42 |
Subject: [PATCH 01/13] Fix: libpe_status: guest nodes from bundles may have
|
|
|
60de42 |
attributes
|
|
|
60de42 |
|
|
|
60de42 |
Previously, if a guest node created by a bundle had a node attribute,
|
|
|
60de42 |
pe_create_node() would get called twice, once when parsing the <node> entry and
|
|
|
60de42 |
once when parsing the <bundle>, resulting in any bundle primitive being unable
|
|
|
60de42 |
to run.
|
|
|
60de42 |
---
|
|
|
60de42 |
lib/pengine/container.c | 15 ++++++++++++---
|
|
|
60de42 |
1 file changed, 12 insertions(+), 3 deletions(-)
|
|
|
60de42 |
|
|
|
60de42 |
diff --git a/lib/pengine/container.c b/lib/pengine/container.c
|
|
|
60de42 |
index 127b144..054ef5f 100644
|
|
|
60de42 |
--- a/lib/pengine/container.c
|
|
|
60de42 |
+++ b/lib/pengine/container.c
|
|
|
60de42 |
@@ -390,9 +390,18 @@ create_remote_resource(
|
|
|
60de42 |
|
|
|
60de42 |
// tuple->docker->fillers = g_list_append(tuple->docker->fillers, child);
|
|
|
60de42 |
|
|
|
60de42 |
- // -INFINITY prevents anyone else from running here
|
|
|
60de42 |
- node = pe_create_node(strdup(nodeid), nodeid, "remote", "-INFINITY",
|
|
|
60de42 |
- data_set);
|
|
|
60de42 |
+ /* Ensure a node has been created for the guest (it may have already
|
|
|
60de42 |
+ * been, if it has a permanent node attribute), and ensure its weight is
|
|
|
60de42 |
+ * -INFINITY so no other resources can run on it.
|
|
|
60de42 |
+ */
|
|
|
60de42 |
+ node = pe_find_node(data_set->nodes, nodeid);
|
|
|
60de42 |
+ if (node == NULL) {
|
|
|
60de42 |
+ node = pe_create_node(strdup(nodeid), nodeid, "remote", "-INFINITY",
|
|
|
60de42 |
+ data_set);
|
|
|
60de42 |
+ } else {
|
|
|
60de42 |
+ node->weight = -INFINITY;
|
|
|
60de42 |
+ }
|
|
|
60de42 |
+
|
|
|
60de42 |
tuple->node = node_copy(node);
|
|
|
60de42 |
tuple->node->weight = 500;
|
|
|
60de42 |
nodeid = NULL;
|
|
|
60de42 |
--
|
|
|
60de42 |
1.8.3.1
|
|
|
60de42 |
|
|
|
60de42 |
|
|
|
60de42 |
From 7b89ff8b65fcdcad55676578361080eb23edb3e4 Mon Sep 17 00:00:00 2001
|
|
|
60de42 |
From: Ken Gaillot <kgaillot@redhat.com>
|
|
|
60de42 |
Date: Thu, 6 Apr 2017 16:56:52 -0500
|
|
|
60de42 |
Subject: [PATCH 02/13] Low: fencing: ignore empty 'action' parameter in fence
|
|
|
60de42 |
devices
|
|
|
60de42 |
|
|
|
60de42 |
This makes the fix in 9c0c3d6 more comprehensive.
|
|
|
60de42 |
---
|
|
|
60de42 |
fencing/commands.c | 5 ++++-
|
|
|
60de42 |
1 file changed, 4 insertions(+), 1 deletion(-)
|
|
|
60de42 |
|
|
|
60de42 |
diff --git a/fencing/commands.c b/fencing/commands.c
|
|
|
60de42 |
index b4e6eb5..deec050 100644
|
|
|
60de42 |
--- a/fencing/commands.c
|
|
|
60de42 |
+++ b/fencing/commands.c
|
|
|
60de42 |
@@ -829,7 +829,10 @@ xml2device_params(const char *name, xmlNode *dev)
|
|
|
60de42 |
crm_warn("%s has '%s' parameter, which should never be specified in configuration",
|
|
|
60de42 |
name, STONITH_ATTR_ACTION_OP);
|
|
|
60de42 |
|
|
|
60de42 |
- if (strcmp(value, "reboot") == 0) {
|
|
|
60de42 |
+ if (*value == '\0') {
|
|
|
60de42 |
+ crm_warn("Ignoring empty '%s' parameter", STONITH_ATTR_ACTION_OP);
|
|
|
60de42 |
+
|
|
|
60de42 |
+ } else if (strcmp(value, "reboot") == 0) {
|
|
|
60de42 |
crm_warn("Ignoring %s='reboot' (see stonith-action cluster property instead)",
|
|
|
60de42 |
STONITH_ATTR_ACTION_OP);
|
|
|
60de42 |
|
|
|
60de42 |
--
|
|
|
60de42 |
1.8.3.1
|
|
|
60de42 |
|
|
|
60de42 |
|
|
|
60de42 |
From 100dd5fda476ef526ac1964260252b30864d5ca7 Mon Sep 17 00:00:00 2001
|
|
|
60de42 |
From: Ken Gaillot <kgaillot@redhat.com>
|
|
|
60de42 |
Date: Fri, 7 Apr 2017 16:51:29 -0500
|
|
|
60de42 |
Subject: [PATCH 03/13] Fix: crmd: check for too many stonith failures only
|
|
|
60de42 |
when aborting for that reason
|
|
|
60de42 |
|
|
|
60de42 |
Previously, crmd would check for too many stonith failures whenever aborting
|
|
|
60de42 |
a transition. This would lead to a new transition not being triggered when
|
|
|
60de42 |
aborting for some other unrelated reason, such as a configuration change.
|
|
|
60de42 |
|
|
|
60de42 |
Now, crmd checks for too many stonith failures only when aborting due to a new
|
|
|
60de42 |
stonith failure.
|
|
|
60de42 |
---
|
|
|
60de42 |
crmd/crmd_utils.h | 2 +-
|
|
|
60de42 |
crmd/te_actions.c | 12 ++++--------
|
|
|
60de42 |
crmd/te_callbacks.c | 24 ++++++++++++++++++++++--
|
|
|
60de42 |
crmd/te_utils.c | 2 +-
|
|
|
60de42 |
4 files changed, 28 insertions(+), 12 deletions(-)
|
|
|
60de42 |
|
|
|
60de42 |
diff --git a/crmd/crmd_utils.h b/crmd/crmd_utils.h
|
|
|
60de42 |
index a1aaad3..d2f8eb2 100644
|
|
|
60de42 |
--- a/crmd/crmd_utils.h
|
|
|
60de42 |
+++ b/crmd/crmd_utils.h
|
|
|
60de42 |
@@ -100,8 +100,8 @@ int crmd_join_phase_count(enum crm_join_phase phase);
|
|
|
60de42 |
void crmd_join_phase_log(int level);
|
|
|
60de42 |
|
|
|
60de42 |
const char *get_timer_desc(fsa_timer_t * timer);
|
|
|
60de42 |
-gboolean too_many_st_failures(void);
|
|
|
60de42 |
void st_fail_count_reset(const char * target);
|
|
|
60de42 |
+void abort_for_stonith_failure(xmlNode *reason);
|
|
|
60de42 |
void crmd_peer_down(crm_node_t *peer, bool full);
|
|
|
60de42 |
|
|
|
60de42 |
/* Convenience macro for registering a CIB callback
|
|
|
60de42 |
diff --git a/crmd/te_actions.c b/crmd/te_actions.c
|
|
|
60de42 |
index a8ad86f..66dd16e 100644
|
|
|
60de42 |
--- a/crmd/te_actions.c
|
|
|
60de42 |
+++ b/crmd/te_actions.c
|
|
|
60de42 |
@@ -726,15 +726,11 @@ notify_crmd(crm_graph_t * graph)
|
|
|
60de42 |
case tg_restart:
|
|
|
60de42 |
type = "restart";
|
|
|
60de42 |
if (fsa_state == S_TRANSITION_ENGINE) {
|
|
|
60de42 |
- if (too_many_st_failures() == FALSE) {
|
|
|
60de42 |
- if (transition_timer->period_ms > 0) {
|
|
|
60de42 |
- crm_timer_stop(transition_timer);
|
|
|
60de42 |
- crm_timer_start(transition_timer);
|
|
|
60de42 |
- } else {
|
|
|
60de42 |
- event = I_PE_CALC;
|
|
|
60de42 |
- }
|
|
|
60de42 |
+ if (transition_timer->period_ms > 0) {
|
|
|
60de42 |
+ crm_timer_stop(transition_timer);
|
|
|
60de42 |
+ crm_timer_start(transition_timer);
|
|
|
60de42 |
} else {
|
|
|
60de42 |
- event = I_TE_SUCCESS;
|
|
|
60de42 |
+ event = I_PE_CALC;
|
|
|
60de42 |
}
|
|
|
60de42 |
|
|
|
60de42 |
} else if (fsa_state == S_POLICY_ENGINE) {
|
|
|
60de42 |
diff --git a/crmd/te_callbacks.c b/crmd/te_callbacks.c
|
|
|
60de42 |
index 6c0670c..a0aa081 100644
|
|
|
60de42 |
--- a/crmd/te_callbacks.c
|
|
|
60de42 |
+++ b/crmd/te_callbacks.c
|
|
|
60de42 |
@@ -635,7 +635,7 @@ struct st_fail_rec {
|
|
|
60de42 |
int count;
|
|
|
60de42 |
};
|
|
|
60de42 |
|
|
|
60de42 |
-gboolean
|
|
|
60de42 |
+static gboolean
|
|
|
60de42 |
too_many_st_failures(void)
|
|
|
60de42 |
{
|
|
|
60de42 |
GHashTableIter iter;
|
|
|
60de42 |
@@ -694,6 +694,26 @@ st_fail_count_increment(const char *target, int rc)
|
|
|
60de42 |
}
|
|
|
60de42 |
}
|
|
|
60de42 |
|
|
|
60de42 |
+/*!
|
|
|
60de42 |
+ * \internal
|
|
|
60de42 |
+ * \brief Abort transition due to stonith failure
|
|
|
60de42 |
+ *
|
|
|
60de42 |
+ * \param[in] reason Failed stonith action XML, or NULL
|
|
|
60de42 |
+ */
|
|
|
60de42 |
+void
|
|
|
60de42 |
+abort_for_stonith_failure(xmlNode *reason)
|
|
|
60de42 |
+{
|
|
|
60de42 |
+ enum transition_action abort_action = tg_restart;
|
|
|
60de42 |
+
|
|
|
60de42 |
+ /* If stonith repeatedly fails, we eventually give up on starting a new
|
|
|
60de42 |
+ * transition for that reason.
|
|
|
60de42 |
+ */
|
|
|
60de42 |
+ if (too_many_st_failures()) {
|
|
|
60de42 |
+ abort_action = tg_stop;
|
|
|
60de42 |
+ }
|
|
|
60de42 |
+ abort_transition(INFINITY, abort_action, "Stonith failed", reason);
|
|
|
60de42 |
+}
|
|
|
60de42 |
+
|
|
|
60de42 |
void
|
|
|
60de42 |
tengine_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data)
|
|
|
60de42 |
{
|
|
|
60de42 |
@@ -759,7 +779,7 @@ tengine_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data)
|
|
|
60de42 |
action->failed = TRUE;
|
|
|
60de42 |
crm_notice("Stonith operation %d for %s failed (%s): aborting transition.",
|
|
|
60de42 |
call_id, target, pcmk_strerror(rc));
|
|
|
60de42 |
- abort_transition(INFINITY, tg_restart, "Stonith failed", NULL);
|
|
|
60de42 |
+ abort_for_stonith_failure(NULL);
|
|
|
60de42 |
st_fail_count_increment(target, rc);
|
|
|
60de42 |
}
|
|
|
60de42 |
|
|
|
60de42 |
diff --git a/crmd/te_utils.c b/crmd/te_utils.c
|
|
|
60de42 |
index 3b67afe..4603307 100644
|
|
|
60de42 |
--- a/crmd/te_utils.c
|
|
|
60de42 |
+++ b/crmd/te_utils.c
|
|
|
60de42 |
@@ -162,7 +162,7 @@ fail_incompletable_stonith(crm_graph_t * graph)
|
|
|
60de42 |
|
|
|
60de42 |
if (last_action != NULL) {
|
|
|
60de42 |
crm_warn("STONITHd failure resulted in un-runnable actions");
|
|
|
60de42 |
- abort_transition(INFINITY, tg_restart, "Stonith failure", last_action);
|
|
|
60de42 |
+ abort_for_stonith_failure(last_action);
|
|
|
60de42 |
return TRUE;
|
|
|
60de42 |
}
|
|
|
60de42 |
|
|
|
60de42 |
--
|
|
|
60de42 |
1.8.3.1
|
|
|
60de42 |
|
|
|
60de42 |
|
|
|
60de42 |
From 3c49a1cf86cb819eca18c841661d90fa65bcb185 Mon Sep 17 00:00:00 2001
|
|
|
60de42 |
From: Ken Gaillot <kgaillot@redhat.com>
|
|
|
60de42 |
Date: Fri, 7 Apr 2017 21:03:31 -0500
|
|
|
60de42 |
Subject: [PATCH 04/13] Low: crmd: consider target when checking stonith
|
|
|
60de42 |
failures
|
|
|
60de42 |
|
|
|
60de42 |
Previously, if the crmd aborted a transition due to failure to fence a
|
|
|
60de42 |
particular node, a new transition would not be started if *any* node had
|
|
|
60de42 |
failed to be fenced too many times. Now, only failures of the particular target are
|
|
|
60de42 |
checked in that situation.
|
|
|
60de42 |
---
|
|
|
60de42 |
crmd/crmd_utils.h | 2 +-
|
|
|
60de42 |
crmd/te_callbacks.c | 33 +++++++++++++++++++++++----------
|
|
|
60de42 |
crmd/te_utils.c | 2 +-
|
|
|
60de42 |
3 files changed, 25 insertions(+), 12 deletions(-)
|
|
|
60de42 |
|
|
|
60de42 |
diff --git a/crmd/crmd_utils.h b/crmd/crmd_utils.h
|
|
|
60de42 |
index d2f8eb2..f0289d4 100644
|
|
|
60de42 |
--- a/crmd/crmd_utils.h
|
|
|
60de42 |
+++ b/crmd/crmd_utils.h
|
|
|
60de42 |
@@ -101,7 +101,7 @@ void crmd_join_phase_log(int level);
|
|
|
60de42 |
|
|
|
60de42 |
const char *get_timer_desc(fsa_timer_t * timer);
|
|
|
60de42 |
void st_fail_count_reset(const char * target);
|
|
|
60de42 |
-void abort_for_stonith_failure(xmlNode *reason);
|
|
|
60de42 |
+void abort_for_stonith_failure(const char *target, xmlNode *reason);
|
|
|
60de42 |
void crmd_peer_down(crm_node_t *peer, bool full);
|
|
|
60de42 |
|
|
|
60de42 |
/* Convenience macro for registering a CIB callback
|
|
|
60de42 |
diff --git a/crmd/te_callbacks.c b/crmd/te_callbacks.c
|
|
|
60de42 |
index a0aa081..6e306fd 100644
|
|
|
60de42 |
--- a/crmd/te_callbacks.c
|
|
|
60de42 |
+++ b/crmd/te_callbacks.c
|
|
|
60de42 |
@@ -636,7 +636,7 @@ struct st_fail_rec {
|
|
|
60de42 |
};
|
|
|
60de42 |
|
|
|
60de42 |
static gboolean
|
|
|
60de42 |
-too_many_st_failures(void)
|
|
|
60de42 |
+too_many_st_failures(const char *target)
|
|
|
60de42 |
{
|
|
|
60de42 |
GHashTableIter iter;
|
|
|
60de42 |
const char *key = NULL;
|
|
|
60de42 |
@@ -646,14 +646,26 @@ too_many_st_failures(void)
|
|
|
60de42 |
return FALSE;
|
|
|
60de42 |
}
|
|
|
60de42 |
|
|
|
60de42 |
- g_hash_table_iter_init(&iter, stonith_failures);
|
|
|
60de42 |
- while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
|
|
|
60de42 |
- if (value->count > stonith_max_attempts ) {
|
|
|
60de42 |
- crm_notice("Too many failures to fence %s (%d), giving up", key, value->count);
|
|
|
60de42 |
- return TRUE;
|
|
|
60de42 |
+ if (target == NULL) {
|
|
|
60de42 |
+ g_hash_table_iter_init(&iter, stonith_failures);
|
|
|
60de42 |
+ while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
|
|
|
60de42 |
+ if (value->count > stonith_max_attempts) {
|
|
|
60de42 |
+ target = (const char*)key;
|
|
|
60de42 |
+ goto too_many;
|
|
|
60de42 |
+ }
|
|
|
60de42 |
+ }
|
|
|
60de42 |
+ } else {
|
|
|
60de42 |
+ value = g_hash_table_lookup(stonith_failures, target);
|
|
|
60de42 |
+ if ((value != NULL) && (value->count > stonith_max_attempts)) {
|
|
|
60de42 |
+ goto too_many;
|
|
|
60de42 |
}
|
|
|
60de42 |
}
|
|
|
60de42 |
return FALSE;
|
|
|
60de42 |
+
|
|
|
60de42 |
+too_many:
|
|
|
60de42 |
+ crm_warn("Too many failures (%d) to fence %s, giving up",
|
|
|
60de42 |
+ value->count, target);
|
|
|
60de42 |
+ return TRUE;
|
|
|
60de42 |
}
|
|
|
60de42 |
|
|
|
60de42 |
void
|
|
|
60de42 |
@@ -698,17 +710,18 @@ st_fail_count_increment(const char *target, int rc)
|
|
|
60de42 |
* \internal
|
|
|
60de42 |
* \brief Abort transition due to stonith failure
|
|
|
60de42 |
*
|
|
|
60de42 |
- * \param[in] reason Failed stonith action XML, or NULL
|
|
|
60de42 |
+ * \param[in] target Don't restart if this (NULL for any) has too many failures
|
|
|
60de42 |
+ * \param[in] reason Log this stonith action XML as abort reason (or NULL)
|
|
|
60de42 |
*/
|
|
|
60de42 |
void
|
|
|
60de42 |
-abort_for_stonith_failure(xmlNode *reason)
|
|
|
60de42 |
+abort_for_stonith_failure(const char *target, xmlNode *reason)
|
|
|
60de42 |
{
|
|
|
60de42 |
enum transition_action abort_action = tg_restart;
|
|
|
60de42 |
|
|
|
60de42 |
/* If stonith repeatedly fails, we eventually give up on starting a new
|
|
|
60de42 |
* transition for that reason.
|
|
|
60de42 |
*/
|
|
|
60de42 |
- if (too_many_st_failures()) {
|
|
|
60de42 |
+ if (too_many_st_failures(target)) {
|
|
|
60de42 |
abort_action = tg_stop;
|
|
|
60de42 |
}
|
|
|
60de42 |
abort_transition(INFINITY, abort_action, "Stonith failed", reason);
|
|
|
60de42 |
@@ -779,7 +792,7 @@ tengine_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data)
|
|
|
60de42 |
action->failed = TRUE;
|
|
|
60de42 |
crm_notice("Stonith operation %d for %s failed (%s): aborting transition.",
|
|
|
60de42 |
call_id, target, pcmk_strerror(rc));
|
|
|
60de42 |
- abort_for_stonith_failure(NULL);
|
|
|
60de42 |
+ abort_for_stonith_failure(target, NULL);
|
|
|
60de42 |
st_fail_count_increment(target, rc);
|
|
|
60de42 |
}
|
|
|
60de42 |
|
|
|
60de42 |
diff --git a/crmd/te_utils.c b/crmd/te_utils.c
|
|
|
60de42 |
index 4603307..66b0883 100644
|
|
|
60de42 |
--- a/crmd/te_utils.c
|
|
|
60de42 |
+++ b/crmd/te_utils.c
|
|
|
60de42 |
@@ -162,7 +162,7 @@ fail_incompletable_stonith(crm_graph_t * graph)
|
|
|
60de42 |
|
|
|
60de42 |
if (last_action != NULL) {
|
|
|
60de42 |
crm_warn("STONITHd failure resulted in un-runnable actions");
|
|
|
60de42 |
- abort_for_stonith_failure(last_action);
|
|
|
60de42 |
+ abort_for_stonith_failure(NULL, last_action);
|
|
|
60de42 |
return TRUE;
|
|
|
60de42 |
}
|
|
|
60de42 |
|
|
|
60de42 |
--
|
|
|
60de42 |
1.8.3.1
|
|
|
60de42 |
|
|
|
60de42 |
|
|
|
60de42 |
From 0c43785dad9be38566cccce677c54da42ff2c691 Mon Sep 17 00:00:00 2001
|
|
|
60de42 |
From: Ken Gaillot <kgaillot@redhat.com>
|
|
|
60de42 |
Date: Mon, 10 Apr 2017 14:22:45 -0500
|
|
|
60de42 |
Subject: [PATCH 05/13] Fix: crmd: forget stonith failures when forgetting node
|
|
|
60de42 |
|
|
|
60de42 |
---
|
|
|
60de42 |
crmd/messages.c | 6 ++++++
|
|
|
60de42 |
1 file changed, 6 insertions(+)
|
|
|
60de42 |
|
|
|
60de42 |
diff --git a/crmd/messages.c b/crmd/messages.c
|
|
|
60de42 |
index c79d96e..4307fca 100644
|
|
|
60de42 |
--- a/crmd/messages.c
|
|
|
60de42 |
+++ b/crmd/messages.c
|
|
|
60de42 |
@@ -870,6 +870,12 @@ handle_request(xmlNode * stored_msg, enum crmd_fsa_cause cause)
|
|
|
60de42 |
|
|
|
60de42 |
} else {
|
|
|
60de42 |
reap_crm_member(id, name);
|
|
|
60de42 |
+
|
|
|
60de42 |
+ /* If we're forgetting this node, also forget any failures to fence
|
|
|
60de42 |
+ * it, so we don't carry that over to any node added later with the
|
|
|
60de42 |
+ * same name.
|
|
|
60de42 |
+ */
|
|
|
60de42 |
+ st_fail_count_reset(name);
|
|
|
60de42 |
}
|
|
|
60de42 |
|
|
|
60de42 |
} else if (strcmp(op, CRM_OP_MAINTENANCE_NODES) == 0) {
|
|
|
60de42 |
--
|
|
|
60de42 |
1.8.3.1
|
|
|
60de42 |
|
|
|
60de42 |
|
|
|
60de42 |
From 515424f01b8ac5eb8705cecb26a60e17de3a7df6 Mon Sep 17 00:00:00 2001
|
|
|
60de42 |
From: Ken Gaillot <kgaillot@redhat.com>
|
|
|
60de42 |
Date: Mon, 10 Apr 2017 15:23:46 -0500
|
|
|
60de42 |
Subject: [PATCH 06/13] Fix: crmd: track stonith fail counts on all nodes
|
|
|
60de42 |
|
|
|
60de42 |
Previously, the stonith fail count was incremented in
|
|
|
60de42 |
tengine_stonith_callback(), which is called only on the DC. Now, it is
|
|
|
60de42 |
incremented in tengine_stonith_notify() instead, which is called on all nodes,
|
|
|
60de42 |
ensuring the count is correct when a new node takes over DC.
|
|
|
60de42 |
---
|
|
|
60de42 |
crmd/crmd_utils.h | 1 +
|
|
|
60de42 |
crmd/te_callbacks.c | 5 ++---
|
|
|
60de42 |
crmd/te_utils.c | 9 ++++++---
|
|
|
60de42 |
3 files changed, 9 insertions(+), 6 deletions(-)
|
|
|
60de42 |
|
|
|
60de42 |
diff --git a/crmd/crmd_utils.h b/crmd/crmd_utils.h
|
|
|
60de42 |
index f0289d4..fd8fe76 100644
|
|
|
60de42 |
--- a/crmd/crmd_utils.h
|
|
|
60de42 |
+++ b/crmd/crmd_utils.h
|
|
|
60de42 |
@@ -101,6 +101,7 @@ void crmd_join_phase_log(int level);
|
|
|
60de42 |
|
|
|
60de42 |
const char *get_timer_desc(fsa_timer_t * timer);
|
|
|
60de42 |
void st_fail_count_reset(const char * target);
|
|
|
60de42 |
+void st_fail_count_increment(const char *target);
|
|
|
60de42 |
void abort_for_stonith_failure(const char *target, xmlNode *reason);
|
|
|
60de42 |
void crmd_peer_down(crm_node_t *peer, bool full);
|
|
|
60de42 |
|
|
|
60de42 |
diff --git a/crmd/te_callbacks.c b/crmd/te_callbacks.c
|
|
|
60de42 |
index 6e306fd..aa4a141 100644
|
|
|
60de42 |
--- a/crmd/te_callbacks.c
|
|
|
60de42 |
+++ b/crmd/te_callbacks.c
|
|
|
60de42 |
@@ -682,8 +682,8 @@ st_fail_count_reset(const char *target)
|
|
|
60de42 |
}
|
|
|
60de42 |
}
|
|
|
60de42 |
|
|
|
60de42 |
-static void
|
|
|
60de42 |
-st_fail_count_increment(const char *target, int rc)
|
|
|
60de42 |
+void
|
|
|
60de42 |
+st_fail_count_increment(const char *target)
|
|
|
60de42 |
{
|
|
|
60de42 |
struct st_fail_rec *rec = NULL;
|
|
|
60de42 |
|
|
|
60de42 |
@@ -793,7 +793,6 @@ tengine_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data)
|
|
|
60de42 |
crm_notice("Stonith operation %d for %s failed (%s): aborting transition.",
|
|
|
60de42 |
call_id, target, pcmk_strerror(rc));
|
|
|
60de42 |
abort_for_stonith_failure(target, NULL);
|
|
|
60de42 |
- st_fail_count_increment(target, rc);
|
|
|
60de42 |
}
|
|
|
60de42 |
|
|
|
60de42 |
update_graph(transition_graph, action);
|
|
|
60de42 |
diff --git a/crmd/te_utils.c b/crmd/te_utils.c
|
|
|
60de42 |
index 66b0883..32ddae1 100644
|
|
|
60de42 |
--- a/crmd/te_utils.c
|
|
|
60de42 |
+++ b/crmd/te_utils.c
|
|
|
60de42 |
@@ -259,9 +259,12 @@ tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event)
|
|
|
60de42 |
return;
|
|
|
60de42 |
}
|
|
|
60de42 |
|
|
|
60de42 |
- if (st_event->result == pcmk_ok &&
|
|
|
60de42 |
- safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) {
|
|
|
60de42 |
- st_fail_count_reset(st_event->target);
|
|
|
60de42 |
+ if (safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) {
|
|
|
60de42 |
+ if (st_event->result == pcmk_ok) {
|
|
|
60de42 |
+ st_fail_count_reset(st_event->target);
|
|
|
60de42 |
+ } else {
|
|
|
60de42 |
+ st_fail_count_increment(st_event->target);
|
|
|
60de42 |
+ }
|
|
|
60de42 |
}
|
|
|
60de42 |
|
|
|
60de42 |
crm_notice("Peer %s was%s terminated (%s) by %s for %s: %s (ref=%s) by client %s",
|
|
|
60de42 |
--
|
|
|
60de42 |
1.8.3.1
|
|
|
60de42 |
|
|
|
60de42 |
|
|
|
60de42 |
From 714a8d07a500675d84e6ef779ba21e6c23e27853 Mon Sep 17 00:00:00 2001
|
|
|
60de42 |
From: Ken Gaillot <kgaillot@redhat.com>
|
|
|
60de42 |
Date: Mon, 10 Apr 2017 17:20:08 -0500
|
|
|
60de42 |
Subject: [PATCH 07/13] Low: crmd: allow clearing all stonith fail counts
|
|
|
60de42 |
|
|
|
60de42 |
for future improvements
|
|
|
60de42 |
---
|
|
|
60de42 |
crmd/te_callbacks.c | 29 ++++++++++++++++++++++++-----
|
|
|
60de42 |
1 file changed, 24 insertions(+), 5 deletions(-)
|
|
|
60de42 |
|
|
|
60de42 |
diff --git a/crmd/te_callbacks.c b/crmd/te_callbacks.c
|
|
|
60de42 |
index aa4a141..4f896ee 100644
|
|
|
60de42 |
--- a/crmd/te_callbacks.c
|
|
|
60de42 |
+++ b/crmd/te_callbacks.c
|
|
|
60de42 |
@@ -668,17 +668,36 @@ too_many:
|
|
|
60de42 |
return TRUE;
|
|
|
60de42 |
}
|
|
|
60de42 |
|
|
|
60de42 |
+/*!
|
|
|
60de42 |
+ * \internal
|
|
|
60de42 |
+ * \brief Reset a stonith fail count
|
|
|
60de42 |
+ *
|
|
|
60de42 |
+ * \param[in] target Name of node to reset, or NULL for all
|
|
|
60de42 |
+ */
|
|
|
60de42 |
void
|
|
|
60de42 |
st_fail_count_reset(const char *target)
|
|
|
60de42 |
{
|
|
|
60de42 |
- struct st_fail_rec *rec = NULL;
|
|
|
60de42 |
+ if (stonith_failures == NULL) {
|
|
|
60de42 |
+ return;
|
|
|
60de42 |
+ }
|
|
|
60de42 |
+
|
|
|
60de42 |
+ if (target) {
|
|
|
60de42 |
+ struct st_fail_rec *rec = NULL;
|
|
|
60de42 |
|
|
|
60de42 |
- if (stonith_failures) {
|
|
|
60de42 |
rec = g_hash_table_lookup(stonith_failures, target);
|
|
|
60de42 |
- }
|
|
|
60de42 |
+ if (rec) {
|
|
|
60de42 |
+ rec->count = 0;
|
|
|
60de42 |
+ }
|
|
|
60de42 |
+ } else {
|
|
|
60de42 |
+ GHashTableIter iter;
|
|
|
60de42 |
+ const char *key = NULL;
|
|
|
60de42 |
+ struct st_fail_rec *rec = NULL;
|
|
|
60de42 |
|
|
|
60de42 |
- if (rec) {
|
|
|
60de42 |
- rec->count = 0;
|
|
|
60de42 |
+ g_hash_table_iter_init(&iter, stonith_failures);
|
|
|
60de42 |
+ while (g_hash_table_iter_next(&iter, (gpointer *) &key,
|
|
|
60de42 |
+ (gpointer *) &rec)) {
|
|
|
60de42 |
+ rec->count = 0;
|
|
|
60de42 |
+ }
|
|
|
60de42 |
}
|
|
|
60de42 |
}
|
|
|
60de42 |
|
|
|
60de42 |
--
|
|
|
60de42 |
1.8.3.1
|
|
|
60de42 |
|
|
|
60de42 |
|
|
|
60de42 |
From 8fd6691558d94a8294f3d860cc9451c1a8e0c7a1 Mon Sep 17 00:00:00 2001
|
|
|
60de42 |
From: Ken Gaillot <kgaillot@redhat.com>
|
|
|
60de42 |
Date: Mon, 17 Apr 2017 13:55:19 -0500
|
|
|
60de42 |
Subject: [PATCH 08/13] Low: crmd: skip restart at (not above)
|
|
|
60de42 |
stonith-max-attempts
|
|
|
60de42 |
|
|
|
60de42 |
---
|
|
|
60de42 |
crmd/te_callbacks.c | 4 ++--
|
|
|
60de42 |
1 file changed, 2 insertions(+), 2 deletions(-)
|
|
|
60de42 |
|
|
|
60de42 |
diff --git a/crmd/te_callbacks.c b/crmd/te_callbacks.c
|
|
|
60de42 |
index 4f896ee..b4d8713 100644
|
|
|
60de42 |
--- a/crmd/te_callbacks.c
|
|
|
60de42 |
+++ b/crmd/te_callbacks.c
|
|
|
60de42 |
@@ -649,14 +649,14 @@ too_many_st_failures(const char *target)
|
|
|
60de42 |
if (target == NULL) {
|
|
|
60de42 |
g_hash_table_iter_init(&iter, stonith_failures);
|
|
|
60de42 |
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
|
|
|
60de42 |
- if (value->count > stonith_max_attempts) {
|
|
|
60de42 |
+ if (value->count >= stonith_max_attempts) {
|
|
|
60de42 |
target = (const char*)key;
|
|
|
60de42 |
goto too_many;
|
|
|
60de42 |
}
|
|
|
60de42 |
}
|
|
|
60de42 |
} else {
|
|
|
60de42 |
value = g_hash_table_lookup(stonith_failures, target);
|
|
|
60de42 |
- if ((value != NULL) && (value->count > stonith_max_attempts)) {
|
|
|
60de42 |
+ if ((value != NULL) && (value->count >= stonith_max_attempts)) {
|
|
|
60de42 |
goto too_many;
|
|
|
60de42 |
}
|
|
|
60de42 |
}
|
|
|
60de42 |
--
|
|
|
60de42 |
1.8.3.1
|
|
|
60de42 |
|
|
|
60de42 |
|
|
|
60de42 |
From 9e9a271fd666ff371487f22c28ba9e420a22434c Mon Sep 17 00:00:00 2001
|
|
|
60de42 |
From: Ken Gaillot <kgaillot@redhat.com>
|
|
|
60de42 |
Date: Mon, 17 Apr 2017 18:18:42 -0500
|
|
|
60de42 |
Subject: [PATCH 09/13] Fix: crmd: don't restart transition if no fence devices
|
|
|
60de42 |
|
|
|
60de42 |
This restores the behavior removed by ff881376, but more precisely where it's
|
|
|
60de42 |
needed.
|
|
|
60de42 |
---
|
|
|
60de42 |
crmd/crmd_utils.h | 4 +++-
|
|
|
60de42 |
crmd/te_callbacks.c | 21 ++++++++++++++++-----
|
|
|
60de42 |
crmd/te_utils.c | 2 +-
|
|
|
60de42 |
include/crm/transition.h | 4 ++++
|
|
|
60de42 |
4 files changed, 24 insertions(+), 7 deletions(-)
|
|
|
60de42 |
|
|
|
60de42 |
diff --git a/crmd/crmd_utils.h b/crmd/crmd_utils.h
|
|
|
60de42 |
index fd8fe76..9a09340 100644
|
|
|
60de42 |
--- a/crmd/crmd_utils.h
|
|
|
60de42 |
+++ b/crmd/crmd_utils.h
|
|
|
60de42 |
@@ -19,6 +19,7 @@
|
|
|
60de42 |
# define CRMD_UTILS__H
|
|
|
60de42 |
|
|
|
60de42 |
# include <crm/crm.h>
|
|
|
60de42 |
+# include <crm/transition.h>
|
|
|
60de42 |
# include <crm/common/xml.h>
|
|
|
60de42 |
# include <crm/cib/internal.h> /* For CIB_OP_MODIFY */
|
|
|
60de42 |
# include "notify.h"
|
|
|
60de42 |
@@ -102,7 +103,8 @@ void crmd_join_phase_log(int level);
|
|
|
60de42 |
const char *get_timer_desc(fsa_timer_t * timer);
|
|
|
60de42 |
void st_fail_count_reset(const char * target);
|
|
|
60de42 |
void st_fail_count_increment(const char *target);
|
|
|
60de42 |
-void abort_for_stonith_failure(const char *target, xmlNode *reason);
|
|
|
60de42 |
+void abort_for_stonith_failure(enum transition_action abort_action,
|
|
|
60de42 |
+ const char *target, xmlNode *reason);
|
|
|
60de42 |
void crmd_peer_down(crm_node_t *peer, bool full);
|
|
|
60de42 |
|
|
|
60de42 |
/* Convenience macro for registering a CIB callback
|
|
|
60de42 |
diff --git a/crmd/te_callbacks.c b/crmd/te_callbacks.c
|
|
|
60de42 |
index b4d8713..c2b0c0d 100644
|
|
|
60de42 |
--- a/crmd/te_callbacks.c
|
|
|
60de42 |
+++ b/crmd/te_callbacks.c
|
|
|
60de42 |
@@ -729,18 +729,18 @@ st_fail_count_increment(const char *target)
|
|
|
60de42 |
* \internal
|
|
|
60de42 |
* \brief Abort transition due to stonith failure
|
|
|
60de42 |
*
|
|
|
60de42 |
+ * \param[in] abort_action Whether to restart or stop transition
|
|
|
60de42 |
* \param[in] target Don't restart if this (NULL for any) has too many failures
|
|
|
60de42 |
* \param[in] reason Log this stonith action XML as abort reason (or NULL)
|
|
|
60de42 |
*/
|
|
|
60de42 |
void
|
|
|
60de42 |
-abort_for_stonith_failure(const char *target, xmlNode *reason)
|
|
|
60de42 |
+abort_for_stonith_failure(enum transition_action abort_action,
|
|
|
60de42 |
+ const char *target, xmlNode *reason)
|
|
|
60de42 |
{
|
|
|
60de42 |
- enum transition_action abort_action = tg_restart;
|
|
|
60de42 |
-
|
|
|
60de42 |
/* If stonith repeatedly fails, we eventually give up on starting a new
|
|
|
60de42 |
* transition for that reason.
|
|
|
60de42 |
*/
|
|
|
60de42 |
- if (too_many_st_failures(target)) {
|
|
|
60de42 |
+ if ((abort_action != tg_stop) && too_many_st_failures(target)) {
|
|
|
60de42 |
abort_action = tg_stop;
|
|
|
60de42 |
}
|
|
|
60de42 |
abort_transition(INFINITY, abort_action, "Stonith failed", reason);
|
|
|
60de42 |
@@ -807,11 +807,22 @@ tengine_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data)
|
|
|
60de42 |
|
|
|
60de42 |
} else {
|
|
|
60de42 |
const char *target = crm_element_value_const(action->xml, XML_LRM_ATTR_TARGET);
|
|
|
60de42 |
+ enum transition_action abort_action = tg_restart;
|
|
|
60de42 |
|
|
|
60de42 |
action->failed = TRUE;
|
|
|
60de42 |
crm_notice("Stonith operation %d for %s failed (%s): aborting transition.",
|
|
|
60de42 |
call_id, target, pcmk_strerror(rc));
|
|
|
60de42 |
- abort_for_stonith_failure(target, NULL);
|
|
|
60de42 |
+
|
|
|
60de42 |
+ /* If no fence devices were available, there's no use in immediately
|
|
|
60de42 |
+ * checking again, so don't start a new transition in that case.
|
|
|
60de42 |
+ */
|
|
|
60de42 |
+ if (rc == -ENODEV) {
|
|
|
60de42 |
+ crm_warn("No devices found in cluster to fence %s, giving up",
|
|
|
60de42 |
+ target);
|
|
|
60de42 |
+ abort_action = tg_stop;
|
|
|
60de42 |
+ }
|
|
|
60de42 |
+
|
|
|
60de42 |
+ abort_for_stonith_failure(abort_action, target, NULL);
|
|
|
60de42 |
}
|
|
|
60de42 |
|
|
|
60de42 |
update_graph(transition_graph, action);
|
|
|
60de42 |
diff --git a/crmd/te_utils.c b/crmd/te_utils.c
|
|
|
60de42 |
index 32ddae1..dcfbb3b 100644
|
|
|
60de42 |
--- a/crmd/te_utils.c
|
|
|
60de42 |
+++ b/crmd/te_utils.c
|
|
|
60de42 |
@@ -162,7 +162,7 @@ fail_incompletable_stonith(crm_graph_t * graph)
|
|
|
60de42 |
|
|
|
60de42 |
if (last_action != NULL) {
|
|
|
60de42 |
crm_warn("STONITHd failure resulted in un-runnable actions");
|
|
|
60de42 |
- abort_for_stonith_failure(NULL, last_action);
|
|
|
60de42 |
+ abort_for_stonith_failure(tg_restart, NULL, last_action);
|
|
|
60de42 |
return TRUE;
|
|
|
60de42 |
}
|
|
|
60de42 |
|
|
|
60de42 |
diff --git a/include/crm/transition.h b/include/crm/transition.h
|
|
|
60de42 |
index f2069cc..21f7c55 100644
|
|
|
60de42 |
--- a/include/crm/transition.h
|
|
|
60de42 |
+++ b/include/crm/transition.h
|
|
|
60de42 |
@@ -15,6 +15,8 @@
|
|
|
60de42 |
* License along with this library; if not, write to the Free Software
|
|
|
60de42 |
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
60de42 |
*/
|
|
|
60de42 |
+#ifndef CRM_TRANSITION__H
|
|
|
60de42 |
+# define CRM_TRANSITION__H
|
|
|
60de42 |
|
|
|
60de42 |
#include <crm/crm.h>
|
|
|
60de42 |
#include <crm/msg_xml.h>
|
|
|
60de42 |
@@ -147,3 +149,5 @@ bool update_abort_priority(crm_graph_t * graph, int priority,
|
|
|
60de42 |
const char *actiontype2text(action_type_e type);
|
|
|
60de42 |
lrmd_event_data_t *convert_graph_action(xmlNode * resource, crm_action_t * action, int status,
|
|
|
60de42 |
int rc);
|
|
|
60de42 |
+
|
|
|
60de42 |
+#endif
|
|
|
60de42 |
--
|
|
|
60de42 |
1.8.3.1
|
|
|
60de42 |
|
|
|
60de42 |
|
|
|
60de42 |
From 268f70f9ab644783a8038aa82bcca3628cc942dc Mon Sep 17 00:00:00 2001
|
|
|
60de42 |
From: Ken Gaillot <kgaillot@redhat.com>
|
|
|
60de42 |
Date: Mon, 17 Apr 2017 14:39:19 -0500
|
|
|
60de42 |
Subject: [PATCH 10/13] Low: crmd: avoid DC sending offer to itself twice
|
|
|
60de42 |
|
|
|
60de42 |
---
|
|
|
60de42 |
crmd/join_dc.c | 6 ++++--
|
|
|
60de42 |
1 file changed, 4 insertions(+), 2 deletions(-)
|
|
|
60de42 |
|
|
|
60de42 |
diff --git a/crmd/join_dc.c b/crmd/join_dc.c
|
|
|
60de42 |
index 71311de..999996d 100644
|
|
|
60de42 |
--- a/crmd/join_dc.c
|
|
|
60de42 |
+++ b/crmd/join_dc.c
|
|
|
60de42 |
@@ -242,8 +242,10 @@ do_dc_join_offer_one(long long action,
|
|
|
60de42 |
/* always offer to the DC (ourselves)
|
|
|
60de42 |
* this ensures the correct value for max_generation_from
|
|
|
60de42 |
*/
|
|
|
60de42 |
- member = crm_get_peer(0, fsa_our_uname);
|
|
|
60de42 |
- join_make_offer(NULL, member, NULL);
|
|
|
60de42 |
+ if (strcmp(join_to, fsa_our_uname) != 0) {
|
|
|
60de42 |
+ member = crm_get_peer(0, fsa_our_uname);
|
|
|
60de42 |
+ join_make_offer(NULL, member, NULL);
|
|
|
60de42 |
+ }
|
|
|
60de42 |
|
|
|
60de42 |
/* this was a genuine join request, cancel any existing
|
|
|
60de42 |
* transition and invoke the PE
|
|
|
60de42 |
--
|
|
|
60de42 |
1.8.3.1
|
|
|
60de42 |
|
|
|
60de42 |
|
|
|
60de42 |
From 249c7c9a83371a67e573276a285697994fe66fed Mon Sep 17 00:00:00 2001
|
|
|
60de42 |
From: Ken Gaillot <kgaillot@redhat.com>
|
|
|
60de42 |
Date: Mon, 17 Apr 2017 15:41:18 -0500
|
|
|
60de42 |
Subject: [PATCH 11/13] Fix: crmd: don't fence old DC if it's shutting down as
|
|
|
60de42 |
soon-to-be DC joins
|
|
|
60de42 |
|
|
|
60de42 |
Existing peers of a DC that is shutting down can avoid fencing it (by updating
|
|
|
60de42 |
its expected state) because it broadcasts its shutdown request. However, a
|
|
|
60de42 |
newly joining node won't get that broadcast.
|
|
|
60de42 |
|
|
|
60de42 |
Previously, if the joining node became the new DC, it would fence the old one.
|
|
|
60de42 |
Now, the DC notifies joining nodes (via a join message field) whether it is
|
|
|
60de42 |
shutting down, and joining nodes update its expected state accordingly.
|
|
|
60de42 |
---
|
|
|
60de42 |
crmd/join_client.c | 24 ++++++++++++++++++++++++
|
|
|
60de42 |
crmd/join_dc.c | 32 ++++++++++++++++++++++++++------
|
|
|
60de42 |
include/crm/msg_xml.h | 1 +
|
|
|
60de42 |
3 files changed, 51 insertions(+), 6 deletions(-)
|
|
|
60de42 |
|
|
|
60de42 |
diff --git a/crmd/join_client.c b/crmd/join_client.c
|
|
|
60de42 |
index 319272d..4510483 100644
|
|
|
60de42 |
--- a/crmd/join_client.c
|
|
|
60de42 |
+++ b/crmd/join_client.c
|
|
|
60de42 |
@@ -30,6 +30,26 @@ void join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, v
|
|
|
60de42 |
|
|
|
60de42 |
extern ha_msg_input_t *copy_ha_msg_input(ha_msg_input_t * orig);
|
|
|
60de42 |
|
|
|
60de42 |
+/*!
|
|
|
60de42 |
+ * \internal
|
|
|
60de42 |
+ * \brief Remember if DC is shutting down as we join
|
|
|
60de42 |
+ *
|
|
|
60de42 |
+ * If we're joining while the current DC is shutting down, update its expected
|
|
|
60de42 |
+ * state, so we don't fence it if we become the new DC. (We weren't a peer
|
|
|
60de42 |
+ * when it broadcast its shutdown request.)
|
|
|
60de42 |
+ *
|
|
|
60de42 |
+ * \param[in] msg A join message from the DC
|
|
|
60de42 |
+ */
|
|
|
60de42 |
+static void
|
|
|
60de42 |
+update_dc_expected(xmlNode *msg)
|
|
|
60de42 |
+{
|
|
|
60de42 |
+ if (fsa_our_dc && crm_is_true(crm_element_value(msg, F_CRM_DC_LEAVING))) {
|
|
|
60de42 |
+ crm_node_t *dc_node = crm_get_peer(0, fsa_our_dc);
|
|
|
60de42 |
+
|
|
|
60de42 |
+ crm_update_peer_expected(__FUNCTION__, dc_node, CRMD_JOINSTATE_DOWN);
|
|
|
60de42 |
+ }
|
|
|
60de42 |
+}
|
|
|
60de42 |
+
|
|
|
60de42 |
/* A_CL_JOIN_QUERY */
|
|
|
60de42 |
/* is there a DC out there? */
|
|
|
60de42 |
void
|
|
|
60de42 |
@@ -128,6 +148,8 @@ do_cl_join_offer_respond(long long action,
|
|
|
60de42 |
return;
|
|
|
60de42 |
}
|
|
|
60de42 |
|
|
|
60de42 |
+ update_dc_expected(input->msg);
|
|
|
60de42 |
+
|
|
|
60de42 |
CRM_LOG_ASSERT(input != NULL);
|
|
|
60de42 |
query_call_id =
|
|
|
60de42 |
fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local | cib_no_children);
|
|
|
60de42 |
@@ -250,6 +272,8 @@ do_cl_join_finalize_respond(long long action,
|
|
|
60de42 |
return;
|
|
|
60de42 |
}
|
|
|
60de42 |
|
|
|
60de42 |
+ update_dc_expected(input->msg);
|
|
|
60de42 |
+
|
|
|
60de42 |
/* send our status section to the DC */
|
|
|
60de42 |
tmp1 = do_lrm_query(TRUE, fsa_our_uname);
|
|
|
60de42 |
if (tmp1 != NULL) {
|
|
|
60de42 |
diff --git a/crmd/join_dc.c b/crmd/join_dc.c
|
|
|
60de42 |
index 999996d..ebb5059 100644
|
|
|
60de42 |
--- a/crmd/join_dc.c
|
|
|
60de42 |
+++ b/crmd/join_dc.c
|
|
|
60de42 |
@@ -106,6 +106,30 @@ initialize_join(gboolean before)
|
|
|
60de42 |
}
|
|
|
60de42 |
}
|
|
|
60de42 |
|
|
|
60de42 |
+/*!
|
|
|
60de42 |
+ * \internal
|
|
|
60de42 |
+ * \brief Create a join message from the DC
|
|
|
60de42 |
+ *
|
|
|
60de42 |
+ * \param[in] join_op Join operation name
|
|
|
60de42 |
+ * \param[in] host_to Recipient of message
|
|
|
60de42 |
+ */
|
|
|
60de42 |
+static xmlNode *
|
|
|
60de42 |
+create_dc_message(const char *join_op, const char *host_to)
|
|
|
60de42 |
+{
|
|
|
60de42 |
+ xmlNode *msg = create_request(join_op, NULL, host_to, CRM_SYSTEM_CRMD,
|
|
|
60de42 |
+ CRM_SYSTEM_DC, NULL);
|
|
|
60de42 |
+
|
|
|
60de42 |
+ /* Identify which election this is a part of */
|
|
|
60de42 |
+ crm_xml_add_int(msg, F_CRM_JOIN_ID, current_join_id);
|
|
|
60de42 |
+
|
|
|
60de42 |
+ /* Add a field specifying whether the DC is shutting down. This keeps the
|
|
|
60de42 |
+ * joining node from fencing the old DC if it becomes the new DC.
|
|
|
60de42 |
+ */
|
|
|
60de42 |
+ crm_xml_add_boolean(msg, F_CRM_DC_LEAVING,
|
|
|
60de42 |
+ is_set(fsa_input_register, R_SHUTDOWN));
|
|
|
60de42 |
+ return msg;
|
|
|
60de42 |
+}
|
|
|
60de42 |
+
|
|
|
60de42 |
static void
|
|
|
60de42 |
join_make_offer(gpointer key, gpointer value, gpointer user_data)
|
|
|
60de42 |
{
|
|
|
60de42 |
@@ -147,10 +171,8 @@ join_make_offer(gpointer key, gpointer value, gpointer user_data)
|
|
|
60de42 |
|
|
|
60de42 |
crm_update_peer_join(__FUNCTION__, (crm_node_t*)member, crm_join_none);
|
|
|
60de42 |
|
|
|
60de42 |
- offer = create_request(CRM_OP_JOIN_OFFER, NULL, member->uname,
|
|
|
60de42 |
- CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL);
|
|
|
60de42 |
+ offer = create_dc_message(CRM_OP_JOIN_OFFER, member->uname);
|
|
|
60de42 |
|
|
|
60de42 |
- crm_xml_add_int(offer, F_CRM_JOIN_ID, current_join_id);
|
|
|
60de42 |
/* send the welcome */
|
|
|
60de42 |
crm_info("join-%d: Sending offer to %s", current_join_id, member->uname);
|
|
|
60de42 |
|
|
|
60de42 |
@@ -588,9 +610,7 @@ finalize_join_for(gpointer key, gpointer value, gpointer user_data)
|
|
|
60de42 |
}
|
|
|
60de42 |
|
|
|
60de42 |
/* send the ack/nack to the node */
|
|
|
60de42 |
- acknak = create_request(CRM_OP_JOIN_ACKNAK, NULL, join_to,
|
|
|
60de42 |
- CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL);
|
|
|
60de42 |
- crm_xml_add_int(acknak, F_CRM_JOIN_ID, current_join_id);
|
|
|
60de42 |
+ acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to);
|
|
|
60de42 |
|
|
|
60de42 |
crm_debug("join-%d: ACK'ing join request from %s",
|
|
|
60de42 |
current_join_id, join_to);
|
|
|
60de42 |
diff --git a/include/crm/msg_xml.h b/include/crm/msg_xml.h
|
|
|
60de42 |
index 7198fe5..7504744 100644
|
|
|
60de42 |
--- a/include/crm/msg_xml.h
|
|
|
60de42 |
+++ b/include/crm/msg_xml.h
|
|
|
60de42 |
@@ -64,6 +64,7 @@
|
|
|
60de42 |
# define F_CRM_ORIGIN "origin"
|
|
|
60de42 |
# define F_CRM_USER "crm_user"
|
|
|
60de42 |
# define F_CRM_JOIN_ID "join_id"
|
|
|
60de42 |
+# define F_CRM_DC_LEAVING "dc-leaving"
|
|
|
60de42 |
# define F_CRM_ELECTION_ID "election-id"
|
|
|
60de42 |
# define F_CRM_ELECTION_AGE_S "election-age-sec"
|
|
|
60de42 |
# define F_CRM_ELECTION_AGE_US "election-age-nano-sec"
|
|
|
60de42 |
--
|
|
|
60de42 |
1.8.3.1
|
|
|
60de42 |
|
|
|
60de42 |
|
|
|
60de42 |
From 9fe47194b7636bfe3aebdeece0ec89a7f588d77d Mon Sep 17 00:00:00 2001
|
|
|
60de42 |
From: Ken Gaillot <kgaillot@redhat.com>
|
|
|
60de42 |
Date: Mon, 17 Apr 2017 19:03:03 -0500
|
|
|
60de42 |
Subject: [PATCH 12/13] Refactor: extra: use whitespace consistently in Dummy
|
|
|
60de42 |
|
|
|
60de42 |
to make changes easier
|
|
|
60de42 |
---
|
|
|
60de42 |
extra/resources/Dummy | 102 +++++++++++++++++++++++++-------------------------
|
|
|
60de42 |
1 file changed, 51 insertions(+), 51 deletions(-)
|
|
|
60de42 |
|
|
|
60de42 |
diff --git a/extra/resources/Dummy b/extra/resources/Dummy
|
|
|
60de42 |
index 1fd6156..bab56e4 100644
|
|
|
60de42 |
--- a/extra/resources/Dummy
|
|
|
60de42 |
+++ b/extra/resources/Dummy
|
|
|
60de42 |
@@ -1,8 +1,8 @@
|
|
|
60de42 |
#!/bin/sh
|
|
|
60de42 |
#
|
|
|
60de42 |
#
|
|
|
60de42 |
-# Dummy OCF RA. Does nothing but wait a few seconds, can be
|
|
|
60de42 |
-# configured to fail occassionally.
|
|
|
60de42 |
+# Dummy OCF RA. Does nothing but wait a few seconds, can be
|
|
|
60de42 |
+# configured to fail occassionally.
|
|
|
60de42 |
#
|
|
|
60de42 |
# Copyright (c) 2004 SUSE LINUX AG, Lars Marowsky-Brée
|
|
|
60de42 |
# All Rights Reserved.
|
|
|
60de42 |
@@ -37,7 +37,7 @@
|
|
|
60de42 |
#######################################################################
|
|
|
60de42 |
|
|
|
60de42 |
meta_data() {
|
|
|
60de42 |
-	cat <<END
|
|
|
60de42 |
+    cat <<END
|
|
|
60de42 |
|
|
|
60de42 |
|
|
|
60de42 |
<resource-agent name="Dummy" version="1.0">
|
|
|
60de42 |
@@ -130,12 +130,12 @@ END
|
|
|
60de42 |
# don't exit on TERM, to test that lrmd makes sure that we do exit
|
|
|
60de42 |
trap sigterm_handler TERM
|
|
|
60de42 |
sigterm_handler() {
|
|
|
60de42 |
- ocf_log info "They use TERM to bring us down. No such luck."
|
|
|
60de42 |
- return
|
|
|
60de42 |
+ ocf_log info "They use TERM to bring us down. No such luck."
|
|
|
60de42 |
+ return
|
|
|
60de42 |
}
|
|
|
60de42 |
|
|
|
60de42 |
dummy_usage() {
|
|
|
60de42 |
-	cat <<END
|
|
|
60de42 |
+    cat <<END
|
|
|
60de42 |
usage: $0 {start|stop|monitor|migrate_to|migrate_from|validate-all|meta-data}
|
|
|
60de42 |
|
|
|
60de42 |
Expects to have a fully populated OCF RA-compliant environment set.
|
|
|
60de42 |
@@ -174,34 +174,34 @@ dummy_start() {
|
|
|
60de42 |
dummy_stop() {
|
|
|
60de42 |
dummy_monitor
|
|
|
60de42 |
if [ $? -eq $OCF_SUCCESS ]; then
|
|
|
60de42 |
- rm ${OCF_RESKEY_state}
|
|
|
60de42 |
+ rm ${OCF_RESKEY_state}
|
|
|
60de42 |
fi
|
|
|
60de42 |
rm -f "${VERIFY_SERIALIZED_FILE}"
|
|
|
60de42 |
return $OCF_SUCCESS
|
|
|
60de42 |
}
|
|
|
60de42 |
|
|
|
60de42 |
dummy_monitor() {
|
|
|
60de42 |
- # Monitor _MUST!_ differentiate correctly between running
|
|
|
60de42 |
- # (SUCCESS), failed (ERROR) or _cleanly_ stopped (NOT RUNNING).
|
|
|
60de42 |
- # That is THREE states, not just yes/no.
|
|
|
60de42 |
-
|
|
|
60de42 |
- if [ $OCF_RESKEY_op_sleep -ne 0 ]; then
|
|
|
60de42 |
- if [ -f "${VERIFY_SERIALIZED_FILE}" ]; then
|
|
|
60de42 |
- # two monitor ops have occurred at the same time.
|
|
|
60de42 |
- # this is to verify a condition in the lrmd regression tests.
|
|
|
60de42 |
- ocf_log err "$VERIFY_SERIALIZED_FILE exists already"
|
|
|
60de42 |
- return $OCF_ERR_GENERIC
|
|
|
60de42 |
- fi
|
|
|
60de42 |
-
|
|
|
60de42 |
- touch "${VERIFY_SERIALIZED_FILE}"
|
|
|
60de42 |
- sleep ${OCF_RESKEY_op_sleep}
|
|
|
60de42 |
- rm "${VERIFY_SERIALIZED_FILE}"
|
|
|
60de42 |
- fi
|
|
|
60de42 |
-
|
|
|
60de42 |
- if [ -f "${OCF_RESKEY_state}" ]; then
|
|
|
60de42 |
- return $OCF_SUCCESS
|
|
|
60de42 |
- fi
|
|
|
60de42 |
- return $OCF_NOT_RUNNING
|
|
|
60de42 |
+ # Monitor _MUST!_ differentiate correctly between running
|
|
|
60de42 |
+ # (SUCCESS), failed (ERROR) or _cleanly_ stopped (NOT RUNNING).
|
|
|
60de42 |
+ # That is THREE states, not just yes/no.
|
|
|
60de42 |
+
|
|
|
60de42 |
+ if [ $OCF_RESKEY_op_sleep -ne 0 ]; then
|
|
|
60de42 |
+ if [ -f "${VERIFY_SERIALIZED_FILE}" ]; then
|
|
|
60de42 |
+ # two monitor ops have occurred at the same time.
|
|
|
60de42 |
+ # this is to verify a condition in the lrmd regression tests.
|
|
|
60de42 |
+ ocf_log err "$VERIFY_SERIALIZED_FILE exists already"
|
|
|
60de42 |
+ return $OCF_ERR_GENERIC
|
|
|
60de42 |
+ fi
|
|
|
60de42 |
+
|
|
|
60de42 |
+ touch "${VERIFY_SERIALIZED_FILE}"
|
|
|
60de42 |
+ sleep ${OCF_RESKEY_op_sleep}
|
|
|
60de42 |
+ rm "${VERIFY_SERIALIZED_FILE}"
|
|
|
60de42 |
+ fi
|
|
|
60de42 |
+
|
|
|
60de42 |
+ if [ -f "${OCF_RESKEY_state}" ]; then
|
|
|
60de42 |
+ return $OCF_SUCCESS
|
|
|
60de42 |
+ fi
|
|
|
60de42 |
+ return $OCF_NOT_RUNNING
|
|
|
60de42 |
}
|
|
|
60de42 |
|
|
|
60de42 |
dummy_validate() {
|
|
|
60de42 |
@@ -210,7 +210,7 @@ dummy_validate() {
|
|
|
60de42 |
state_dir=`dirname "$OCF_RESKEY_state"`
|
|
|
60de42 |
touch "$state_dir/$$"
|
|
|
60de42 |
if [ $? -ne 0 ]; then
|
|
|
60de42 |
- return $OCF_ERR_ARGS
|
|
|
60de42 |
+ return $OCF_ERR_ARGS
|
|
|
60de42 |
fi
|
|
|
60de42 |
rm "$state_dir/$$"
|
|
|
60de42 |
|
|
|
60de42 |
@@ -235,28 +235,28 @@ VERIFY_SERIALIZED_FILE="${OCF_RESKEY_state}.serialized"
|
|
|
60de42 |
dump_env
|
|
|
60de42 |
|
|
|
60de42 |
case $__OCF_ACTION in
|
|
|
60de42 |
-meta-data) meta_data
|
|
|
60de42 |
- exit $OCF_SUCCESS
|
|
|
60de42 |
- ;;
|
|
|
60de42 |
-start) dummy_start;;
|
|
|
60de42 |
-stop) dummy_stop;;
|
|
|
60de42 |
-monitor) dummy_monitor;;
|
|
|
60de42 |
-migrate_to) ocf_log info "Migrating ${OCF_RESOURCE_INSTANCE} to ${OCF_RESKEY_CRM_meta_migrate_target}."
|
|
|
60de42 |
- dummy_stop
|
|
|
60de42 |
- ;;
|
|
|
60de42 |
-migrate_from) ocf_log info "Migrating ${OCF_RESOURCE_INSTANCE} from ${OCF_RESKEY_CRM_meta_migrate_source}."
|
|
|
60de42 |
- dummy_start
|
|
|
60de42 |
- ;;
|
|
|
60de42 |
-reload) ocf_log err "Reloading..."
|
|
|
60de42 |
- dummy_start
|
|
|
60de42 |
- ;;
|
|
|
60de42 |
-validate-all) dummy_validate;;
|
|
|
60de42 |
-usage|help) dummy_usage
|
|
|
60de42 |
- exit $OCF_SUCCESS
|
|
|
60de42 |
- ;;
|
|
|
60de42 |
-*) dummy_usage
|
|
|
60de42 |
- exit $OCF_ERR_UNIMPLEMENTED
|
|
|
60de42 |
- ;;
|
|
|
60de42 |
+meta-data) meta_data
|
|
|
60de42 |
+ exit $OCF_SUCCESS
|
|
|
60de42 |
+ ;;
|
|
|
60de42 |
+start) dummy_start;;
|
|
|
60de42 |
+stop) dummy_stop;;
|
|
|
60de42 |
+monitor) dummy_monitor;;
|
|
|
60de42 |
+migrate_to) ocf_log info "Migrating ${OCF_RESOURCE_INSTANCE} to ${OCF_RESKEY_CRM_meta_migrate_target}."
|
|
|
60de42 |
+ dummy_stop
|
|
|
60de42 |
+ ;;
|
|
|
60de42 |
+migrate_from) ocf_log info "Migrating ${OCF_RESOURCE_INSTANCE} from ${OCF_RESKEY_CRM_meta_migrate_source}."
|
|
|
60de42 |
+ dummy_start
|
|
|
60de42 |
+ ;;
|
|
|
60de42 |
+reload) ocf_log err "Reloading..."
|
|
|
60de42 |
+ dummy_start
|
|
|
60de42 |
+ ;;
|
|
|
60de42 |
+validate-all) dummy_validate;;
|
|
|
60de42 |
+usage|help) dummy_usage
|
|
|
60de42 |
+ exit $OCF_SUCCESS
|
|
|
60de42 |
+ ;;
|
|
|
60de42 |
+*) dummy_usage
|
|
|
60de42 |
+ exit $OCF_ERR_UNIMPLEMENTED
|
|
|
60de42 |
+ ;;
|
|
|
60de42 |
esac
|
|
|
60de42 |
rc=$?
|
|
|
60de42 |
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
|
|
|
60de42 |
--
|
|
|
60de42 |
1.8.3.1
|
|
|
60de42 |
|
|
|
60de42 |
|
|
|
60de42 |
From 14d5eb9bcf1a00612fbe952d945b07bbe3c44844 Mon Sep 17 00:00:00 2001
|
|
|
60de42 |
From: Ken Gaillot <kgaillot@redhat.com>
|
|
|
60de42 |
Date: Mon, 17 Apr 2017 19:04:58 -0500
|
|
|
60de42 |
Subject: [PATCH 13/13] Test: extra: Dummy stop shouldn't fail if monitor is in
|
|
|
60de42 |
progress
|
|
|
60de42 |
|
|
|
60de42 |
---
|
|
|
60de42 |
extra/resources/Dummy | 4 ++--
|
|
|
60de42 |
1 file changed, 2 insertions(+), 2 deletions(-)
|
|
|
60de42 |
|
|
|
60de42 |
diff --git a/extra/resources/Dummy b/extra/resources/Dummy
|
|
|
60de42 |
index bab56e4..4a6884c 100644
|
|
|
60de42 |
--- a/extra/resources/Dummy
|
|
|
60de42 |
+++ b/extra/resources/Dummy
|
|
|
60de42 |
@@ -172,7 +172,7 @@ dummy_start() {
|
|
|
60de42 |
}
|
|
|
60de42 |
|
|
|
60de42 |
dummy_stop() {
|
|
|
60de42 |
- dummy_monitor
|
|
|
60de42 |
+ dummy_monitor --force
|
|
|
60de42 |
if [ $? -eq $OCF_SUCCESS ]; then
|
|
|
60de42 |
rm ${OCF_RESKEY_state}
|
|
|
60de42 |
fi
|
|
|
60de42 |
@@ -186,7 +186,7 @@ dummy_monitor() {
|
|
|
60de42 |
# That is THREE states, not just yes/no.
|
|
|
60de42 |
|
|
|
60de42 |
if [ $OCF_RESKEY_op_sleep -ne 0 ]; then
|
|
|
60de42 |
- if [ -f "${VERIFY_SERIALIZED_FILE}" ]; then
|
|
|
60de42 |
+ if [ "$1" = "" -a -f "${VERIFY_SERIALIZED_FILE}" ]; then
|
|
|
60de42 |
# two monitor ops have occurred at the same time.
|
|
|
60de42 |
# this is to verify a condition in the lrmd regression tests.
|
|
|
60de42 |
ocf_log err "$VERIFY_SERIALIZED_FILE exists already"
|
|
|
60de42 |
--
|
|
|
60de42 |
1.8.3.1
|
|
|
60de42 |
|