From 0113ff6fb6bb576356d201cf698b98455dbf5180 Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Wed, 21 Dec 2016 18:08:40 +0100
Subject: [PATCH] Fix: pacemaker-remote: pacemaker_remoted shutdown while
unmanaged
Since introduction of the graceful shutdown of pacemaker_remoted
the shutdown is hanging if the remote-resource is unmanaged.
This happens as pacemaker_remoted is waiting for all resources
running on the remote-node to be shut down and pacemaker
on the other hand doesn't touch resources on a remote-node
when the remote-resource is unmanaged.
Fixes rhbz#1388102
---
crmd/crmd_lrm.h | 2 +
crmd/lrm_state.c | 20 ++++---
crmd/messages.c | 5 ++
crmd/remote_lrmd_ra.c | 124 ++++++++++++++++++++++++++++++++++++++++---
crmd/te_actions.c | 28 +++++++++-
include/crm/crm.h | 1 +
include/crm/lrmd.h | 1 +
include/crm/msg_xml.h | 2 +
include/crm/pengine/status.h | 1 +
include/crm_internal.h | 1 +
lib/lrmd/proxy_common.c | 15 ++++++
lib/pengine/unpack.c | 34 ++++++++----
lib/pengine/utils.c | 28 ++++++----
lrmd/ipc_proxy.c | 5 ++
lrmd/lrmd_private.h | 2 +
lrmd/main.c | 24 +++++++++
pengine/allocate.c | 3 ++
pengine/graph.c | 68 +++++++++++++++++++++++-
pengine/pengine.h | 1 +
19 files changed, 329 insertions(+), 36 deletions(-)
diff --git a/crmd/crmd_lrm.h b/crmd/crmd_lrm.h
index c6373f1..64d80c4 100644
--- a/crmd/crmd_lrm.h
+++ b/crmd/crmd_lrm.h
@@ -162,5 +162,7 @@ int remote_ra_exec(lrm_state_t * lrm_state, const char *rsc_id, const char *acti
void remote_ra_cleanup(lrm_state_t * lrm_state);
void remote_ra_fail(const char *node_name);
void remote_ra_process_pseudo(xmlNode *xml);
+gboolean remote_ra_is_in_maintenance(lrm_state_t * lrm_state);
+void remote_ra_process_maintenance_nodes(xmlNode *xml);
gboolean process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurring_op_s *pending);
diff --git a/crmd/lrm_state.c b/crmd/lrm_state.c
index 7b4379b..d55755e 100644
--- a/crmd/lrm_state.c
+++ b/crmd/lrm_state.c
@@ -508,14 +508,22 @@ crmd_remote_proxy_cb(lrmd_t *lrmd, void *userdata, xmlNode *msg)
crm_notice("%s requested shutdown of its remote connection",
lrm_state->node_name);
- now_s = crm_itoa(now);
- update_attrd(lrm_state->node_name, XML_CIB_ATTR_SHUTDOWN, now_s, NULL, TRUE);
- free(now_s);
+ if (!remote_ra_is_in_maintenance(lrm_state)) {
+ now_s = crm_itoa(now);
+ update_attrd(lrm_state->node_name, XML_CIB_ATTR_SHUTDOWN, now_s, NULL, TRUE);
+ free(now_s);
- remote_proxy_ack_shutdown(lrmd);
+ remote_proxy_ack_shutdown(lrmd);
- crm_warn("Reconnection attempts to %s may result in failures that must be cleared",
- lrm_state->node_name);
+ crm_warn("Reconnection attempts to %s may result in failures that must be cleared",
+ lrm_state->node_name);
+ } else {
+ remote_proxy_nack_shutdown(lrmd);
+
+ crm_notice("Remote resource for %s is not managed so no ordered shutdown happening",
+ lrm_state->node_name);
+ }
+ return;
} else if (safe_str_eq(op, LRMD_IPC_OP_REQUEST) && proxy->is_local) {
/* this is for the crmd, which we are, so don't try
diff --git a/crmd/messages.c b/crmd/messages.c
index 87d0acf..c79d96e 100644
--- a/crmd/messages.c
+++ b/crmd/messages.c
@@ -872,6 +872,11 @@ handle_request(xmlNode * stored_msg, enum crmd_fsa_cause cause)
reap_crm_member(id, name);
}
+ } else if (strcmp(op, CRM_OP_MAINTENANCE_NODES) == 0) {
+ xmlNode *xml = get_message_xml(stored_msg, F_CRM_DATA);
+
+ remote_ra_process_maintenance_nodes(xml);
+
} else {
crm_err("Unexpected request (%s) sent to %s", op, AM_I_DC ? "the DC" : "non-DC node");
crm_log_xml_err(stored_msg, "Unexpected");
diff --git a/crmd/remote_lrmd_ra.c b/crmd/remote_lrmd_ra.c
index e68d784..8085219 100644
--- a/crmd/remote_lrmd_ra.c
+++ b/crmd/remote_lrmd_ra.c
@@ -80,6 +80,10 @@ typedef struct remote_ra_data_s {
enum remote_migration_status migrate_status;
gboolean active;
+ gboolean is_maintenance; /* kind of complex to determine from crmd-context
+ * so we have it signalled back with the
+ * transition from pengine
+ */
} remote_ra_data_t;
static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms);
@@ -485,6 +489,28 @@ monitor_timeout_cb(gpointer data)
return FALSE;
}
+static void
+synthesize_lrmd_success(lrm_state_t *lrm_state, const char *rsc_id, const char *op_type)
+{
+ lrmd_event_data_t op = { 0, };
+
+ if (lrm_state == NULL) {
+ /* if lrm_state not given assume local */
+ lrm_state = lrm_state_find(fsa_our_uname);
+ }
+ CRM_ASSERT(lrm_state != NULL);
+
+ op.type = lrmd_event_exec_complete;
+ op.rsc_id = rsc_id;
+ op.op_type = op_type;
+ op.rc = PCMK_OCF_OK;
+ op.op_status = PCMK_LRM_OP_DONE;
+ op.t_run = time(NULL);
+ op.t_rcchange = op.t_run;
+ op.call_id = generate_callid();
+ process_lrm_event(lrm_state, &op, NULL);
+}
+
void
remote_lrm_op_callback(lrmd_event_data_t * op)
{
@@ -536,9 +562,18 @@ remote_lrm_op_callback(lrmd_event_data_t * op)
(ra_data->cur_cmd == NULL) &&
(ra_data->active == TRUE)) {
- crm_err("Unexpected disconnect on remote-node %s", lrm_state->node_name);
- ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
- ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
+ if (!remote_ra_is_in_maintenance(lrm_state)) {
+ crm_err("Unexpected disconnect on remote-node %s", lrm_state->node_name);
+ ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
+ ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
+ } else {
+ crm_notice("Disconnect on unmanaged remote-node %s", lrm_state->node_name);
+ /* Do roughly what a 'stop' on the remote-resource would do */
+ handle_remote_ra_stop(lrm_state, NULL);
+ remote_node_down(lrm_state->node_name, DOWN_KEEP_LRM);
+ /* now fake the reply of a successful 'stop' */
+ synthesize_lrmd_success(NULL, lrm_state->node_name, "stop");
+ }
return;
}
@@ -651,8 +686,6 @@ handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd)
ra_data->active = FALSE;
lrm_state_disconnect(lrm_state);
- cmd->rc = PCMK_OCF_OK;
- cmd->op_status = PCMK_LRM_OP_DONE;
if (ra_data->cmds) {
g_list_free_full(ra_data->cmds, free_cmd);
@@ -664,7 +697,12 @@ handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd)
ra_data->recurring_cmds = NULL;
ra_data->cur_cmd = NULL;
- report_remote_ra_result(cmd);
+ if (cmd) {
+ cmd->rc = PCMK_OCF_OK;
+ cmd->op_status = PCMK_LRM_OP_DONE;
+
+ report_remote_ra_result(cmd);
+ }
}
static int
@@ -1140,3 +1178,77 @@ remote_ra_process_pseudo(xmlNode *xml)
}
freeXpathObject(search);
}
+
+static void
+remote_ra_maintenance(lrm_state_t * lrm_state, gboolean maintenance)
+{
+ remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
+ xmlNode *update, *state;
+ int call_opt, call_id = 0;
+ crm_node_t *node;
+
+ call_opt = crmd_cib_smart_opt();
+ node = crm_remote_peer_get(lrm_state->node_name);
+ CRM_CHECK(node != NULL, return);
+ update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
+ state = create_node_state_update(node, node_update_none, update,
+ __FUNCTION__);
+ crm_xml_add(state, XML_NODE_IS_MAINTENANCE, maintenance?"1":"0");
+ fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
+ if (call_id < 0) {
+ crm_perror(LOG_WARNING, "%s CIB node state update failed", lrm_state->node_name);
+ } else {
+ /* TODO: still not 100% sure that async update will succeed ... */
+ ra_data->is_maintenance = maintenance;
+ }
+ free_xml(update);
+}
+
+#define XPATH_PSEUDO_MAINTENANCE "//" XML_GRAPH_TAG_PSEUDO_EVENT \
+ "[@" XML_LRM_ATTR_TASK "='" CRM_OP_MAINTENANCE_NODES "']/" \
+ XML_GRAPH_TAG_MAINTENANCE
+
+/*!
+ * \internal
+ * \brief Check a pseudo-action holding updates for maintenance state
+ *
+ * \param[in] xml XML of pseudo-action to check
+ */
+
+void
+remote_ra_process_maintenance_nodes(xmlNode *xml)
+{
+ xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_MAINTENANCE);
+
+ if (numXpathResults(search) == 1) {
+ xmlNode *node;
+ int cnt = 0, cnt_remote = 0;
+
+ for (node =
+ first_named_child(getXpathResult(search, 0), XML_CIB_TAG_NODE);
+ node; node = __xml_next(node)) {
+ lrm_state_t *lrm_state = lrm_state_find(ID(node));
+
+ cnt++;
+ if (lrm_state && lrm_state->remote_ra_data &&
+ ((remote_ra_data_t *) lrm_state->remote_ra_data)->active) {
+ cnt_remote++;
+ remote_ra_maintenance(lrm_state,
+ crm_atoi(crm_element_value(node,
+ XML_NODE_IS_MAINTENANCE), "0"));
+
+ }
+ }
+ crm_trace("Action holds %d nodes (%d remotes found) "
+ "adjusting maintenance-mode", cnt, cnt_remote);
+ }
+ freeXpathObject(search);
+}
+
+gboolean
+remote_ra_is_in_maintenance(lrm_state_t * lrm_state)
+{
+ remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
+
+ return ra_data->is_maintenance;
+}
diff --git a/crmd/te_actions.c b/crmd/te_actions.c
index 5508234..c41d44d 100644
--- a/crmd/te_actions.c
+++ b/crmd/te_actions.c
@@ -53,8 +53,32 @@ te_start_action_timer(crm_graph_t * graph, crm_action_t * action)
static gboolean
te_pseudo_action(crm_graph_t * graph, crm_action_t * pseudo)
{
- /* Check action for Pacemaker Remote node side effects */
- remote_ra_process_pseudo(pseudo->xml);
+ const char *task = crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK);
+
+ /* send to peers as well? */
+ if (safe_str_eq(task, CRM_OP_MAINTENANCE_NODES)) {
+ GHashTableIter iter;
+ crm_node_t *node = NULL;
+
+ g_hash_table_iter_init(&iter, crm_peer_cache);
+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
+ xmlNode *cmd = NULL;
+
+ if (safe_str_eq(fsa_our_uname, node->uname)) {
+ continue;
+ }
+
+ cmd = create_request(task, pseudo->xml, node->uname,
+ CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);
+ send_cluster_message(node, crm_msg_crmd, cmd, FALSE);
+ free_xml(cmd);
+ }
+
+ remote_ra_process_maintenance_nodes(pseudo->xml);
+ } else {
+ /* Check action for Pacemaker Remote node side effects */
+ remote_ra_process_pseudo(pseudo->xml);
+ }
crm_debug("Pseudo-action %d (%s) fired and confirmed", pseudo->id,
crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK_KEY));
diff --git a/include/crm/crm.h b/include/crm/crm.h
index 3f83a91..6afc771 100644
--- a/include/crm/crm.h
+++ b/include/crm/crm.h
@@ -142,6 +142,7 @@ extern char *crm_system_name;
# define CRM_OP_RELAXED_SET "one-or-more"
# define CRM_OP_RELAXED_CLONE "clone-one-or-more"
# define CRM_OP_RM_NODE_CACHE "rm_node_cache"
+# define CRM_OP_MAINTENANCE_NODES "maintenance_nodes"
# define CRMD_JOINSTATE_DOWN "down"
# define CRMD_JOINSTATE_PENDING "pending"
diff --git a/include/crm/lrmd.h b/include/crm/lrmd.h
index 446b39c..e4dc61c 100644
--- a/include/crm/lrmd.h
+++ b/include/crm/lrmd.h
@@ -99,6 +99,7 @@ typedef struct lrmd_key_value_s {
#define LRMD_IPC_OP_RESPONSE "response"
#define LRMD_IPC_OP_SHUTDOWN_REQ "shutdown_req"
#define LRMD_IPC_OP_SHUTDOWN_ACK "shutdown_ack"
+#define LRMD_IPC_OP_SHUTDOWN_NACK "shutdown_nack"
#define F_LRMD_IPC_OP "lrmd_ipc_op"
#define F_LRMD_IPC_IPC_SERVER "lrmd_ipc_server"
diff --git a/include/crm/msg_xml.h b/include/crm/msg_xml.h
index 4a3cd1e..3a0c0e8 100644
--- a/include/crm/msg_xml.h
+++ b/include/crm/msg_xml.h
@@ -255,6 +255,7 @@
# define XML_NODE_IS_PEER "crmd"
# define XML_NODE_IS_REMOTE "remote_node"
# define XML_NODE_IS_FENCED "node_fenced"
+# define XML_NODE_IS_MAINTENANCE "node_in_maintenance"
# define XML_CIB_ATTR_SHUTDOWN "shutdown"
# define XML_CIB_ATTR_STONITH "stonith"
@@ -297,6 +298,7 @@
# define XML_GRAPH_TAG_PSEUDO_EVENT "pseudo_event"
# define XML_GRAPH_TAG_CRM_EVENT "crm_event"
# define XML_GRAPH_TAG_DOWNED "downed"
+# define XML_GRAPH_TAG_MAINTENANCE "maintenance"
# define XML_TAG_RULE "rule"
# define XML_RULE_ATTR_SCORE "score"
diff --git a/include/crm/pengine/status.h b/include/crm/pengine/status.h
index 79e4572..eb401be 100644
--- a/include/crm/pengine/status.h
+++ b/include/crm/pengine/status.h
@@ -160,6 +160,7 @@ struct node_shared_s {
gboolean rsc_discovery_enabled;
gboolean remote_requires_reset;
gboolean remote_was_fenced;
+ gboolean remote_maintenance; /* what the remote-rsc is thinking */
};
struct node_s {
diff --git a/include/crm_internal.h b/include/crm_internal.h
index a498bcb..297e6b3 100644
--- a/include/crm_internal.h
+++ b/include/crm_internal.h
@@ -381,6 +381,7 @@ typedef struct remote_proxy_s {
int remote_proxy_check(lrmd_t *lrmd, GHashTable *hash);
void remote_proxy_cb(lrmd_t *lrmd, const char *node_name, xmlNode *msg);
void remote_proxy_ack_shutdown(lrmd_t *lrmd);
+void remote_proxy_nack_shutdown(lrmd_t *lrmd);
int remote_proxy_dispatch(const char *buffer, ssize_t length, gpointer userdata);
void remote_proxy_disconnected(gpointer data);
diff --git a/lib/lrmd/proxy_common.c b/lib/lrmd/proxy_common.c
index eb17e4e..69cfa8c 100644
--- a/lib/lrmd/proxy_common.c
+++ b/lib/lrmd/proxy_common.c
@@ -59,6 +59,21 @@ remote_proxy_ack_shutdown(lrmd_t *lrmd)
free_xml(msg);
}
+/*!
+ * \brief We're not gonna shutdown as response to
+ * a remote proxy shutdown request.
+ *
+ * \param[in] lrmd Connection to proxy
+ */
+void
+remote_proxy_nack_shutdown(lrmd_t *lrmd)
+{
+ xmlNode *msg = create_xml_node(NULL, T_LRMD_IPC_PROXY);
+ crm_xml_add(msg, F_LRMD_IPC_OP, LRMD_IPC_OP_SHUTDOWN_NACK);
+ lrmd_internal_proxy_send(lrmd, msg);
+ free_xml(msg);
+}
+
void
remote_proxy_relay_event(remote_proxy_t *proxy, xmlNode *msg)
{
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
index e6a8f58..ed6ee7f 100644
--- a/lib/pengine/unpack.c
+++ b/lib/pengine/unpack.c
@@ -89,16 +89,22 @@ pe_fence_node(pe_working_set_t * data_set, node_t * node, const char *reason)
set_bit(node->details->remote_rsc->flags, pe_rsc_failed);
} else if (is_baremetal_remote_node(node)) {
- if(pe_can_fence(data_set, node)) {
- crm_warn("Node %s will be fenced %s", node->details->uname, reason);
+ resource_t *rsc = node->details->remote_rsc;
+
+ if (rsc && (!is_set(rsc->flags, pe_rsc_managed))) {
+ crm_notice("Not fencing node %s because connection is unmanaged, "
+ "otherwise would %s", node->details->uname, reason);
} else {
- crm_warn("Node %s is unclean %s", node->details->uname, reason);
+ if (pe_can_fence(data_set, node)) {
+ crm_warn("Node %s will be fenced %s", node->details->uname, reason);
+ } else {
+ crm_warn("Node %s is unclean %s", node->details->uname, reason);
+ }
+ node->details->remote_requires_reset = TRUE;
}
node->details->unclean = TRUE;
- node->details->remote_requires_reset = TRUE;
-
} else if (node->details->unclean == FALSE) {
- if(pe_can_fence(data_set, node)) {
+ if (pe_can_fence(data_set, node)) {
crm_warn("Node %s will be fenced %s", node->details->uname, reason);
} else {
crm_warn("Node %s is unclean %s", node->details->uname, reason);
@@ -1163,6 +1169,7 @@ unpack_remote_status(xmlNode * status, pe_working_set_t * data_set)
const char *id = NULL;
const char *uname = NULL;
const char *shutdown = NULL;
+ resource_t *rsc = NULL;
GListPtr gIter = NULL;
@@ -1202,6 +1209,10 @@ unpack_remote_status(xmlNode * status, pe_working_set_t * data_set)
}
crm_trace("Processing remote node id=%s, uname=%s", id, uname);
+ this_node->details->remote_maintenance =
+ crm_atoi(crm_element_value(state, XML_NODE_IS_MAINTENANCE), "0");
+
+ rsc = this_node->details->remote_rsc;
if (this_node->details->remote_requires_reset == FALSE) {
this_node->details->unclean = FALSE;
this_node->details->unseen = FALSE;
@@ -1211,11 +1222,11 @@ unpack_remote_status(xmlNode * status, pe_working_set_t * data_set)
shutdown = g_hash_table_lookup(this_node->details->attrs, XML_CIB_ATTR_SHUTDOWN);
if (shutdown != NULL && safe_str_neq("0", shutdown)) {
- resource_t *rsc = this_node->details->remote_rsc;
-
crm_info("Node %s is shutting down", this_node->details->uname);
this_node->details->shutdown = TRUE;
- rsc->next_role = RSC_ROLE_STOPPED;
+ if (rsc) {
+ rsc->next_role = RSC_ROLE_STOPPED;
+ }
}
if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "standby"))) {
@@ -1223,7 +1234,8 @@ unpack_remote_status(xmlNode * status, pe_working_set_t * data_set)
this_node->details->standby = TRUE;
}
- if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "maintenance"))) {
+ if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "maintenance")) ||
+ (rsc && !is_set(rsc->flags, pe_rsc_managed))) {
crm_info("Node %s is in maintenance-mode", this_node->details->uname);
this_node->details->maintenance = TRUE;
}
@@ -2825,7 +2837,7 @@ determine_op_status(
result = PCMK_LRM_OP_NOTSUPPORTED;
break;
- } else if(pe_can_fence(data_set, node) == FALSE
+ } else if (pe_can_fence(data_set, node) == FALSE
&& safe_str_eq(task, CRMD_ACTION_STOP)) {
/* If a stop fails and we can't fence, there's nothing else we can do */
pe_proc_err("No further recovery can be attempted for %s: %s action failed with '%s' (%d)",
diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c
index 2b53999..0ce5c53 100644
--- a/lib/pengine/utils.c
+++ b/lib/pengine/utils.c
@@ -824,20 +824,28 @@ unpack_operation(action_t * action, xmlNode * xml_obj, resource_t * container,
* 2. start - a start failure indicates that an active connection does not already
* exist. The user can set op on-fail=fence if they really want to fence start
* failures. */
- } else if (value == NULL &&
- is_rsc_baremetal_remote_node(action->rsc, data_set) &&
+ } else if (((value == NULL) || !is_set(action->rsc->flags, pe_rsc_managed)) &&
+ (is_rsc_baremetal_remote_node(action->rsc, data_set) &&
!(safe_str_eq(action->task, CRMD_ACTION_STATUS) && interval == 0) &&
- (safe_str_neq(action->task, CRMD_ACTION_START))) {
+ (safe_str_neq(action->task, CRMD_ACTION_START)))) {
- if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
- value = "fence baremetal remote node (default)";
- } else {
- value = "recover baremetal remote node connection (default)";
- }
- if (action->rsc->remote_reconnect_interval) {
+ if (!is_set(action->rsc->flags, pe_rsc_managed)) {
+ action->on_fail = action_fail_stop;
action->fail_role = RSC_ROLE_STOPPED;
+ value = "stop unmanaged baremetal remote node (enforcing default)";
+
+ } else {
+ if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
+ value = "fence baremetal remote node (default)";
+ } else {
+ value = "recover baremetal remote node connection (default)";
+ }
+
+ if (action->rsc->remote_reconnect_interval) {
+ action->fail_role = RSC_ROLE_STOPPED;
+ }
+ action->on_fail = action_fail_reset_remote;
}
- action->on_fail = action_fail_reset_remote;
} else if (value == NULL && safe_str_eq(action->task, CRMD_ACTION_STOP)) {
if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
diff --git a/lrmd/ipc_proxy.c b/lrmd/ipc_proxy.c
index 07c13ab..5d6ab34 100644
--- a/lrmd/ipc_proxy.c
+++ b/lrmd/ipc_proxy.c
@@ -164,6 +164,11 @@ ipc_proxy_forward_client(crm_client_t *ipc_proxy, xmlNode *xml)
return;
}
+ if (safe_str_eq(msg_type, LRMD_IPC_OP_SHUTDOWN_NACK)) {
+ handle_shutdown_nack();
+ return;
+ }
+
ipc_client = crm_client_get_by_id(session);
if (ipc_client == NULL) {
xmlNode *msg = create_xml_node(NULL, T_LRMD_IPC_PROXY);
diff --git a/lrmd/lrmd_private.h b/lrmd/lrmd_private.h
index 62e9c84..5579b92 100644
--- a/lrmd/lrmd_private.h
+++ b/lrmd/lrmd_private.h
@@ -85,6 +85,8 @@ void free_rsc(gpointer data);
void handle_shutdown_ack(void);
+void handle_shutdown_nack(void);
+
void lrmd_client_destroy(crm_client_t *client);
void client_disconnect_cleanup(const char *client_id);
diff --git a/lrmd/main.c b/lrmd/main.c
index e3d3aaa..a3aa08f 100644
--- a/lrmd/main.c
+++ b/lrmd/main.c
@@ -364,6 +364,7 @@ void handle_shutdown_ack()
crm_info("Received shutdown ack");
if (shutdown_ack_timer > 0) {
g_source_remove(shutdown_ack_timer);
+ shutdown_ack_timer = 0;
}
return;
}
@@ -371,6 +372,29 @@ void handle_shutdown_ack()
crm_debug("Ignoring unexpected shutdown ack");
}
+/*!
+ * \internal
+ * \brief Make short exit timer fire immediately
+ */
+void handle_shutdown_nack()
+{
+#ifdef ENABLE_PCMK_REMOTE
+ if (shutting_down) {
+ crm_info("Received shutdown nack");
+ if (shutdown_ack_timer > 0) {
+ GSource *timer =
+ g_main_context_find_source_by_id(NULL, shutdown_ack_timer);
+
+ if (timer != NULL) {
+ g_source_set_ready_time(timer, 0);
+ }
+ }
+ return;
+ }
+#endif
+ crm_debug("Ignoring unexpected shutdown nack");
+}
+
/* *INDENT-OFF* */
static struct crm_option long_options[] = {
/* Top-level Options */
diff --git a/pengine/allocate.c b/pengine/allocate.c
index 9a87816..7562253 100644
--- a/pengine/allocate.c
+++ b/pengine/allocate.c
@@ -2150,6 +2150,9 @@ stage8(pe_working_set_t * data_set)
crm_log_xml_trace(data_set->graph, "created resource-driven action list");
+ /* pseudo action to distribute list of nodes with maintenance state update */
+ add_maintenance_update(data_set);
+
/* catch any non-resource specific actions */
crm_trace("processing non-resource actions");
diff --git a/pengine/graph.c b/pengine/graph.c
index 81d8355..5ba60f7 100644
--- a/pengine/graph.c
+++ b/pengine/graph.c
@@ -788,13 +788,15 @@ get_router_node(action_t *action)
* \param[in] id Node UUID to add
* \param[in,out] xml Parent XML tag to add to
*/
-static void
+static xmlNode*
add_node_to_xml_by_id(const char *id, xmlNode *xml)
{
xmlNode *node_xml;
node_xml = create_xml_node(xml, XML_CIB_TAG_NODE);
crm_xml_add(node_xml, XML_ATTR_UUID, id);
+
+ return node_xml;
}
/*!
@@ -812,6 +814,62 @@ add_node_to_xml(const node_t *node, void *xml)
/*!
* \internal
+ * \brief Add XML with nodes that need an update of their maintenance state
+ *
+ * \param[in,out] xml Parent XML tag to add to
+ * \param[in] data_set Working set for cluster
+ */
+static int
+add_maintenance_nodes(xmlNode *xml, const pe_working_set_t *data_set)
+{
+ GListPtr gIter = NULL;
+ xmlNode *maintenance =
+ xml?create_xml_node(xml, XML_GRAPH_TAG_MAINTENANCE):NULL;
+ int count = 0;
+
+ for (gIter = data_set->nodes; gIter != NULL;
+ gIter = gIter->next) {
+ node_t *node = (node_t *) gIter->data;
+ struct node_shared_s *details = node->details;
+
+ if (!(is_remote_node(node))) {
+ continue; /* just remote nodes need to know atm */
+ }
+
+ if (details->maintenance != details->remote_maintenance) {
+ if (maintenance) {
+ crm_xml_add(
+ add_node_to_xml_by_id(node->details->id, maintenance),
+ XML_NODE_IS_MAINTENANCE, details->maintenance?"1":"0");
+ }
+ count++;
+ }
+ }
+ crm_trace("%s %d nodes to adjust maintenance-mode "
+ "to transition", maintenance?"Added":"Counted", count);
+ return count;
+}
+
+/*!
+ * \internal
+ * \brief Add pseudo action with nodes needing maintenance state update
+ *
+ * \param[in,out] data_set Working set for cluster
+ */
+void
+add_maintenance_update(pe_working_set_t *data_set)
+{
+ action_t *action = NULL;
+
+ if (add_maintenance_nodes(NULL, data_set)) {
+ crm_trace("adding maintenance state update pseudo action");
+ action = get_pseudo_op(CRM_OP_MAINTENANCE_NODES, data_set);
+ set_bit(action->flags, pe_action_print_always);
+ }
+}
+
+/*!
+ * \internal
* \brief Add XML with nodes that an action is expected to bring down
*
* If a specified action is expected to bring any nodes down, add an XML block
@@ -874,6 +932,7 @@ static xmlNode *
action2xml(action_t * action, gboolean as_input, pe_working_set_t *data_set)
{
gboolean needs_node_info = TRUE;
+ gboolean needs_maintenance_info = FALSE;
xmlNode *action_xml = NULL;
xmlNode *args_xml = NULL;
@@ -901,6 +960,9 @@ action2xml(action_t * action, gboolean as_input, pe_working_set_t *data_set)
/* action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); */
} else if (is_set(action->flags, pe_action_pseudo)) {
+ if (safe_str_eq(action->task, CRM_OP_MAINTENANCE_NODES)) {
+ needs_maintenance_info = TRUE;
+ }
action_xml = create_xml_node(NULL, XML_GRAPH_TAG_PSEUDO_EVENT);
needs_node_info = FALSE;
@@ -1082,6 +1144,10 @@ action2xml(action_t * action, gboolean as_input, pe_working_set_t *data_set)
add_downed_nodes(action_xml, action, data_set);
}
+ if (needs_maintenance_info) {
+ add_maintenance_nodes(action_xml, data_set);
+ }
+
crm_log_xml_trace(action_xml, "dumped action");
return action_xml;
}
diff --git a/pengine/pengine.h b/pengine/pengine.h
index 5500819..e3f4874 100644
--- a/pengine/pengine.h
+++ b/pengine/pengine.h
@@ -145,6 +145,7 @@ extern int new_rsc_order(resource_t * lh_rsc, const char *lh_task,
new_rsc_order(rsc1, CRMD_ACTION_STOP, rsc2, CRMD_ACTION_STOP, type, data_set)
extern void graph_element_from_action(action_t * action, pe_working_set_t * data_set);
+extern void add_maintenance_update(pe_working_set_t *data_set);
extern gboolean show_scores;
extern int scores_log_level;
--
1.8.3.1