Blame SOURCES/035-unmanaged-remotes.patch

60de42
From 0113ff6fb6bb576356d201cf698b98455dbf5180 Mon Sep 17 00:00:00 2001
60de42
From: Klaus Wenninger <klaus.wenninger@aon.at>
60de42
Date: Wed, 21 Dec 2016 18:08:40 +0100
60de42
Subject: [PATCH] Fix: pacemaker-remote: pacemaker_remoted shutdown while
60de42
 unmanaged
60de42
60de42
Since the introduction of the graceful shutdown of pacemaker_remoted,
60de42
the shutdown hangs if the remote-resource is unmanaged.
60de42
This happens because pacemaker_remoted waits for all resources
60de42
running on the remote-node to be shut down, while pacemaker,
60de42
on the other hand, doesn't touch resources on a remote-node
60de42
when the remote-resource is unmanaged.
60de42
60de42
Fixes rhbz#1388102
60de42
---
60de42
 crmd/crmd_lrm.h              |   2 +
60de42
 crmd/lrm_state.c             |  20 ++++---
60de42
 crmd/messages.c              |   5 ++
60de42
 crmd/remote_lrmd_ra.c        | 124 ++++++++++++++++++++++++++++++++++++++++---
60de42
 crmd/te_actions.c            |  28 +++++++++-
60de42
 include/crm/crm.h            |   1 +
60de42
 include/crm/lrmd.h           |   1 +
60de42
 include/crm/msg_xml.h        |   2 +
60de42
 include/crm/pengine/status.h |   1 +
60de42
 include/crm_internal.h       |   1 +
60de42
 lib/lrmd/proxy_common.c      |  15 ++++++
60de42
 lib/pengine/unpack.c         |  34 ++++++++----
60de42
 lib/pengine/utils.c          |  28 ++++++----
60de42
 lrmd/ipc_proxy.c             |   5 ++
60de42
 lrmd/lrmd_private.h          |   2 +
60de42
 lrmd/main.c                  |  24 +++++++++
60de42
 pengine/allocate.c           |   3 ++
60de42
 pengine/graph.c              |  68 +++++++++++++++++++++++-
60de42
 pengine/pengine.h            |   1 +
60de42
 19 files changed, 329 insertions(+), 36 deletions(-)
60de42
60de42
diff --git a/crmd/crmd_lrm.h b/crmd/crmd_lrm.h
60de42
index c6373f1..64d80c4 100644
60de42
--- a/crmd/crmd_lrm.h
60de42
+++ b/crmd/crmd_lrm.h
60de42
@@ -162,5 +162,7 @@ int remote_ra_exec(lrm_state_t * lrm_state, const char *rsc_id, const char *acti
60de42
 void remote_ra_cleanup(lrm_state_t * lrm_state);
60de42
 void remote_ra_fail(const char *node_name);
60de42
 void remote_ra_process_pseudo(xmlNode *xml);
60de42
+gboolean remote_ra_is_in_maintenance(lrm_state_t * lrm_state);
60de42
+void remote_ra_process_maintenance_nodes(xmlNode *xml);
60de42
 
60de42
 gboolean process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurring_op_s *pending);
60de42
diff --git a/crmd/lrm_state.c b/crmd/lrm_state.c
60de42
index 7b4379b..d55755e 100644
60de42
--- a/crmd/lrm_state.c
60de42
+++ b/crmd/lrm_state.c
60de42
@@ -508,14 +508,22 @@ crmd_remote_proxy_cb(lrmd_t *lrmd, void *userdata, xmlNode *msg)
60de42
         crm_notice("%s requested shutdown of its remote connection",
60de42
                    lrm_state->node_name);
60de42
60de42
-        now_s = crm_itoa(now);
60de42
-        update_attrd(lrm_state->node_name, XML_CIB_ATTR_SHUTDOWN, now_s, NULL, TRUE);
60de42
-        free(now_s);
60de42
+        if (!remote_ra_is_in_maintenance(lrm_state)) {
60de42
+            now_s = crm_itoa(now);
60de42
+            update_attrd(lrm_state->node_name, XML_CIB_ATTR_SHUTDOWN, now_s, NULL, TRUE);
60de42
+            free(now_s);
60de42
60de42
-        remote_proxy_ack_shutdown(lrmd);
60de42
+            remote_proxy_ack_shutdown(lrmd);
60de42
60de42
-        crm_warn("Reconnection attempts to %s may result in failures that must be cleared",
60de42
-                 lrm_state->node_name);
60de42
+            crm_warn("Reconnection attempts to %s may result in failures that must be cleared",
60de42
+                    lrm_state->node_name);
60de42
+        } else {
60de42
+            remote_proxy_nack_shutdown(lrmd);
60de42
+
60de42
+            crm_notice("Remote resource for %s is not managed so no ordered shutdown happening",
60de42
+                    lrm_state->node_name);
60de42
+        }
60de42
+        return;
60de42
60de42
     } else if (safe_str_eq(op, LRMD_IPC_OP_REQUEST) && proxy->is_local) {
60de42
         /* this is for the crmd, which we are, so don't try
60de42
diff --git a/crmd/messages.c b/crmd/messages.c
60de42
index 87d0acf..c79d96e 100644
60de42
--- a/crmd/messages.c
60de42
+++ b/crmd/messages.c
60de42
@@ -872,6 +872,11 @@ handle_request(xmlNode * stored_msg, enum crmd_fsa_cause cause)
60de42
             reap_crm_member(id, name);
60de42
         }
60de42
 
60de42
+    } else if (strcmp(op, CRM_OP_MAINTENANCE_NODES) == 0) {
60de42
+        xmlNode *xml = get_message_xml(stored_msg, F_CRM_DATA);
60de42
+
60de42
+        remote_ra_process_maintenance_nodes(xml);
60de42
+
60de42
     } else {
60de42
         crm_err("Unexpected request (%s) sent to %s", op, AM_I_DC ? "the DC" : "non-DC node");
60de42
         crm_log_xml_err(stored_msg, "Unexpected");
60de42
diff --git a/crmd/remote_lrmd_ra.c b/crmd/remote_lrmd_ra.c
60de42
index e68d784..8085219 100644
60de42
--- a/crmd/remote_lrmd_ra.c
60de42
+++ b/crmd/remote_lrmd_ra.c
60de42
@@ -80,6 +80,10 @@ typedef struct remote_ra_data_s {
60de42
     enum remote_migration_status migrate_status;
60de42
 
60de42
     gboolean active;
60de42
+    gboolean is_maintenance; /* kind of complex to determine from crmd-context
60de42
+                              * so we have it signalled back with the
60de42
+                              * transition from pengine
60de42
+                              */
60de42
 } remote_ra_data_t;
60de42
 
60de42
 static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms);
60de42
@@ -485,6 +489,28 @@ monitor_timeout_cb(gpointer data)
60de42
     return FALSE;
60de42
 }
60de42
 
60de42
+static void
60de42
+synthesize_lrmd_success(lrm_state_t *lrm_state, const char *rsc_id, const char *op_type)
60de42
+{
60de42
+    lrmd_event_data_t op = { 0, };
60de42
+
60de42
+    if (lrm_state == NULL) {
60de42
+        /* if lrm_state not given assume local */
60de42
+        lrm_state = lrm_state_find(fsa_our_uname);
60de42
+    }
60de42
+    CRM_ASSERT(lrm_state != NULL);
60de42
+
60de42
+    op.type = lrmd_event_exec_complete;
60de42
+    op.rsc_id = rsc_id;
60de42
+    op.op_type = op_type;
60de42
+    op.rc = PCMK_OCF_OK;
60de42
+    op.op_status = PCMK_LRM_OP_DONE;
60de42
+    op.t_run = time(NULL);
60de42
+    op.t_rcchange = op.t_run;
60de42
+    op.call_id = generate_callid();
60de42
+    process_lrm_event(lrm_state, &op, NULL);
60de42
+}
60de42
+
60de42
 void
60de42
 remote_lrm_op_callback(lrmd_event_data_t * op)
60de42
 {
60de42
@@ -536,9 +562,18 @@ remote_lrm_op_callback(lrmd_event_data_t * op)
60de42
         (ra_data->cur_cmd == NULL) &&
60de42
         (ra_data->active == TRUE)) {
60de42
 
60de42
-        crm_err("Unexpected disconnect on remote-node %s", lrm_state->node_name);
60de42
-        ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
60de42
-        ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
60de42
+        if (!remote_ra_is_in_maintenance(lrm_state)) {
60de42
+            crm_err("Unexpected disconnect on remote-node %s", lrm_state->node_name);
60de42
+            ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
60de42
+            ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
60de42
+        } else {
60de42
+            crm_notice("Disconnect on unmanaged remote-node %s", lrm_state->node_name);
60de42
+            /* Do roughly what a 'stop' on the remote-resource would do */
60de42
+            handle_remote_ra_stop(lrm_state, NULL);
60de42
+            remote_node_down(lrm_state->node_name, DOWN_KEEP_LRM);
60de42
+            /* now fake the reply of a successful 'stop' */
60de42
+            synthesize_lrmd_success(NULL, lrm_state->node_name, "stop");
60de42
+        }
60de42
         return;
60de42
     }
60de42
 
60de42
@@ -651,8 +686,6 @@ handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd)
60de42
 
60de42
     ra_data->active = FALSE;
60de42
     lrm_state_disconnect(lrm_state);
60de42
-    cmd->rc = PCMK_OCF_OK;
60de42
-    cmd->op_status = PCMK_LRM_OP_DONE;
60de42
 
60de42
     if (ra_data->cmds) {
60de42
         g_list_free_full(ra_data->cmds, free_cmd);
60de42
@@ -664,7 +697,12 @@ handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd)
60de42
     ra_data->recurring_cmds = NULL;
60de42
     ra_data->cur_cmd = NULL;
60de42
 
60de42
-    report_remote_ra_result(cmd);
60de42
+    if (cmd) {
60de42
+        cmd->rc = PCMK_OCF_OK;
60de42
+        cmd->op_status = PCMK_LRM_OP_DONE;
60de42
+
60de42
+        report_remote_ra_result(cmd);
60de42
+    }
60de42
 }
60de42
 
60de42
 static int
60de42
@@ -1140,3 +1178,77 @@ remote_ra_process_pseudo(xmlNode *xml)
60de42
     }
60de42
     freeXpathObject(search);
60de42
 }
60de42
+
60de42
+static void
60de42
+remote_ra_maintenance(lrm_state_t * lrm_state, gboolean maintenance)
60de42
+{
60de42
+    remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
60de42
+    xmlNode *update, *state;
60de42
+    int call_opt, call_id = 0;
60de42
+    crm_node_t *node;
60de42
+
60de42
+    call_opt = crmd_cib_smart_opt();
60de42
+    node = crm_remote_peer_get(lrm_state->node_name);
60de42
+    CRM_CHECK(node != NULL, return);
60de42
+    update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
60de42
+    state = create_node_state_update(node, node_update_none, update,
60de42
+                                     __FUNCTION__);
60de42
+    crm_xml_add(state, XML_NODE_IS_MAINTENANCE, maintenance?"1":"0");
60de42
+    fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
60de42
+    if (call_id < 0) {
60de42
+        crm_perror(LOG_WARNING, "%s CIB node state update failed", lrm_state->node_name);
60de42
+    } else {
60de42
+        /* TODO: still not 100% sure that async update will succeed ... */
60de42
+        ra_data->is_maintenance = maintenance;
60de42
+    }
60de42
+    free_xml(update);
60de42
+}
60de42
+
60de42
+#define XPATH_PSEUDO_MAINTENANCE "//" XML_GRAPH_TAG_PSEUDO_EVENT \
60de42
+    "[@" XML_LRM_ATTR_TASK "='" CRM_OP_MAINTENANCE_NODES "']/" \
60de42
+    XML_GRAPH_TAG_MAINTENANCE
60de42
+
60de42
+/*!
60de42
+ * \internal
60de42
+ * \brief Check a pseudo-action holding updates for maintenance state
60de42
+ *
60de42
+ * \param[in] xml  XML of pseudo-action to check
60de42
+ */
60de42
+
60de42
+void
60de42
+remote_ra_process_maintenance_nodes(xmlNode *xml)
60de42
+{
60de42
+    xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_MAINTENANCE);
60de42
+
60de42
+    if (numXpathResults(search) == 1) {
60de42
+        xmlNode *node;
60de42
+        int cnt = 0, cnt_remote = 0;
60de42
+
60de42
+        for (node =
60de42
+                first_named_child(getXpathResult(search, 0), XML_CIB_TAG_NODE);
60de42
+            node; node = __xml_next(node)) {
60de42
+            lrm_state_t *lrm_state = lrm_state_find(ID(node));
60de42
+
60de42
+            cnt++;
60de42
+            if (lrm_state && lrm_state->remote_ra_data &&
60de42
+                ((remote_ra_data_t *) lrm_state->remote_ra_data)->active) {
60de42
+                cnt_remote++;
60de42
+                remote_ra_maintenance(lrm_state,
60de42
+                                        crm_atoi(crm_element_value(node,
60de42
+                                            XML_NODE_IS_MAINTENANCE), "0"));
60de42
+
60de42
+            }
60de42
+        }
60de42
+        crm_trace("Action holds %d nodes (%d remotes found) "
60de42
+                    "adjusting maintenance-mode", cnt, cnt_remote);
60de42
+    }
60de42
+    freeXpathObject(search);
60de42
+}
60de42
+
60de42
+gboolean
60de42
+remote_ra_is_in_maintenance(lrm_state_t * lrm_state)
60de42
+{
60de42
+    remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
60de42
+
60de42
+    return ra_data->is_maintenance;
60de42
+}
60de42
diff --git a/crmd/te_actions.c b/crmd/te_actions.c
60de42
index 5508234..c41d44d 100644
60de42
--- a/crmd/te_actions.c
60de42
+++ b/crmd/te_actions.c
60de42
@@ -53,8 +53,32 @@ te_start_action_timer(crm_graph_t * graph, crm_action_t * action)
60de42
 static gboolean
60de42
 te_pseudo_action(crm_graph_t * graph, crm_action_t * pseudo)
60de42
 {
60de42
-    /* Check action for Pacemaker Remote node side effects */
60de42
-    remote_ra_process_pseudo(pseudo->xml);
60de42
+    const char *task = crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK);
60de42
+
60de42
+    /* send to peers as well? */
60de42
+    if (safe_str_eq(task, CRM_OP_MAINTENANCE_NODES)) {
60de42
+        GHashTableIter iter;
60de42
+        crm_node_t *node = NULL;
60de42
+
60de42
+        g_hash_table_iter_init(&iter, crm_peer_cache);
60de42
+        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
60de42
+            xmlNode *cmd = NULL;
60de42
+
60de42
+            if (safe_str_eq(fsa_our_uname, node->uname)) {
60de42
+                continue;
60de42
+            }
60de42
+
60de42
+            cmd = create_request(task, pseudo->xml, node->uname,
60de42
+                                 CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);
60de42
+            send_cluster_message(node, crm_msg_crmd, cmd, FALSE);
60de42
+            free_xml(cmd);
60de42
+        }
60de42
+
60de42
+        remote_ra_process_maintenance_nodes(pseudo->xml);
60de42
+    } else {
60de42
+        /* Check action for Pacemaker Remote node side effects */
60de42
+        remote_ra_process_pseudo(pseudo->xml);
60de42
+    }
60de42
 
60de42
     crm_debug("Pseudo-action %d (%s) fired and confirmed", pseudo->id,
60de42
               crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK_KEY));
60de42
diff --git a/include/crm/crm.h b/include/crm/crm.h
60de42
index 3f83a91..6afc771 100644
60de42
--- a/include/crm/crm.h
60de42
+++ b/include/crm/crm.h
60de42
@@ -142,6 +142,7 @@ extern char *crm_system_name;
60de42
 #  define CRM_OP_RELAXED_SET  "one-or-more"
60de42
 #  define CRM_OP_RELAXED_CLONE  "clone-one-or-more"
60de42
 #  define CRM_OP_RM_NODE_CACHE "rm_node_cache"
60de42
+#  define CRM_OP_MAINTENANCE_NODES "maintenance_nodes"
60de42
 
60de42
 #  define CRMD_JOINSTATE_DOWN           "down"
60de42
 #  define CRMD_JOINSTATE_PENDING        "pending"
60de42
diff --git a/include/crm/lrmd.h b/include/crm/lrmd.h
60de42
index 446b39c..e4dc61c 100644
60de42
--- a/include/crm/lrmd.h
60de42
+++ b/include/crm/lrmd.h
60de42
@@ -99,6 +99,7 @@ typedef struct lrmd_key_value_s {
60de42
 #define LRMD_IPC_OP_RESPONSE      "response"
60de42
 #define LRMD_IPC_OP_SHUTDOWN_REQ  "shutdown_req"
60de42
 #define LRMD_IPC_OP_SHUTDOWN_ACK  "shutdown_ack"
60de42
+#define LRMD_IPC_OP_SHUTDOWN_NACK "shutdown_nack"
60de42
 
60de42
 #define F_LRMD_IPC_OP           "lrmd_ipc_op"
60de42
 #define F_LRMD_IPC_IPC_SERVER   "lrmd_ipc_server"
60de42
diff --git a/include/crm/msg_xml.h b/include/crm/msg_xml.h
60de42
index 4a3cd1e..3a0c0e8 100644
60de42
--- a/include/crm/msg_xml.h
60de42
+++ b/include/crm/msg_xml.h
60de42
@@ -255,6 +255,7 @@
60de42
 #  define XML_NODE_IS_PEER    	"crmd"
60de42
 #  define XML_NODE_IS_REMOTE    	"remote_node"
60de42
 #  define XML_NODE_IS_FENCED		"node_fenced"
60de42
+#  define XML_NODE_IS_MAINTENANCE   "node_in_maintenance"
60de42
 
60de42
 #  define XML_CIB_ATTR_SHUTDOWN       	"shutdown"
60de42
 #  define XML_CIB_ATTR_STONITH	    	"stonith"
60de42
@@ -297,6 +298,7 @@
60de42
 #  define XML_GRAPH_TAG_PSEUDO_EVENT	"pseudo_event"
60de42
 #  define XML_GRAPH_TAG_CRM_EVENT	"crm_event"
60de42
 #  define XML_GRAPH_TAG_DOWNED            "downed"
60de42
+#  define XML_GRAPH_TAG_MAINTENANCE       "maintenance"
60de42
 
60de42
 #  define XML_TAG_RULE			"rule"
60de42
 #  define XML_RULE_ATTR_SCORE		"score"
60de42
diff --git a/include/crm/pengine/status.h b/include/crm/pengine/status.h
60de42
index 79e4572..eb401be 100644
60de42
--- a/include/crm/pengine/status.h
60de42
+++ b/include/crm/pengine/status.h
60de42
@@ -160,6 +160,7 @@ struct node_shared_s {
60de42
     gboolean rsc_discovery_enabled;
60de42
     gboolean remote_requires_reset;
60de42
     gboolean remote_was_fenced;
60de42
+    gboolean remote_maintenance; /* what the remote-rsc is thinking */
60de42
 };
60de42
 
60de42
 struct node_s {
60de42
diff --git a/include/crm_internal.h b/include/crm_internal.h
60de42
index a498bcb..297e6b3 100644
60de42
--- a/include/crm_internal.h
60de42
+++ b/include/crm_internal.h
60de42
@@ -381,6 +381,7 @@ typedef struct remote_proxy_s {
60de42
 int  remote_proxy_check(lrmd_t *lrmd, GHashTable *hash);
60de42
 void remote_proxy_cb(lrmd_t *lrmd, const char *node_name, xmlNode *msg);
60de42
 void remote_proxy_ack_shutdown(lrmd_t *lrmd);
60de42
+void remote_proxy_nack_shutdown(lrmd_t *lrmd);
60de42
 
60de42
 int  remote_proxy_dispatch(const char *buffer, ssize_t length, gpointer userdata);
60de42
 void remote_proxy_disconnected(gpointer data);
60de42
diff --git a/lib/lrmd/proxy_common.c b/lib/lrmd/proxy_common.c
60de42
index eb17e4e..69cfa8c 100644
60de42
--- a/lib/lrmd/proxy_common.c
60de42
+++ b/lib/lrmd/proxy_common.c
60de42
@@ -59,6 +59,21 @@ remote_proxy_ack_shutdown(lrmd_t *lrmd)
60de42
     free_xml(msg);
60de42
 }
60de42
 
60de42
+/*!
60de42
+ * \brief Send a negative acknowledgement (NACK) in response to
60de42
+ *        a remote proxy shutdown request.
60de42
+ *
60de42
+ * \param[in] lrmd  Connection to proxy
60de42
+ */
60de42
+void
60de42
+remote_proxy_nack_shutdown(lrmd_t *lrmd)
60de42
+{
60de42
+    xmlNode *msg = create_xml_node(NULL, T_LRMD_IPC_PROXY);
60de42
+    crm_xml_add(msg, F_LRMD_IPC_OP, LRMD_IPC_OP_SHUTDOWN_NACK);
60de42
+    lrmd_internal_proxy_send(lrmd, msg);
60de42
+    free_xml(msg);
60de42
+}
60de42
+
60de42
 void
60de42
 remote_proxy_relay_event(remote_proxy_t *proxy, xmlNode *msg)
60de42
 {
60de42
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
60de42
index e6a8f58..ed6ee7f 100644
60de42
--- a/lib/pengine/unpack.c
60de42
+++ b/lib/pengine/unpack.c
60de42
@@ -89,16 +89,22 @@ pe_fence_node(pe_working_set_t * data_set, node_t * node, const char *reason)
60de42
         set_bit(node->details->remote_rsc->flags, pe_rsc_failed);
60de42
 
60de42
     } else if (is_baremetal_remote_node(node)) {
60de42
-        if(pe_can_fence(data_set, node)) {
60de42
-            crm_warn("Node %s will be fenced %s", node->details->uname, reason);
60de42
+        resource_t *rsc = node->details->remote_rsc;
60de42
+
60de42
+        if (rsc && (!is_set(rsc->flags, pe_rsc_managed))) {
60de42
+            crm_notice("Not fencing node %s because connection is unmanaged, "
60de42
+                       "otherwise would %s", node->details->uname, reason);
60de42
         } else {
60de42
-            crm_warn("Node %s is unclean %s", node->details->uname, reason);
60de42
+            if (pe_can_fence(data_set, node)) {
60de42
+                crm_warn("Node %s will be fenced %s", node->details->uname, reason);
60de42
+            } else {
60de42
+                crm_warn("Node %s is unclean %s", node->details->uname, reason);
60de42
+            }
60de42
+            node->details->remote_requires_reset = TRUE;
60de42
         }
60de42
         node->details->unclean = TRUE;
60de42
-        node->details->remote_requires_reset = TRUE;
60de42
-
60de42
     } else if (node->details->unclean == FALSE) {
60de42
-        if(pe_can_fence(data_set, node)) {
60de42
+        if (pe_can_fence(data_set, node)) {
60de42
             crm_warn("Node %s will be fenced %s", node->details->uname, reason);
60de42
         } else {
60de42
             crm_warn("Node %s is unclean %s", node->details->uname, reason);
60de42
@@ -1163,6 +1169,7 @@ unpack_remote_status(xmlNode * status, pe_working_set_t * data_set)
60de42
     const char *id = NULL;
60de42
     const char *uname = NULL;
60de42
     const char *shutdown = NULL;
60de42
+    resource_t *rsc = NULL;
60de42
 
60de42
     GListPtr gIter = NULL;
60de42
 
60de42
@@ -1202,6 +1209,10 @@ unpack_remote_status(xmlNode * status, pe_working_set_t * data_set)
60de42
         }
60de42
         crm_trace("Processing remote node id=%s, uname=%s", id, uname);
60de42
 
60de42
+        this_node->details->remote_maintenance =
60de42
+            crm_atoi(crm_element_value(state, XML_NODE_IS_MAINTENANCE), "0");
60de42
+
60de42
+        rsc = this_node->details->remote_rsc;
60de42
         if (this_node->details->remote_requires_reset == FALSE) {
60de42
             this_node->details->unclean = FALSE;
60de42
             this_node->details->unseen = FALSE;
60de42
@@ -1211,11 +1222,11 @@ unpack_remote_status(xmlNode * status, pe_working_set_t * data_set)
60de42
 
60de42
         shutdown = g_hash_table_lookup(this_node->details->attrs, XML_CIB_ATTR_SHUTDOWN);
60de42
         if (shutdown != NULL && safe_str_neq("0", shutdown)) {
60de42
-            resource_t *rsc = this_node->details->remote_rsc;
60de42
-
60de42
             crm_info("Node %s is shutting down", this_node->details->uname);
60de42
             this_node->details->shutdown = TRUE;
60de42
-            rsc->next_role = RSC_ROLE_STOPPED;
60de42
+            if (rsc) {
60de42
+                rsc->next_role = RSC_ROLE_STOPPED;
60de42
+            }
60de42
         }
60de42
  
60de42
         if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "standby"))) {
60de42
@@ -1223,7 +1234,8 @@ unpack_remote_status(xmlNode * status, pe_working_set_t * data_set)
60de42
             this_node->details->standby = TRUE;
60de42
         }
60de42
 
60de42
-        if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "maintenance"))) {
60de42
+        if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "maintenance")) ||
60de42
+            (rsc && !is_set(rsc->flags, pe_rsc_managed))) {
60de42
             crm_info("Node %s is in maintenance-mode", this_node->details->uname);
60de42
             this_node->details->maintenance = TRUE;
60de42
         }
60de42
@@ -2825,7 +2837,7 @@ determine_op_status(
60de42
                 result = PCMK_LRM_OP_NOTSUPPORTED;
60de42
                 break;
60de42
 
60de42
-            } else if(pe_can_fence(data_set, node) == FALSE
60de42
+            } else if (pe_can_fence(data_set, node) == FALSE
60de42
                && safe_str_eq(task, CRMD_ACTION_STOP)) {
60de42
                 /* If a stop fails and we can't fence, there's nothing else we can do */
60de42
                 pe_proc_err("No further recovery can be attempted for %s: %s action failed with '%s' (%d)",
60de42
diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c
60de42
index 2b53999..0ce5c53 100644
60de42
--- a/lib/pengine/utils.c
60de42
+++ b/lib/pengine/utils.c
60de42
@@ -824,20 +824,28 @@ unpack_operation(action_t * action, xmlNode * xml_obj, resource_t * container,
60de42
      * 2. start - a start failure indicates that an active connection does not already
60de42
      * exist. The user can set op on-fail=fence if they really want to fence start
60de42
      * failures. */
60de42
-    } else if (value == NULL &&
60de42
-               is_rsc_baremetal_remote_node(action->rsc, data_set) &&
60de42
+    } else if (((value == NULL) || !is_set(action->rsc->flags, pe_rsc_managed)) &&
60de42
+                (is_rsc_baremetal_remote_node(action->rsc, data_set) &&
60de42
                !(safe_str_eq(action->task, CRMD_ACTION_STATUS) && interval == 0) &&
60de42
-                (safe_str_neq(action->task, CRMD_ACTION_START))) {
60de42
+                (safe_str_neq(action->task, CRMD_ACTION_START)))) {
60de42
 
60de42
-        if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
60de42
-            value = "fence baremetal remote node (default)";
60de42
-        } else {
60de42
-            value = "recover baremetal remote node connection (default)";
60de42
-        }
60de42
-        if (action->rsc->remote_reconnect_interval) {
60de42
+        if (!is_set(action->rsc->flags, pe_rsc_managed)) {
60de42
+            action->on_fail = action_fail_stop;
60de42
             action->fail_role = RSC_ROLE_STOPPED;
60de42
+            value = "stop unmanaged baremetal remote node (enforcing default)";
60de42
+
60de42
+        } else {
60de42
+            if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
60de42
+                value = "fence baremetal remote node (default)";
60de42
+            } else {
60de42
+                value = "recover baremetal remote node connection (default)";
60de42
+            }
60de42
+
60de42
+            if (action->rsc->remote_reconnect_interval) {
60de42
+                action->fail_role = RSC_ROLE_STOPPED;
60de42
+            }
60de42
+            action->on_fail = action_fail_reset_remote;
60de42
         }
60de42
-        action->on_fail = action_fail_reset_remote;
60de42
 
60de42
     } else if (value == NULL && safe_str_eq(action->task, CRMD_ACTION_STOP)) {
60de42
         if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
60de42
diff --git a/lrmd/ipc_proxy.c b/lrmd/ipc_proxy.c
60de42
index 07c13ab..5d6ab34 100644
60de42
--- a/lrmd/ipc_proxy.c
60de42
+++ b/lrmd/ipc_proxy.c
60de42
@@ -164,6 +164,11 @@ ipc_proxy_forward_client(crm_client_t *ipc_proxy, xmlNode *xml)
60de42
         return;
60de42
     }
60de42
 
60de42
+    if (safe_str_eq(msg_type, LRMD_IPC_OP_SHUTDOWN_NACK)) {
60de42
+        handle_shutdown_nack();
60de42
+        return;
60de42
+    }
60de42
+
60de42
     ipc_client = crm_client_get_by_id(session);
60de42
     if (ipc_client == NULL) {
60de42
         xmlNode *msg = create_xml_node(NULL, T_LRMD_IPC_PROXY);
60de42
diff --git a/lrmd/lrmd_private.h b/lrmd/lrmd_private.h
60de42
index 62e9c84..5579b92 100644
60de42
--- a/lrmd/lrmd_private.h
60de42
+++ b/lrmd/lrmd_private.h
60de42
@@ -85,6 +85,8 @@ void free_rsc(gpointer data);
60de42
 
60de42
 void handle_shutdown_ack(void);
60de42
 
60de42
+void handle_shutdown_nack(void);
60de42
+
60de42
 void lrmd_client_destroy(crm_client_t *client);
60de42
 
60de42
 void client_disconnect_cleanup(const char *client_id);
60de42
diff --git a/lrmd/main.c b/lrmd/main.c
60de42
index e3d3aaa..a3aa08f 100644
60de42
--- a/lrmd/main.c
60de42
+++ b/lrmd/main.c
60de42
@@ -364,6 +364,7 @@ void handle_shutdown_ack()
60de42
         crm_info("Received shutdown ack");
60de42
         if (shutdown_ack_timer > 0) {
60de42
             g_source_remove(shutdown_ack_timer);
60de42
+            shutdown_ack_timer = 0;
60de42
         }
60de42
         return;
60de42
     }
60de42
@@ -371,6 +372,29 @@ void handle_shutdown_ack()
60de42
     crm_debug("Ignoring unexpected shutdown ack");
60de42
 }
60de42
 
60de42
+/*!
60de42
+ * \internal
60de42
+ * \brief Make short exit timer fire immediately
60de42
+ */
60de42
+void handle_shutdown_nack()
60de42
+{
60de42
+#ifdef ENABLE_PCMK_REMOTE
60de42
+    if (shutting_down) {
60de42
+        crm_info("Received shutdown nack");
60de42
+        if (shutdown_ack_timer > 0) {
60de42
+            GSource *timer =
60de42
+                g_main_context_find_source_by_id(NULL, shutdown_ack_timer);
60de42
+
60de42
+            if (timer != NULL) {
60de42
+                g_source_set_ready_time(timer, 0);
60de42
+            }
60de42
+        }
60de42
+        return;
60de42
+    }
60de42
+#endif
60de42
+    crm_debug("Ignoring unexpected shutdown nack");
60de42
+}
60de42
+
60de42
 /* *INDENT-OFF* */
60de42
 static struct crm_option long_options[] = {
60de42
     /* Top-level Options */
60de42
diff --git a/pengine/allocate.c b/pengine/allocate.c
60de42
index 9a87816..7562253 100644
60de42
--- a/pengine/allocate.c
60de42
+++ b/pengine/allocate.c
60de42
@@ -2150,6 +2150,9 @@ stage8(pe_working_set_t * data_set)
60de42
 
60de42
     crm_log_xml_trace(data_set->graph, "created resource-driven action list");
60de42
 
60de42
+    /* pseudo action to distribute list of nodes with maintenance state update */
60de42
+    add_maintenance_update(data_set);
60de42
+
60de42
     /* catch any non-resource specific actions */
60de42
     crm_trace("processing non-resource actions");
60de42
 
60de42
diff --git a/pengine/graph.c b/pengine/graph.c
60de42
index 81d8355..5ba60f7 100644
60de42
--- a/pengine/graph.c
60de42
+++ b/pengine/graph.c
60de42
@@ -788,13 +788,15 @@ get_router_node(action_t *action)
60de42
  * \param[in]     id      Node UUID to add
60de42
  * \param[in,out] xml     Parent XML tag to add to
60de42
  */
60de42
-static void
60de42
+static xmlNode*
60de42
 add_node_to_xml_by_id(const char *id, xmlNode *xml)
60de42
 {
60de42
     xmlNode *node_xml;
60de42
 
60de42
     node_xml = create_xml_node(xml, XML_CIB_TAG_NODE);
60de42
     crm_xml_add(node_xml, XML_ATTR_UUID, id);
60de42
+
60de42
+    return node_xml;
60de42
 }
60de42
 
60de42
 /*!
60de42
@@ -812,6 +814,62 @@ add_node_to_xml(const node_t *node, void *xml)
60de42
 
60de42
 /*!
60de42
  * \internal
60de42
+ * \brief Add XML with nodes that need an update of their maintenance state
60de42
+ *
60de42
+ * \param[in,out] xml       Parent XML tag to add to
60de42
+ * \param[in]     data_set  Working set for cluster
60de42
+ */
60de42
+static int
60de42
+add_maintenance_nodes(xmlNode *xml, const pe_working_set_t *data_set)
60de42
+{
60de42
+    GListPtr gIter = NULL;
60de42
+    xmlNode *maintenance =
60de42
+        xml?create_xml_node(xml, XML_GRAPH_TAG_MAINTENANCE):NULL;
60de42
+    int count = 0;
60de42
+
60de42
+    for (gIter = data_set->nodes; gIter != NULL;
60de42
+         gIter = gIter->next) {
60de42
+        node_t *node = (node_t *) gIter->data;
60de42
+        struct node_shared_s *details = node->details;
60de42
+
60de42
+        if (!(is_remote_node(node))) {
60de42
+            continue; /* just remote nodes need to know atm */
60de42
+        }
60de42
+
60de42
+        if (details->maintenance != details->remote_maintenance) {
60de42
+            if (maintenance) {
60de42
+                crm_xml_add(
60de42
+                    add_node_to_xml_by_id(node->details->id, maintenance),
60de42
+                    XML_NODE_IS_MAINTENANCE, details->maintenance?"1":"0");
60de42
+            }
60de42
+            count++;
60de42
+        }
60de42
+    }
60de42
+    crm_trace("%s %d nodes to adjust maintenance-mode "
60de42
+              "to transition", maintenance?"Added":"Counted", count);
60de42
+    return count;
60de42
+}
60de42
+
60de42
+/*!
60de42
+ * \internal
60de42
+ * \brief Add pseudo action with nodes needing maintenance state update
60de42
+ *
60de42
+ * \param[in,out] data_set  Working set for cluster
60de42
+ */
60de42
+void
60de42
+add_maintenance_update(pe_working_set_t *data_set)
60de42
+{
60de42
+    action_t *action = NULL;
60de42
+
60de42
+    if (add_maintenance_nodes(NULL, data_set)) {
60de42
+        crm_trace("adding maintenance state update pseudo action");
60de42
+        action = get_pseudo_op(CRM_OP_MAINTENANCE_NODES, data_set);
60de42
+        set_bit(action->flags, pe_action_print_always);
60de42
+    }
60de42
+}
60de42
+
60de42
+/*!
60de42
+ * \internal
60de42
  * \brief Add XML with nodes that an action is expected to bring down
60de42
  *
60de42
  * If a specified action is expected to bring any nodes down, add an XML block
60de42
@@ -874,6 +932,7 @@ static xmlNode *
60de42
 action2xml(action_t * action, gboolean as_input, pe_working_set_t *data_set)
60de42
 {
60de42
     gboolean needs_node_info = TRUE;
60de42
+    gboolean needs_maintenance_info = FALSE;
60de42
     xmlNode *action_xml = NULL;
60de42
     xmlNode *args_xml = NULL;
60de42
 
60de42
@@ -901,6 +960,9 @@ action2xml(action_t * action, gboolean as_input, pe_working_set_t *data_set)
60de42
 /* 		action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); */
60de42
 
60de42
     } else if (is_set(action->flags, pe_action_pseudo)) {
60de42
+        if (safe_str_eq(action->task, CRM_OP_MAINTENANCE_NODES)) {
60de42
+            needs_maintenance_info = TRUE;
60de42
+        }
60de42
         action_xml = create_xml_node(NULL, XML_GRAPH_TAG_PSEUDO_EVENT);
60de42
         needs_node_info = FALSE;
60de42
 
60de42
@@ -1082,6 +1144,10 @@ action2xml(action_t * action, gboolean as_input, pe_working_set_t *data_set)
60de42
         add_downed_nodes(action_xml, action, data_set);
60de42
     }
60de42
 
60de42
+    if (needs_maintenance_info) {
60de42
+        add_maintenance_nodes(action_xml, data_set);
60de42
+    }
60de42
+
60de42
     crm_log_xml_trace(action_xml, "dumped action");
60de42
     return action_xml;
60de42
 }
60de42
diff --git a/pengine/pengine.h b/pengine/pengine.h
60de42
index 5500819..e3f4874 100644
60de42
--- a/pengine/pengine.h
60de42
+++ b/pengine/pengine.h
60de42
@@ -145,6 +145,7 @@ extern int new_rsc_order(resource_t * lh_rsc, const char *lh_task,
60de42
     new_rsc_order(rsc1, CRMD_ACTION_STOP, rsc2, CRMD_ACTION_STOP, type, data_set)
60de42
 
60de42
 extern void graph_element_from_action(action_t * action, pe_working_set_t * data_set);
60de42
+extern void add_maintenance_update(pe_working_set_t *data_set);
60de42
 
60de42
 extern gboolean show_scores;
60de42
 extern int scores_log_level;
60de42
-- 
60de42
1.8.3.1
60de42